mirror of
https://git.naxdy.org/Mirror/Ryujinx.git
synced 2025-01-18 08:20:33 +00:00
9f1cf6458c
* Initial implementation of migration between memory heaps - Missing OOM handling - Missing `_map` data safety when remapping - Copy may not have completed yet (needs some kind of fence) - Map may be unmapped before it is done being used. (needs scoped access) - SSBO accesses are all "writes" - maybe pass info in another way. - Missing keeping map type when resizing buffers (should this be done?) * Ensure migrated data is in place before flushing. * Fix issue where old waitable would be signalled. - There is a real issue where existing Auto<> references need to be replaced. * Swap bound Auto<> instances when swapping buffer backing * Fix conversion buffers * Don't try move buffers if the host has shared memory. * Make GPU methods return PinnedSpan with scope * Storage Hint * Fix stupidity * Fix rebase * Tweak rules Attempt to sidestep BOTW slowdown * Remove line * Migrate only when command buffers flush * Change backing swap log to debug * Address some feedback * Disallow backing swap when the flush lock is held by the current thread * Make PinnedSpan from ReadOnlySpan explicitly unsafe * Fix some small issues - Index buffer swap fixed - Allocate DeviceLocal buffers using a separate block list to images. * Remove alternative flags * Address feedback
488 lines
No EOL
15 KiB
C#
488 lines
No EOL
15 KiB
C#
using Ryujinx.Common;
|
|
using Ryujinx.Common.Configuration;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Commands;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Commands.Buffer;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Model;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Resources;
|
|
using Ryujinx.Graphics.GAL.Multithreading.Resources.Programs;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
using System.Threading;
|
|
|
|
namespace Ryujinx.Graphics.GAL.Multithreading
|
|
{
|
|
/// <summary>
|
|
/// The ThreadedRenderer is a layer that can be put in front of any Renderer backend to make
|
|
/// its processing happen on a separate thread, rather than intertwined with the GPU emulation.
|
|
/// A new thread is created to handle the GPU command processing, separate from the renderer thread.
|
|
/// Calls to the renderer, pipeline and resources are queued to happen on the renderer thread.
|
|
/// </summary>
|
|
public class ThreadedRenderer : IRenderer
|
|
{
|
|
// Byte size handed to the CircularSpanPool created in the constructor.
private const int SpanPoolBytes = 4 * 1024 * 1024;

// Number of reference-table entries reserved for each command queue slot.
private const int MaxRefsPerCommand = 2;

// Number of slots in the circular command queue.
private const int QueueCount = 10000;

// Size in bytes of a single command slot; computed once in the constructor.
private readonly int _elementSize;
private readonly IRenderer _baseRenderer;

// Thread that runs the gpuLoop delegate given to RunLoop.
private Thread _gpuThread;

// Thread that called RunLoop and therefore executes RenderLoop.
private Thread _backendThread;
private bool _disposed;
private bool _running;

private readonly AutoResetEvent _frameComplete = new AutoResetEvent(true);

// Set whenever RenderLoop has something new to look at (commands, an interrupt, or shutdown).
private readonly ManualResetEventSlim _galWorkAvailable;
private readonly CircularSpanPool _spanPool;

// Set by RenderLoop once the command registered through InvokeCommand has run.
private readonly ManualResetEventSlim _invokeRun;

// Set by RenderLoop once a pending interrupt action has run.
private readonly AutoResetEvent _interruptRun;

private bool _lastSampleCounterClear = true;

// Backing storage for commands: QueueCount slots of _elementSize bytes each.
private readonly byte[] _commandQueue;

// Backing storage for object references attached to queued commands.
private readonly object[] _refQueue;

private int _consumerPtr;
private int _commandCount;

private int _producerPtr;
private int _lastProducedPtr;
private int _invokePtr;

private int _refProducerPtr;
private int _refConsumerPtr;

// Action waiting to be run by RenderLoop; null when no interrupt is pending.
private Action _interruptAction;

public event EventHandler<ScreenCaptureImageInfo> ScreenCaptured;

internal BufferMap Buffers { get; }
internal SyncMap Sync { get; }
internal CircularSpanPool SpanPool { get; }
internal ProgramQueue Programs { get; }

public IPipeline Pipeline { get; }
public IWindow Window { get; }

public IRenderer BaseRenderer => _baseRenderer;

public bool PreferThreading => _baseRenderer.PreferThreading;
|
|
|
|
/// <summary>
/// Wraps <paramref name="renderer"/> and allocates the command queue, reference table,
/// span pool and synchronization events used to forward work to it.
/// </summary>
/// <param name="renderer">Backend renderer that the queued commands are run against</param>
public ThreadedRenderer(IRenderer renderer)
{
    _baseRenderer = renderer;

    // Re-raise backend screen captures with this wrapper as the sender.
    renderer.ScreenCaptured += (_, captureInfo) => ScreenCaptured?.Invoke(this, captureInfo);

    // The backend is handed this class's own Interrupt implementation.
    renderer.SetInterruptAction(Interrupt);

    Pipeline = new ThreadedPipeline(this, renderer.Pipeline);
    Window = new ThreadedWindow(this, renderer);
    Buffers = new BufferMap();
    Sync = new SyncMap();
    Programs = new ProgramQueue(renderer);

    // All three events start out unsignalled.
    _galWorkAvailable = new ManualResetEventSlim(false);
    _invokeRun = new ManualResetEventSlim();
    _interruptRun = new AutoResetEvent(false);

    _spanPool = new CircularSpanPool(this, SpanPoolBytes);
    SpanPool = _spanPool;

    // Every slot is as large as the biggest command, aligned up to 4.
    int maxCommandSize = CommandHelper.GetMaxCommandSize();
    _elementSize = BitUtils.AlignUp(maxCommandSize, 4);

    _commandQueue = new byte[_elementSize * QueueCount];
    _refQueue = new object[MaxRefsPerCommand * QueueCount];
}
|
|
|
|
/// <summary>
/// Starts <paramref name="gpuLoop"/> on a new thread named "GPU.MainThread", then runs
/// <see cref="RenderLoop"/> on the calling thread, which is recorded as the backend thread.
/// </summary>
/// <param name="gpuLoop">Work to run on the newly created GPU thread</param>
public void RunLoop(Action gpuLoop)
{
    _running = true;
    _backendThread = Thread.CurrentThread;

    void GpuThreadBody()
    {
        gpuLoop();

        // Clear the flag before waking the render loop, so it re-checks its loop condition.
        _running = false;
        _galWorkAvailable.Set();
    }

    _gpuThread = new Thread(GpuThreadBody)
    {
        Name = "GPU.MainThread",
    };
    _gpuThread.Start();

    RenderLoop();
}
|
|
|
|
/// <summary>
/// Consumes queued commands and pending interrupt actions on the calling thread until
/// the GPU thread has finished or this instance is disposed.
/// </summary>
public void RenderLoop()
{
    while (_running && !_disposed)
    {
        _galWorkAvailable.Wait();
        _galWorkAvailable.Reset();

        // Interrupts are serviced before any queued command.
        Action pendingInterrupt = Volatile.Read(ref _interruptAction);

        if (pendingInterrupt != null)
        {
            pendingInterrupt();
            _interruptRun.Set();

            // Clearing the field allows the next Interrupt call to install its action.
            Interlocked.Exchange(ref _interruptAction, null);
        }

        // Only this loop decrements the command count; producers can only raise it.
        // The drain stops early as soon as another interrupt shows up.
        while (Volatile.Read(ref _commandCount) > 0 && Volatile.Read(ref _interruptAction) == null)
        {
            int slot = _consumerPtr;

            Span<byte> command = _commandQueue.AsSpan(slot * _elementSize, _elementSize);

            CommandHelper.RunCommand(command, this, _baseRenderer);

            // If InvokeCommand is waiting on exactly this slot, release it.
            if (Interlocked.CompareExchange(ref _invokePtr, -1, slot) == slot)
            {
                _invokeRun.Set();
            }

            _consumerPtr = (slot + 1) % QueueCount;

            Interlocked.Decrement(ref _commandCount);
        }
    }
}
|
|
|
|
/// <summary>
/// Inserts <paramref name="data"/> into the span pool and returns the resulting reference.
/// </summary>
/// <param name="data">Data to insert</param>
/// <returns>The value returned by the span pool's Insert call</returns>
internal SpanRef<T> CopySpan<T>(ReadOnlySpan<T> data) where T : unmanaged => _spanPool.Insert(data);
|
|
|
|
/// <summary>
/// Wraps <paramref name="reference"/> in a <see cref="TableRef{T}"/> bound to this renderer.
/// </summary>
/// <param name="reference">Object to wrap</param>
/// <returns>The newly created table reference</returns>
private TableRef<T> Ref<T>(T reference) => new TableRef<T>(this, reference);
|
|
|
|
/// <summary>
/// Reserves the next slot of the command queue and returns it reinterpreted as a
/// command of type <typeparamref name="T"/>. The slot's last byte is set to the command type.
/// Sleeps in 1ms steps while the queue has no free slot.
/// </summary>
/// <typeparam name="T">Command struct type; must implement IGALCommand</typeparam>
/// <returns>A reference into the command queue's backing array</returns>
internal ref T New<T>() where T : struct
{
    // The producer never advances onto the slot just behind the consumer, so it waits here
    // until the consumer has moved on. The consumer pointer only ever moves forward.
    while (_producerPtr == (Volatile.Read(ref _consumerPtr) + QueueCount - 1) % QueueCount)
    {
        Thread.Sleep(1);
    }

    int slot = _producerPtr;

    // Remembered so InvokeCommand can tell which slot it has to wait for.
    _lastProducedPtr = slot;
    _producerPtr = (slot + 1) % QueueCount;

    Span<byte> slotBytes = _commandQueue.AsSpan(slot * _elementSize, _elementSize);
    ref T command = ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(slotBytes));

    // Tag the slot with the command type in its final byte.
    slotBytes[slotBytes.Length - 1] = (byte)((IGALCommand)command).CommandType;

    return ref command;
}
|
|
|
|
/// <summary>
/// Stores <paramref name="obj"/> in the next slot of the reference table.
/// </summary>
/// <param name="obj">Object to store</param>
/// <returns>Index of the slot the object was written to</returns>
internal int AddTableRef(object obj)
{
    // No overflow check: the table holds MaxRefsPerCommand entries per command slot, which
    // is enough provided references are only taken after their command has been allocated.
    int slot = _refProducerPtr;

    _refQueue[slot] = obj;
    _refProducerPtr = (slot + 1) % _refQueue.Length;

    return slot;
}
|
|
|
|
/// <summary>
/// Takes the object at the reference table's consumer position, clears that slot and
/// advances the consumer position.
/// </summary>
/// <param name="index">Expected slot index; only checked by a debug assertion</param>
/// <returns>The object that was stored in the slot</returns>
internal object RemoveTableRef(int index)
{
    int slot = _refConsumerPtr;

    // References are expected to be removed in the same order they were added.
    Debug.Assert(index == slot);

    object stored = _refQueue[slot];

    // Drop the table's reference so the slot no longer keeps the object alive.
    _refQueue[slot] = null;
    _refConsumerPtr = (slot + 1) % _refQueue.Length;

    return stored;
}
|
|
|
|
/// <summary>
/// Publishes the most recently allocated command by incrementing the pending command count,
/// signalling the render loop when the count goes from 0 to 1.
/// </summary>
internal void QueueCommand()
{
    // A result of 1 means the count was 0 before this increment.
    if (Interlocked.Increment(ref _commandCount) == 1)
    {
        _galWorkAvailable.Set();
    }
}
|
|
|
|
/// <summary>
/// Queues the most recently allocated command and blocks until the render loop has run it.
/// </summary>
internal void InvokeCommand()
{
    // Clear the completion event and record the target slot before the command is published.
    _invokeRun.Reset();
    _invokePtr = _lastProducedPtr;

    QueueCommand();

    // RenderLoop sets the event after running the command in the recorded slot.
    _invokeRun.Wait();
}
|
|
|
|
/// <summary>
/// Blocks until the frame-complete event is signalled.
/// </summary>
internal void WaitForFrame() => _frameComplete.WaitOne();
|
|
|
|
/// <summary>
/// Signals the frame-complete event, releasing one <see cref="WaitForFrame"/> call.
/// </summary>
internal void SignalFrame() => _frameComplete.Set();
|
|
|
|
/// <summary>
/// Checks whether the caller is running on the thread created by <see cref="RunLoop"/>.
/// </summary>
/// <returns>True if the current thread is the GPU thread, false otherwise</returns>
internal bool IsGpuThread() => Thread.CurrentThread == _gpuThread;
|
|
|
|
/// <summary>
/// Runs <paramref name="action"/> either through the command queue (when called on the GPU
/// thread and <paramref name="alwaysBackground"/> is false) or by forwarding it to the base renderer.
/// </summary>
/// <param name="action">Action to run</param>
/// <param name="alwaysBackground">When true, always forward to the base renderer</param>
public void BackgroundContextAction(Action action, bool alwaysBackground = false)
{
    if (!IsGpuThread() || alwaysBackground)
    {
        _baseRenderer.BackgroundContextAction(action, true);

        return;
    }

    // Called from the GPU thread: the action has to run on the render thread instead,
    // so queue it as a command and wait for it to complete.
    ref ActionCommand command = ref New<ActionCommand>();
    command.Set(Ref(action));
    InvokeCommand();
}
|
|
|
|
/// <summary>
/// Allocates a buffer handle from the buffer map and queues the creation of the buffer.
/// </summary>
/// <param name="size">Size passed on to the queued create command</param>
/// <param name="storageHint">Storage hint handle passed on to the queued create command</param>
/// <returns>The handle obtained from the buffer map</returns>
public BufferHandle CreateBuffer(int size, BufferHandle storageHint)
{
    BufferHandle threadedHandle = Buffers.CreateBufferHandle();

    ref CreateBufferCommand command = ref New<CreateBufferCommand>();
    command.Set(threadedHandle, size, storageHint);
    QueueCommand();

    return threadedHandle;
}
|
|
|
|
/// <summary>
/// Creates a threaded program placeholder, registers a source-based build request with the
/// program queue and queues a command carrying that request.
/// </summary>
/// <param name="shaders">Shader sources for the program</param>
/// <param name="info">Shader info forwarded to the request</param>
/// <returns>The threaded program placeholder</returns>
public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
{
    ThreadedProgram program = new ThreadedProgram(this);

    var request = new SourceProgramRequest(program, shaders, info);
    Programs.Add(request);

    // The command stores the request through its interface type.
    IProgramRequest queuedRequest = request;

    New<CreateProgramCommand>().Set(Ref(queuedRequest));
    QueueCommand();

    return program;
}
|
|
|
|
/// <summary>
/// Creates a threaded sampler and queues the command that creates it on the backend.
/// </summary>
/// <param name="info">Sampler creation info</param>
/// <returns>The threaded sampler</returns>
public ISampler CreateSampler(SamplerCreateInfo info)
{
    ThreadedSampler sampler = new ThreadedSampler(this);

    ref CreateSamplerCommand command = ref New<CreateSamplerCommand>();
    command.Set(Ref(sampler), info);
    QueueCommand();

    return sampler;
}
|
|
|
|
/// <summary>
/// Registers a sync handle for <paramref name="id"/> in the sync map and queues the
/// command that creates the sync object.
/// </summary>
/// <param name="id">Sync id</param>
/// <param name="strict">Strict flag forwarded to the queued command</param>
public void CreateSync(ulong id, bool strict)
{
    // The handle is registered in the sync map before the command is queued.
    Sync.CreateSyncHandle(id);

    ref CreateSyncCommand command = ref New<CreateSyncCommand>();
    command.Set(id, strict);
    QueueCommand();
}
|
|
|
|
/// <summary>
/// Creates a threaded texture. On the GPU thread the backend texture is created through a
/// queued command; on any other thread it is created immediately on the base renderer.
/// </summary>
/// <param name="info">Texture creation info</param>
/// <param name="scale">Scale forwarded to the texture and the backend</param>
/// <returns>The threaded texture</returns>
public ITexture CreateTexture(TextureCreateInfo info, float scale)
{
    bool onGpuThread = IsGpuThread();

    var texture = new ThreadedTexture(this, info, scale);

    if (onGpuThread)
    {
        New<CreateTextureCommand>().Set(Ref(texture), info, scale);
        QueueCommand();
    }
    else
    {
        // Not on the GPU thread: bypass the queue and assign the backend texture directly.
        texture.Base = _baseRenderer.CreateTexture(info, scale);
    }

    return texture;
}
|
|
|
|
/// <summary>
/// Queues a command that disposes <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Handle of the buffer to dispose</param>
public void DeleteBuffer(BufferHandle buffer)
{
    ref BufferDisposeCommand command = ref New<BufferDisposeCommand>();
    command.Set(buffer);
    QueueCommand();
}
|
|
|
|
/// <summary>
/// Reads data from a buffer. On the GPU thread the read goes through the command queue and
/// blocks until it has run; on other threads the base renderer is called directly with the
/// handle returned by the buffer map.
/// </summary>
/// <param name="buffer">Buffer handle</param>
/// <param name="offset">Offset forwarded to the backend read</param>
/// <param name="size">Size forwarded to the backend read</param>
/// <returns>The span produced by the backend</returns>
public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
{
    if (!IsGpuThread())
    {
        BufferHandle mapped = Buffers.MapBufferBlocking(buffer);

        return _baseRenderer.GetBufferData(mapped, offset, size);
    }

    // The command writes its result into this box, which is read once InvokeCommand returns.
    var box = new ResultBox<PinnedSpan<byte>>();

    New<BufferGetDataCommand>().Set(buffer, offset, size, Ref(box));
    InvokeCommand();

    return box.Result;
}
|
|
|
|
/// <summary>
/// Queries the capabilities through the command queue and waits for the result.
/// </summary>
/// <returns>The capabilities stored in the result box by the command</returns>
public Capabilities GetCapabilities()
{
    var capabilitiesBox = new ResultBox<Capabilities>();

    ref GetCapabilitiesCommand command = ref New<GetCapabilitiesCommand>();
    command.Set(Ref(capabilitiesBox));
    InvokeCommand();

    return capabilitiesBox.Result;
}
|
|
|
|
/// <summary>
/// Returns the current sync value reported by the base renderer, without going through the queue.
/// </summary>
public ulong GetCurrentSync() => _baseRenderer.GetCurrentSync();
|
|
|
|
/// <summary>
/// Returns the hardware info reported by the base renderer, without going through the queue.
/// </summary>
public HardwareInfo GetHardwareInfo() => _baseRenderer.GetHardwareInfo();
|
|
|
|
/// <summary>
/// Initializes the wrapped renderer directly, bypassing the command queue.
/// This must be called on the render thread.
/// </summary>
/// <param name="logLevel">Log level to use</param>
public void Initialize(GraphicsDebugLevel logLevel) => _baseRenderer.Initialize(logLevel);
|
|
|
|
/// <summary>
/// Creates a threaded program placeholder, registers a binary-based build request with the
/// program queue and queues a command carrying that request.
/// </summary>
/// <param name="programBinary">Program binary forwarded to the request</param>
/// <param name="hasFragmentShader">Flag forwarded to the request</param>
/// <param name="info">Shader info forwarded to the request</param>
/// <returns>The threaded program placeholder</returns>
public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info)
{
    ThreadedProgram program = new ThreadedProgram(this);

    var request = new BinaryProgramRequest(program, programBinary, hasFragmentShader, info);
    Programs.Add(request);

    // The command stores the request through its interface type.
    IProgramRequest queuedRequest = request;

    New<CreateProgramCommand>().Set(Ref(queuedRequest));
    QueueCommand();

    return program;
}
|
|
|
|
/// <summary>
/// Queues a pre-frame command.
/// </summary>
public void PreFrame()
{
    // This command has no payload, so allocating the slot is all that is needed.
    New<PreFrameCommand>();
    QueueCommand();
}
|
|
|
|
/// <summary>
/// Creates a threaded counter event and queues the command that reports the counter.
/// </summary>
/// <param name="type">Counter type</param>
/// <param name="resultHandler">Handler forwarded to the queued command</param>
/// <param name="hostReserved">Flag forwarded to the queued command</param>
/// <returns>The threaded counter event</returns>
public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler, bool hostReserved)
{
    // The event captures the clear flag as it was before this report.
    var counterEvent = new ThreadedCounterEvent(this, type, _lastSampleCounterClear);

    New<ReportCounterCommand>().Set(Ref(counterEvent), type, Ref(resultHandler), hostReserved);
    QueueCommand();

    // Only a samples-passed report clears the flag.
    bool isSamplesPassed = type == CounterType.SamplesPassed;

    if (isSamplesPassed)
    {
        _lastSampleCounterClear = false;
    }

    return counterEvent;
}
|
|
|
|
/// <summary>
/// Queues a command that resets the counter of the given type and marks the sample
/// counter as cleared.
/// </summary>
/// <param name="type">Counter type to reset</param>
public void ResetCounter(CounterType type)
{
    ref ResetCounterCommand command = ref New<ResetCounterCommand>();
    command.Set(type);
    QueueCommand();

    // NOTE(review): the flag is set for every counter type, whereas ReportCounter only
    // clears it for SamplesPassed - confirm this asymmetry is intended.
    _lastSampleCounterClear = true;
}
|
|
|
|
/// <summary>
/// Requests a screenshot from the base renderer directly, without going through the queue.
/// </summary>
public void Screenshot() => _baseRenderer.Screenshot();
|
|
|
|
/// <summary>
/// Copies <paramref name="data"/> into the span pool and queues a command that writes it
/// to <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Destination buffer handle</param>
/// <param name="offset">Offset forwarded to the queued command</param>
/// <param name="data">Data to write; copied before this method returns</param>
public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
{
    // The command slot is allocated first; the data is copied while filling the slot in.
    ref BufferSetDataCommand command = ref New<BufferSetDataCommand>();
    command.Set(buffer, offset, CopySpan(data));
    QueueCommand();
}
|
|
|
|
/// <summary>
/// Queues an update-counters command.
/// </summary>
public void UpdateCounters()
{
    // This command has no payload, so allocating the slot is all that is needed.
    New<UpdateCountersCommand>();
    QueueCommand();
}
|
|
|
|
/// <summary>
/// Waits on the sync map for <paramref name="id"/>, then waits on the base renderer's sync.
/// </summary>
/// <param name="id">Sync id to wait for</param>
public void WaitSync(ulong id)
{
    // NOTE(review): presumably blocks until the queued CreateSync for this id has run on
    // the backend - confirm against SyncMap.
    Sync.WaitSyncAvailability(id);

    _baseRenderer.WaitSync(id);
}
|
|
|
|
/// <summary>
/// Runs <paramref name="action"/> on the backend thread. When called from the backend thread
/// the action runs immediately; otherwise it is handed to the render loop and the caller
/// blocks until the render loop has run it.
/// </summary>
/// <param name="action">Action to run on the backend thread</param>
private void Interrupt(Action action)
{
    if (Thread.CurrentThread == _backendThread)
    {
        // Already on the backend thread, so there is nothing to interrupt.
        action();

        return;
    }

    // Only one interrupt can be pending at a time. If another one is in flight, back off
    // with SpinWait instead of hammering the CPU with an empty loop. Sleep(1) is disabled
    // (threshold -1) so the wait only spins and yields, keeping latency low.
    SpinWait spinner = default;

    while (Interlocked.CompareExchange(ref _interruptAction, action, null) != null)
    {
        spinner.SpinOnce(sleep1Threshold: -1);
    }

    // Wake the render loop, then wait until it reports that the action has run.
    _galWorkAvailable.Set();

    _interruptRun.WaitOne();
}
|
|
|
|
/// <summary>
/// Intentionally does nothing: the threaded renderer supplies its own interrupt
/// implementation to the wrapped renderer in its constructor.
/// </summary>
/// <param name="interruptAction">Ignored</param>
public void SetInterruptAction(Action<Action> interruptAction)
{
    // The given interrupt action is deliberately discarded.
}
|
|
|
|
/// <summary>
/// Waits for the GPU thread to finish, then disposes the base renderer, the synchronization
/// events and the sync map. Must be called from the render thread, after all commands
/// have completed. Calls after the first one are ignored.
/// </summary>
public void Dispose()
{
    // Dispose is expected to be safe to call more than once; without this guard a second
    // call would dispose the base renderer and the sync map again.
    if (_disposed)
    {
        return;
    }

    // This flag also stops the render loop from iterating again.
    _disposed = true;

    // Wait for the GPU thread to exit, if it was ever started.
    if (_gpuThread != null && _gpuThread.IsAlive)
    {
        _gpuThread.Join();
    }

    // Dispose the renderer.
    _baseRenderer.Dispose();

    // Dispose events.
    _frameComplete.Dispose();
    _galWorkAvailable.Dispose();
    _invokeRun.Dispose();
    _interruptRun.Dispose();

    Sync.Dispose();
}
|
|
}
|
|
} |