From 01ca055d382ff9d505f2ac7f1da0002da2362823 Mon Sep 17 00:00:00 2001 From: gdk Date: Sat, 22 Oct 2022 21:28:28 -0300 Subject: [PATCH] Global memory emulation using NV_shader_buffer_store and VK_EXT_buffer_device_address --- Ryujinx.Graphics.GAL/IPipeline.cs | 1 + Ryujinx.Graphics.GAL/IRenderer.cs | 1 + .../Multithreading/CommandHelper.cs | 4 + .../Multithreading/CommandType.cs | 2 + .../Buffer/BufferGetGpuAddressCommand.cs | 24 +++ .../UpdatePageTableGpuAddressCommand.cs | 18 ++ .../Multithreading/ThreadedPipeline.cs | 6 + .../Multithreading/ThreadedRenderer.cs | 16 ++ Ryujinx.Graphics.GAL/SupportBufferUpdater.cs | 14 ++ .../Engine/Compute/ComputeClass.cs | 12 +- .../Engine/Threed/StateUpdater.cs | 34 ++- Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 11 + Ryujinx.Graphics.Gpu/Memory/BufferCache.cs | 14 +- Ryujinx.Graphics.Gpu/Memory/BufferManager.cs | 90 +++++++- .../Memory/BufferPageTable.cs | 201 ++++++++++++++++++ Ryujinx.Graphics.Gpu/Memory/Mapping.cs | 22 ++ Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 85 ++++++++ .../Shader/DiskCache/DiskCacheHostStorage.cs | 14 ++ Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs | 1 + Ryujinx.Graphics.OpenGL/Buffer.cs | 8 + Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 5 + Ryujinx.Graphics.OpenGL/Pipeline.cs | 5 + .../CodeGen/Glsl/Declarations.cs | 78 ++++--- .../CodeGen/Glsl/DefaultNames.cs | 1 + .../Glsl/HelperFunctions/GlobalMemory.glsl | 36 ++++ .../Glsl/HelperFunctions/GlobalMemoryVk.glsl | 34 +++ .../HelperFunctions/HelperFunctionNames.cs | 2 + .../CodeGen/Glsl/Instructions/InstGen.cs | 13 ++ .../Glsl/Instructions/InstGenHelper.cs | 2 + .../Glsl/Instructions/InstGenMemory.cs | 57 +++++ .../CodeGen/Spirv/CodeGenContext.cs | 1 + .../CodeGen/Spirv/Declarations.cs | 36 +++- .../CodeGen/Spirv/Instructions.cs | 102 ++++++++- .../CodeGen/Spirv/SpirvGenerator.cs | 10 + Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 5 - .../Ryujinx.Graphics.Shader.csproj | 2 + Ryujinx.Graphics.Shader/ShaderProgramInfo.cs | 6 + .../StructuredIr/HelperFunctionsMask.cs | 21 +- .../StructuredIr/StructuredProgram.cs | 29 +++ Ryujinx.Graphics.Shader/SupportBuffer.cs | 3 + .../Translation/FeatureFlags.cs | 12 +- .../Translation/Rewriter.cs | 4 +- .../Translation/ShaderConfig.cs | 12 +- Ryujinx.Graphics.Vulkan/BufferHolder.cs | 17 +- Ryujinx.Graphics.Vulkan/BufferManager.cs | 22 +- Ryujinx.Graphics.Vulkan/Constants.cs | 2 +- .../DescriptorSetManager.cs | 2 +- .../HardwareCapabilities.cs | 3 + Ryujinx.Graphics.Vulkan/PipelineBase.cs | 5 + .../VulkanInitialization.cs | 15 ++ Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 6 + Ryujinx.Memory/Range/RangeList.cs | 12 ++ Spv.Generator/Autogenerated/CoreGrammar.cs | 12 +- 53 files changed, 1075 insertions(+), 75 deletions(-) create mode 100644 Ryujinx.Graphics.GAL/Multithreading/Commands/Buffer/BufferGetGpuAddressCommand.cs create mode 100644 Ryujinx.Graphics.GAL/Multithreading/Commands/UpdatePageTableGpuAddressCommand.cs create mode 100644 Ryujinx.Graphics.Gpu/Memory/BufferPageTable.cs create mode 100644 Ryujinx.Graphics.Gpu/Memory/Mapping.cs create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemoryVk.glsl diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs index 26d019eb4..a52007848 100644 --- a/Ryujinx.Graphics.GAL/IPipeline.cs +++ b/Ryujinx.Graphics.GAL/IPipeline.cs @@ -107,6 +107,7 @@ namespace Ryujinx.Graphics.GAL bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual); void EndHostConditionalRendering(); + void UpdatePageTableGpuAddress(ulong address); void UpdateRenderScale(ReadOnlySpan scales, int totalCount, int fragmentCount); } } diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs index c72320a2f..6f28468f3 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.GAL void DeleteBuffer(BufferHandle buffer); ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int size); + ulong GetBufferGpuAddress(BufferHandle buffer); Capabilities GetCapabilities(); ulong GetCurrentSync(); diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index b2799e099..fb93782af 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -77,6 +77,8 @@ namespace Ryujinx.Graphics.GAL.Multithreading BufferDisposeCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.BufferGetData] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => BufferGetDataCommand.Run(ref GetCommand(memory), threaded, renderer); + _lookup[(int)CommandType.BufferGetGpuAddress] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => + BufferGetGpuAddressCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.BufferSetData] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => BufferSetDataCommand.Run(ref GetCommand(memory), threaded, renderer); @@ -229,6 +231,8 @@ namespace Ryujinx.Graphics.GAL.Multithreading TryHostConditionalRenderingCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.TryHostConditionalRenderingFlush] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => TryHostConditionalRenderingFlushCommand.Run(ref GetCommand(memory), threaded, renderer); + _lookup[(int)CommandType.UpdatePageTableGpuAddress] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => + UpdatePageTableGpuAddressCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.UpdateRenderScale] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => UpdateRenderScaleCommand.Run(ref GetCommand(memory), threaded, renderer); } diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index c199ff34c..047f811fa 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -17,6 +17,7 @@ BufferDispose, BufferGetData, + BufferGetGpuAddress, BufferSetData, CounterEventDispose, @@ -96,6 +97,7 @@ TextureBarrierTiled, TryHostConditionalRendering, TryHostConditionalRenderingFlush, + UpdatePageTableGpuAddress, UpdateRenderScale } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Buffer/BufferGetGpuAddressCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Buffer/BufferGetGpuAddressCommand.cs new file mode 100644 index 000000000..3d0839cc3 --- /dev/null +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Buffer/BufferGetGpuAddressCommand.cs @@ -0,0 +1,24 @@ +using Ryujinx.Graphics.GAL.Multithreading.Model; + +namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Buffer +{ + struct BufferGetGpuAddressCommand : IGALCommand + { + public CommandType CommandType => CommandType.BufferGetGpuAddress; + private BufferHandle _buffer; + private TableRef> _result; + + public void Set(BufferHandle buffer, TableRef> result) + { + _buffer = buffer; + _result = result; + } + + public static void Run(ref BufferGetGpuAddressCommand command, ThreadedRenderer threaded, IRenderer renderer) + { + ulong result = renderer.GetBufferGpuAddress(threaded.Buffers.MapBuffer(command._buffer)); + + command._result.Get(threaded).Result = result; + } + } +} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/UpdatePageTableGpuAddressCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/UpdatePageTableGpuAddressCommand.cs new file mode 100644 index 000000000..7b98b7133 --- /dev/null +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/UpdatePageTableGpuAddressCommand.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.GAL.Multithreading.Commands +{ + struct UpdatePageTableGpuAddressCommand : IGALCommand + { + public CommandType CommandType => CommandType.UpdatePageTableGpuAddress; + private ulong _address; + + public void Set(ulong address) + { + _address = address; + } + + public static void Run(ref UpdatePageTableGpuAddressCommand command, ThreadedRenderer threaded, IRenderer renderer) + { + renderer.Pipeline.UpdatePageTableGpuAddress(command._address); + } + } +} diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs index ba120867c..665ac9b42 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs @@ -365,6 +365,12 @@ namespace Ryujinx.Graphics.GAL.Multithreading return false; } + public void UpdatePageTableGpuAddress(ulong address) + { + _renderer.New().Set(address); + _renderer.QueueCommand(); + } + public void UpdateRenderScale(ReadOnlySpan scales, int totalCount, int fragmentCount) { _renderer.New().Set(_renderer.CopySpan(scales.Slice(0, totalCount)), totalCount, fragmentCount); diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index a9e3b75c4..e3d6cd568 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -329,6 +329,22 @@ namespace Ryujinx.Graphics.GAL.Multithreading } } + public ulong GetBufferGpuAddress(BufferHandle buffer) + { + if (IsGpuThread()) + { + ResultBox box = new ResultBox(); + New().Set(buffer, Ref(box)); + InvokeCommand(); + + return box.Result; + } + else + { + return _baseRenderer.GetBufferGpuAddress(Buffers.MapBufferBlocking(buffer)); + } + } + public Capabilities GetCapabilities() { ResultBox box = new ResultBox(); diff --git a/Ryujinx.Graphics.GAL/SupportBufferUpdater.cs b/Ryujinx.Graphics.GAL/SupportBufferUpdater.cs index 6eeddb6c0..34756aa74 100644 --- a/Ryujinx.Graphics.GAL/SupportBufferUpdater.cs +++ b/Ryujinx.Graphics.GAL/SupportBufferUpdater.cs @@ -44,6 +44,20 @@ namespace Ryujinx.Graphics.GAL } } + public void UpdatePageTableBasePointer(ulong address) + { + uint addrLow = (uint)address; + uint addrHigh = (uint)(address >> 32); + + if (Data.PageTableBasePointer.X != addrLow || Data.PageTableBasePointer.Y != addrHigh) + { + Data.PageTableBasePointer.X = addrLow; + Data.PageTableBasePointer.Y = addrHigh; + + MarkDirty(SupportBuffer.PageTableBasePointerOffset, sizeof(ulong)); + } + } + public void UpdateFragmentRenderScaleCount(int count) { if (Data.FragmentRenderScaleCount.X != count) diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 2ac738fdf..b18f2b0b4 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -203,14 +203,22 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute } _channel.BufferManager.SetComputeBufferBindings(cs.Bindings); - _channel.TextureManager.SetComputeBindings(cs.Bindings); + if (info.UsesGlobalMemory) + { + _channel.BufferManager.SynchronizeComputeStorageBuffers(info.UsesGlobalMemoryWrite); + } + // Should never return false for mismatching spec state, since the shader was fetched above. _channel.TextureManager.CommitComputeBindings(cs.SpecializationState); - _channel.BufferManager.CommitComputeBindings(); + if (info.UsesGlobalMemory) + { + _channel.BufferManager.UpdatePageTable(); + } + _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth); _3dEngine.ForceShaderUpdate(); diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index b611f4e70..e0c488868 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -36,6 +36,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private ProgramPipelineState _pipeline; + private uint _globalMemoryUseMask; + private uint _globalMemoryWriteMask; private bool _vsUsesDrawParameters; private bool _vtgWritesRtLayer; private byte _vsClipDistancesWritten; @@ -309,6 +311,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed UpdateStorageBuffers(); + bool usesGlobalMemory = _globalMemoryUseMask != 0; + + if (usesGlobalMemory) + { + _channel.BufferManager.SynchronizeGraphicsStorageBuffers(_globalMemoryUseMask, _globalMemoryWriteMask); + } + if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || (buffers.HasUnalignedStorageBuffers != hasUnaligned)) { _currentSpecState.SetHasUnalignedStorageBuffer(buffers.HasUnalignedStorageBuffers); @@ -317,6 +326,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } _channel.BufferManager.CommitGraphicsBindings(); + + if (usesGlobalMemory) + { + _channel.BufferManager.UpdatePageTable(); + } } /// @@ -1295,9 +1309,27 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed UpdateShaderBindings(gs.Bindings); + _globalMemoryUseMask = 0; + _globalMemoryWriteMask = 0; + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { - _currentProgramInfo[stageIndex] = gs.Shaders[stageIndex + 1]?.Info; + ShaderProgramInfo info = gs.Shaders[stageIndex + 1]?.Info; + + _currentProgramInfo[stageIndex] = info; + + if (info != null) + { + if (info.UsesGlobalMemory) + { + _globalMemoryUseMask |= 1u << stageIndex; + } + + if (info.UsesGlobalMemoryWrite) + { + _globalMemoryWriteMask |= 1u << stageIndex; + } + } } _context.Renderer.Pipeline.SetProgram(gs.HostProgram); diff --git a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index 842249f34..dab879ef3 100644 --- a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -66,6 +66,7 @@ namespace Ryujinx.Graphics.Gpu.Memory private bool _syncActionRegistered; private int _referenceCount = 1; + private ulong _hostGpuAddress; /// /// Creates a new instance of the buffer. @@ -167,6 +168,16 @@ namespace Ryujinx.Graphics.Gpu.Memory return new BufferRange(Handle, offset, (int)size); } + public ulong GetHostGpuAddress(ulong address) + { + if (_hostGpuAddress == 0) + { + _hostGpuAddress = _context.Renderer.GetBufferGpuAddress(Handle); + } + + return _hostGpuAddress + (address - Address); + } + /// /// Checks if a given range overlaps with the buffer. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs index 00f590831..fa65333f7 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs @@ -27,7 +27,6 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Must lock for any access from other threads. /// private readonly RangeList _buffers; - private Buffer[] _bufferOverlaps; private readonly Dictionary _dirtyCache; @@ -395,6 +394,11 @@ namespace Ryujinx.Graphics.Gpu.Memory return GetBuffer(address, size, write).GetRange(address, size); } + public ulong GetBufferHostGpuAddress(ulong address, ulong size, bool write = false) + { + return GetBuffer(address, size, write).GetHostGpuAddress(address); + } + /// /// Gets a buffer for a given memory range. /// A buffer overlapping with the specified range is assumed to already exist on the cache. @@ -431,13 +435,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range - public void SynchronizeBufferRange(ulong address, ulong size) + /// Whether the buffer will be written to by this use + public void SynchronizeBufferRange(ulong address, ulong size, bool write = false) { if (size != 0) { Buffer buffer = _buffers.FindFirstOverlap(address, size); buffer.SynchronizeMemory(address, size); + + if (write) + { + buffer.SignalModified(address, size); + } } } diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 1728cdb58..5042f62ee 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -1,5 +1,6 @@ using Ryujinx.Common; using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Gpu.Shader; using Ryujinx.Graphics.Shader; @@ -107,6 +108,9 @@ namespace Ryujinx.Graphics.Gpu.Memory private bool _transformFeedbackBuffersDirty; private bool _rebind; + private bool _rebindPageTable; + + private BufferPageTable _bufferPageTable; /// /// Creates a new instance of the buffer manager. @@ -137,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Memory _bufferTextures = new List(); _ranges = new BufferAssignment[Constants.TotalGpUniformBuffers * Constants.ShaderStages]; + + _bufferPageTable = new BufferPageTable(context); } @@ -438,7 +444,7 @@ namespace Ryujinx.Graphics.Gpu.Memory CommitBufferTextureBindings(); // Force rebind after doing compute work. - Rebind(); + Rebind(rebindPageTable: false); } /// @@ -747,8 +753,90 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Force all bound textures and images to be rebound the next time CommitBindings is called. /// public void Rebind() + { + Rebind(rebindPageTable: true); + } + + /// + /// Force all bound textures and images to be rebound the next time CommitBindings is called. + /// + /// Indicates that the page table needs to also be rebound + public void Rebind(bool rebindPageTable) { _rebind = true; + + if (rebindPageTable) + { + _rebindPageTable = true; + } + } + + public void SynchronizeComputeStorageBuffers(bool write) + { + MemoryManager memoryManager = _channel.MemoryManager; + + var bufferCache = memoryManager.Physical.BufferCache; + + for (int index = 0; index < 16; index++) + { + ulong sbDescAddress = GetComputeUniformBufferAddress(0); + + int sbDescOffset = 0x310 + index * 0x10; + + sbDescAddress += (ulong)sbDescOffset; + + SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read(sbDescAddress); + + ulong address = bufferCache.TranslateAndCreateBuffer(memoryManager, sbDescriptor.PackAddress(), (ulong)sbDescriptor.Size); + + if (address != 0) + { + bufferCache.SynchronizeBufferRange(address, (ulong)sbDescriptor.Size, write); + } + } + } + + public void SynchronizeGraphicsStorageBuffers(uint globalMemoryUseMask, uint globalMemoryWriteMask) + { + MemoryManager memoryManager = _channel.MemoryManager; + + var bufferCache = memoryManager.Physical.BufferCache; + + for (int stage = 0; stage < Constants.ShaderStages; stage++) + { + if ((globalMemoryUseMask & (1u << stage)) == 0) + { + continue; + } + + bool write = (globalMemoryWriteMask & (1u << stage)) != 0; + + for (int index = 0; index < 16; index++) + { + ulong sbDescAddress = GetGraphicsUniformBufferAddress(stage, 0); + + int sbDescOffset = 0x110 + stage * 0x100 + index * 0x10; + + sbDescAddress += (ulong)sbDescOffset; + + SbDescriptor sbDescriptor = memoryManager.Physical.Read(sbDescAddress); + + ulong address = bufferCache.TranslateAndCreateBuffer(memoryManager, sbDescriptor.PackAddress(), (ulong)sbDescriptor.Size); + + if (address != 0) + { + bufferCache.SynchronizeBufferRange(address, (ulong)sbDescriptor.Size, write); + } + } + } + } + + public void UpdatePageTable() + { + MemoryManager memoryManager = _channel.MemoryManager; + + _bufferPageTable.Update(memoryManager, _rebindPageTable); + _rebindPageTable = false; } } } diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferPageTable.cs b/Ryujinx.Graphics.Gpu/Memory/BufferPageTable.cs new file mode 100644 index 000000000..2752e2853 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Memory/BufferPageTable.cs @@ -0,0 +1,201 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Numerics; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + class BufferPageTable + { + private const int PageBits = MemoryManager.PtPageBits; + private const ulong PageSize = MemoryManager.PageSize; + private const ulong PageMask = MemoryManager.PageMask; + + private const int AsBits = 40; + private const ulong AsSize = 1UL << AsBits; + private const int AsPtBits = AsBits - PageBits; + private const int AsPtLevels = 2; + private const int AsPtLevelBits = AsPtBits / AsPtLevels; + + private const int PtLevel0Shift = PageBits; + private const int PtLevel1Shift = PtLevel0Shift + AsPtLevelBits; + private const ulong PtLevelMask = (1UL << AsPtLevelBits) - 1; + + private readonly GpuContext _context; + + private struct BufferMapping + { + public readonly ulong CpuAddress; + public readonly ulong GpuAddress; + public readonly ulong Size; + + public BufferMapping(ulong cpuAddress, ulong gpuAddress, ulong size) + { + CpuAddress = cpuAddress; + GpuAddress = gpuAddress; + Size = size; + } + } + + private BufferMapping[] _mappings; + private BufferHandle _bufferMap; + private ulong _bufferMapHostGpuAddress; + private int _bufferMapSize; + + private readonly Dictionary _blockIdMap; + private readonly ulong[] _blockBitmap; + + private readonly int[] _idMap; + private bool _idMapDataDirty; + + public BufferPageTable(GpuContext context) + { + _context = context; + + _blockIdMap = new Dictionary(); + _blockBitmap = new ulong[((1 << AsPtLevelBits) + 63) / 64]; + + _idMap = new int[1 << AsPtLevelBits]; + } + + public void Update(MemoryManager memoryManager, bool forceUpdate) + { + BufferCache bufferCache = memoryManager.Physical.BufferCache; + + if (memoryManager.MappingsModified || forceUpdate) + { + Mapping[] mappings = memoryManager.GetMappings(); + + BufferMapping[] bufferMappings = new BufferMapping[mappings.Length]; + + for (int i = 0; i < mappings.Length; i++) + { + Mapping mapping = mappings[i]; + ulong cpuAddress = bufferCache.TranslateAndCreateBuffer(memoryManager, mapping.Address, mapping.Size); + + bufferMappings[i] = new BufferMapping(cpuAddress, mapping.Address, mapping.Size); + } + + _mappings = bufferMappings; + + for (int i = 0; i < bufferMappings.Length; i++) + { + BufferMapping mapping = bufferMappings[i]; + + ulong hostAddress = 0; + + if (mapping.CpuAddress != 0) + { + hostAddress = bufferCache.GetBufferHostGpuAddress(mapping.CpuAddress, mapping.Size); + } + + Map(hostAddress, mapping.GpuAddress, mapping.Size); + } + + if (_idMapDataDirty) + { + BufferHandle bufferMap = EnsureBufferMap(_idMap.Length * sizeof(int)); + _context.Renderer.SetBufferData(bufferMap, 0, MemoryMarshal.Cast(_idMap)); + + _idMapDataDirty = false; + } + + _context.Renderer.Pipeline.UpdatePageTableGpuAddress(_bufferMapHostGpuAddress); + } + } + + private void Map(ulong hostAddress, ulong guestAddress, ulong size) + { + ulong endGuestAddress = guestAddress + size; + ulong blockSize = PageSize << AsPtLevelBits; + + while (guestAddress < endGuestAddress) + { + ulong nextGuestAddress = (guestAddress + blockSize) & ~(blockSize - 1); + + ulong chunckSize = Math.Min(nextGuestAddress - guestAddress, endGuestAddress - guestAddress); + + int pages = (int)(chunckSize / PageSize); + + int blockRegionOffset = sizeof(uint) << AsPtLevelBits; + int blockOffset = GetBlockId(guestAddress) * (sizeof(ulong) << AsPtLevelBits); + int blockInnerOffset = (int)((guestAddress >> PtLevel0Shift) & PtLevelMask) * sizeof(ulong); + int baseOffset = blockRegionOffset + blockOffset + blockInnerOffset; + + ulong[] data = new ulong[pages]; + + for (int page = 0; page < pages; page++) + { + data[page] = hostAddress; + + if (hostAddress != 0) + { + hostAddress += PageSize; + } + } + + BufferHandle bufferMap = EnsureBufferMap(blockRegionOffset + blockOffset + (sizeof(ulong) << AsPtLevelBits)); + _context.Renderer.SetBufferData(bufferMap, baseOffset, MemoryMarshal.Cast(data)); + + guestAddress += chunckSize; + } + } + + private BufferHandle EnsureBufferMap(int requiredSize) + { + if (requiredSize > _bufferMapSize) + { + BufferHandle newBuffer = _context.Renderer.CreateBuffer(requiredSize); + + if (_bufferMap != BufferHandle.Null) + { + _context.Renderer.Pipeline.CopyBuffer(_bufferMap, newBuffer, 0, 0, _bufferMapSize); + _context.Renderer.DeleteBuffer(_bufferMap); + } + + _bufferMap = newBuffer; + _bufferMapHostGpuAddress = _context.Renderer.GetBufferGpuAddress(_bufferMap); + _bufferMapSize = requiredSize; + } + + return _bufferMap; + } + + private int GetBlockId(ulong address) + { + int blockIndex = (int)((address >> PtLevel1Shift) & PtLevelMask); + + if (!_blockIdMap.TryGetValue(blockIndex, out int mappedIndex)) + { + mappedIndex = AllocateNewBlock(_blockBitmap); + + _idMap[blockIndex] = mappedIndex << AsPtLevelBits; + _idMapDataDirty = true; + + _blockIdMap.Add(blockIndex, mappedIndex); + } + + return mappedIndex; + } + + private static int AllocateNewBlock(ulong[] bitmap) + { + for (int index = 0; index < bitmap.Length; index++) + { + ref ulong v = ref bitmap[index]; + + if (v == ulong.MaxValue) + { + continue; + } + + int firstFreeBit = BitOperations.TrailingZeroCount(~v); + v |= 1UL << firstFreeBit; + return index * 64 + firstFreeBit; + } + + throw new InvalidOperationException("No free space left on the texture or sampler table."); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Memory/Mapping.cs b/Ryujinx.Graphics.Gpu/Memory/Mapping.cs new file mode 100644 index 000000000..e30d07bea --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Memory/Mapping.cs @@ -0,0 +1,22 @@ +using Ryujinx.Memory.Range; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + struct Mapping : IRange + { + public ulong Address { get; } + public ulong Size { get; } + public ulong EndAddress => Address + Size; + + public bool OverlapsWith(ulong address, ulong size) + { + return Address < address + size && address < EndAddress; + } + + public Mapping(ulong address, ulong size) + { + Address = address; + Size = size; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 0ac6160d9..a3ee67b89 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -32,6 +32,9 @@ namespace Ryujinx.Graphics.Gpu.Memory private readonly ulong[][] _pageTable; + private readonly RangeList _mappings; + internal bool MappingsModified { get; private set; } + public event EventHandler MemoryUnmapped; /// @@ -53,6 +56,7 @@ namespace Ryujinx.Graphics.Gpu.Memory Physical = physicalMemory; CounterCache = new CounterCache(); _pageTable = new ulong[PtLvl0Size][]; + _mappings = new RangeList(); MemoryUnmapped += Physical.TextureCache.MemoryUnmappedHandler; MemoryUnmapped += Physical.BufferCache.MemoryUnmappedHandler; MemoryUnmapped += CounterCache.MemoryUnmappedHandler; @@ -379,6 +383,11 @@ namespace Ryujinx.Graphics.Gpu.Memory { lock (_pageTable) { + if (kind == PteKind.Pitch) + { + AddMapping(va, size); + } + MemoryUnmapped?.Invoke(this, new UnmapEventArgs(va, size)); for (ulong offset = 0; offset < size; offset += PageSize) @@ -397,6 +406,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { lock (_pageTable) { + RemoveMapping(va, size); + // Event handlers are not expected to be thread safe. MemoryUnmapped?.Invoke(this, new UnmapEventArgs(va, size)); @@ -677,5 +688,79 @@ namespace Ryujinx.Graphics.Gpu.Memory { return pte & 0xffffffffffffffUL; } + + private void AddMapping(ulong va, ulong size) + { + lock (_mappings) + { + ulong startAddress = va; + ulong endAddress = va + size; + + Mapping[] overlaps = Array.Empty(); + + int overlapsCount = _mappings.FindOverlapsNonOverlapping(va, size, ref overlaps); + for (int i = 0; i < overlapsCount; i++) + { + Mapping overlap = overlaps[i]; + + if (overlap.Address < startAddress) + { + startAddress = overlap.Address; + } + + if (overlap.EndAddress > endAddress) + { + endAddress = overlap.EndAddress; + } + + _mappings.Remove(overlap); + } + + _mappings.Add(new Mapping(startAddress, endAddress - startAddress)); + MappingsModified = true; + } + } + + private void RemoveMapping(ulong va, ulong size) + { + lock (_mappings) + { + ulong endAddress = va + size; + + Mapping[] overlaps = Array.Empty(); + + int overlapsCount = _mappings.FindOverlapsNonOverlapping(va, size, ref overlaps); + for (int i = 0; i < overlapsCount; i++) + { + Mapping overlap = overlaps[i]; + + _mappings.Remove(overlap); + + if (overlap.Address < va) + { + _mappings.Add(new Mapping(overlap.Address, va - overlap.Address)); + } + + if (overlap.EndAddress > endAddress) + { + _mappings.Add(new Mapping(endAddress, overlap.EndAddress - endAddress)); + } + } + + if (overlapsCount != 0) + { + MappingsModified = true; + } + } + } + + internal Mapping[] GetMappings() + { + lock (_mappings) + { + MappingsModified = false; + return _mappings.ToArray(); + } + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 2bdb85bf0..d66f55cfe 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -140,6 +140,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// public ShaderStage Stage; + /// + /// Indicates if the shader uses instructions that access global memory, such as LDG, STG and ATOM. + /// + public bool UsesGlobalMemory; + + /// + /// Indicates if the shader uses instructions that modify global memory, such as STG and ATOM. + /// + public bool UsesGlobalMemoryWrite; + /// /// Indicates if the shader accesses the Instance ID built-in variable. /// @@ -775,6 +785,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache textures, images, dataInfo.Stage, + dataInfo.UsesGlobalMemory, + dataInfo.UsesGlobalMemoryWrite, dataInfo.UsesInstanceId, dataInfo.UsesDrawParameters, dataInfo.UsesRtLayer, @@ -801,6 +813,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataInfo.TexturesCount = (ushort)info.Textures.Count; dataInfo.ImagesCount = (ushort)info.Images.Count; dataInfo.Stage = info.Stage; + dataInfo.UsesGlobalMemory = info.UsesGlobalMemory; + dataInfo.UsesGlobalMemoryWrite = info.UsesGlobalMemoryWrite; dataInfo.UsesInstanceId = info.UsesInstanceId; dataInfo.UsesDrawParameters = info.UsesDrawParameters; dataInfo.UsesRtLayer = info.UsesRtLayer; diff --git a/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs index b85423cb3..481616517 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs @@ -31,6 +31,7 @@ namespace Ryujinx.Graphics.Gpu.Shader public ResourceCounts() { UniformBuffersCount = 1; // The first binding is reserved for the support buffer. + StorageBuffersCount = 1; // The first binding is reserved for the buffer mappings table for GPU address translation. } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.OpenGL/Buffer.cs b/Ryujinx.Graphics.OpenGL/Buffer.cs index 68c82f955..d34bf88e4 100644 --- a/Ryujinx.Graphics.OpenGL/Buffer.cs +++ b/Ryujinx.Graphics.OpenGL/Buffer.cs @@ -73,6 +73,14 @@ namespace Ryujinx.Graphics.OpenGL } } + public static ulong GetGpuAddress(BufferHandle handle) + { + GL.BindBuffer(BufferTarget.CopyWriteBuffer, handle.ToInt32()); + GL.NV.MakeBufferResident((NvShaderBufferLoad)BufferTarget.CopyWriteBuffer, (NvShaderBufferLoad)All.ReadWrite); + GL.NV.GetBufferParameter(BufferTargetArb.CopyWriteBuffer, NvShaderBufferLoad.BufferGpuAddressNv, out ulong gpuAddress); + return gpuAddress; + } + public static void Resize(BufferHandle handle, int size) { GL.BindBuffer(BufferTarget.CopyWriteBuffer, handle.ToInt32()); diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 9e008b33e..4b7f91daa 100644 --- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -99,6 +99,11 @@ namespace Ryujinx.Graphics.OpenGL return Buffer.GetData(this, buffer, offset, size); } + public ulong GetBufferGpuAddress(BufferHandle buffer) + { + return Buffer.GetGpuAddress(buffer); + } + public Capabilities GetCapabilities() { return new Capabilities( diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs index 8bcaf4c77..6b4d4e050 100644 --- a/Ryujinx.Graphics.OpenGL/Pipeline.cs +++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs @@ -1537,6 +1537,11 @@ namespace Ryujinx.Graphics.OpenGL return (_boundDrawFramebuffer, _boundReadFramebuffer); } + public void UpdatePageTableGpuAddress(ulong address) + { + _supportBuffer.UpdatePageTableBasePointer(address); + } + public void UpdateRenderScale(ReadOnlySpan scales, int totalCount, int fragmentCount) { bool changed = false; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index c6e3b3390..b35493765 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -59,6 +59,22 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable"); } + if ((info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0) + { + context.AppendLine("#extension GL_EXT_shader_16bit_storage : enable"); + context.AppendLine("#extension GL_EXT_shader_8bit_storage : enable"); + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + context.AppendLine("#extension GL_EXT_buffer_reference : enable"); + context.AppendLine("#extension GL_EXT_buffer_reference_uvec2 : enable"); + } + else + { + context.AppendLine("#extension GL_NV_shader_buffer_load : enable"); + } + } + context.AppendLine("#pragma optionNV(fastmath off)"); context.AppendLine(); @@ -241,37 +257,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl bool isFragment = context.Config.Stage == ShaderStage.Fragment; - if (isFragment || context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Vertex) + if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce()) { - if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce()) + context.AppendLine("layout(early_fragment_tests) in;"); + context.AppendLine(); + } + + if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0) + { + string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage); + + int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length; + + if (isFragment) { - context.AppendLine("layout(early_fragment_tests) in;"); + scaleElements++; // Also includes render target scale, for gl_FragCoord. + } + + DeclareSupportUniformBlock(context, info, context.Config.Stage, scaleElements); + + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0) + { + AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl"); context.AppendLine(); } - - if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0) - { - string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage); - - int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length; - - if (isFragment) - { - scaleElements++; // Also includes render target scale, for gl_FragCoord. - } - - DeclareSupportUniformBlock(context, context.Config.Stage, scaleElements); - - if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0) - { - AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl"); - context.AppendLine(); - } - } - else if (isFragment || context.Config.Stage == ShaderStage.Vertex) - { - DeclareSupportUniformBlock(context, context.Config.Stage, 0); - } + } + else + { + DeclareSupportUniformBlock(context, info, context.Config.Stage, 0); } if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0) @@ -284,6 +297,13 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl"); } + if ((info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0) + { + AppendHelperFunction(context, context.Config.Options.TargetApi == TargetApi.Vulkan + ? "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemoryVk.glsl" + : "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl"); + } + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); @@ -672,9 +692,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine($"layout (location = {location}) patch out vec4 {name};"); } - private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements) + private static void DeclareSupportUniformBlock(CodeGenContext context, StructuredProgramInfo info, ShaderStage stage, int scaleElements) { bool needsSupportBlock = stage == ShaderStage.Fragment || + (info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0 || (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable()); if (!needsSupportBlock && scaleElements == 0) @@ -700,6 +721,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } context.AppendLine($"float {DefaultNames.SupportBlockRenderScaleName}[{SupportBuffer.RenderScaleMaxCount}];"); + context.AppendLine($"uvec4 {DefaultNames.SupportBlockPageTableBasePointerName};"); context.LeaveScope(";"); context.AppendLine(); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index 3ab4814ce..9cc575e87 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -21,6 +21,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public const string SupportBlockViewportInverse = "s_viewport_inverse"; public const string SupportBlockFragmentScaleCount = "s_frag_scale_count"; public const string SupportBlockRenderScaleName = "s_render_scale"; + public const string SupportBlockPageTableBasePointerName = "s_page_table"; public const string BlockSuffix = "block"; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl new file mode 100644 index 000000000..9b2a52fe5 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl @@ -0,0 +1,36 @@ +uvec2 Helper_TranslateAddress(uvec2 address) +{ + uvec4* buffer_regions = (uvec4*)packPtr(s_page_table.xy); + + uint64_t address64 = packUint2x32(address); + uint count = buffer_regions[0].x; + uint left = 0; + uint right = count; + + while (left != right) + { + uint middle = left + ((right - left) >> 1); + uint offset = middle * 2; + uvec4 guest_info = buffer_regions[1 + offset]; + uvec4 host_info = buffer_regions[2 + offset]; + + uint64_t start_address = packUint2x32(guest_info.xy); + uint64_t end_address = packUint2x32(guest_info.zw); + if (address64 >= start_address && address64 < end_address) + { + uint64_t host_address = packUint2x32(host_info.xy); + return unpackUint2x32((address64 - start_address) + host_address); + } + + if (address64 < start_address) + { + right = middle; + } + else + { + left = middle + 1; + } + } + + return uvec2(0, 0); +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemoryVk.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemoryVk.glsl new file mode 100644 index 000000000..6031e8723 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemoryVk.glsl @@ -0,0 +1,34 @@ +layout (buffer_reference, std430, buffer_reference_align = 8) buffer buffer_regions_block +{ + uint blockIndices[1 << 14]; + uvec2 pointers[]; +}; + +layout (buffer_reference, std430, buffer_reference_align = 1) buffer uint8_t_ptr +{ + uint8_t value; +}; + +layout (buffer_reference, std430, buffer_reference_align = 2) buffer uint16_t_ptr +{ + uint16_t value; +}; + +layout (buffer_reference, std430, buffer_reference_align = 4) buffer uint_ptr +{ + uint value; +}; + +uvec2 Helper_TranslateAddress(uvec2 address) +{ + buffer_regions_block br = buffer_regions_block(s_page_table.xy); + + uint l0 = (address.x >> 12) & 0x3fff; + uint l1 = ((address.x >> 26) & 0x3f) | ((address.y << 6) & 0x3fc0); + + uvec2 hostAddress = br.pointers[br.blockIndices[l1] + l0]; + + hostAddress.x += (address.x & 0xfff); + + return hostAddress; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index 54f35b15a..3cb420f07 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -18,5 +18,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public static string StoreShared8 = "Helper_StoreShared8"; public static string StoreStorage16 = "Helper_StoreStorage16"; public static string StoreStorage8 = "Helper_StoreStorage8"; + + public static string TranslateAddress = "Helper_TranslateAddress"; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index b890b0158..f76d262c3 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -87,6 +87,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions switch (memRegion) { + case Instruction.MrGlobal: args += LoadGlobal(context, operation); break; case Instruction.MrShared: args += LoadShared(context, operation); break; case Instruction.MrStorage: args += LoadStorage(context, operation); break; @@ -170,6 +171,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.LoadConstant: return LoadConstant(context, operation); + case Instruction.LoadGlobal: + return LoadGlobal(context, operation); + case Instruction.LoadLocal: return LoadLocal(context, operation); @@ -194,6 +198,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.StoreAttribute: return StoreAttribute(context, operation); + case Instruction.StoreGlobal: + return StoreGlobal(context, operation); + + case Instruction.StoreGlobal16: + return StoreGlobal16(context, operation); + + case Instruction.StoreGlobal8: + return StoreGlobal8(context, operation); + case Instruction.StoreLocal: return StoreLocal(context, operation); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index c40f96f11..60317a066 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -83,6 +83,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.IsNan, InstType.CallUnary, "isnan"); Add(Instruction.LoadAttribute, InstType.Special); Add(Instruction.LoadConstant, InstType.Special); + Add(Instruction.LoadGlobal, InstType.Special); Add(Instruction.LoadLocal, InstType.Special); Add(Instruction.LoadShared, InstType.Special); Add(Instruction.LoadStorage, InstType.Special); @@ -118,6 +119,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Sine, InstType.CallUnary, "sin"); Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); Add(Instruction.StoreAttribute, InstType.Special); + Add(Instruction.StoreGlobal, InstType.Special); Add(Instruction.StoreLocal, InstType.Special); Add(Instruction.StoreShared, InstType.Special); Add(Instruction.StoreShared16, InstType.Special); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 022e3a444..94225df9c 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; using System; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; @@ -238,6 +239,24 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } } + public static string LoadGlobal(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string addressLowExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string addressHighExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + return $"uint_ptr({HelperFunctionNames.TranslateAddress}(uvec2({addressLowExpr}, {addressHighExpr}))).value"; + } + else + { + return $"*(uint*)packPtr({HelperFunctionNames.TranslateAddress}(uvec2({addressLowExpr}, {addressHighExpr})))"; + } + } + public static string LoadLocal(CodeGenContext context, AstOperation operation) { return LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName); @@ -345,6 +364,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return $"{attrName} = {value}"; } + public static string StoreGlobal(CodeGenContext context, AstOperation operation) + { + return StoreGlobal(context, operation, "uint"); + } + + public static string StoreGlobal16(CodeGenContext context, AstOperation operation) + { + return StoreGlobal(context, operation, "uint16_t"); + } + + public static string StoreGlobal8(CodeGenContext context, AstOperation operation) + { + return StoreGlobal(context, operation, "uint8_t"); + } + + private static string StoreGlobal(CodeGenContext context, AstOperation operation, string type) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + IAstNode src3 = operation.GetSource(2); + + string addressLowExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string addressHighExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + VariableType srcType = OperandManager.GetNodeDestType(context, src3); + + string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32); + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + return $"{type}_ptr({HelperFunctionNames.TranslateAddress}(uvec2({addressLowExpr}, {addressHighExpr}))).value = {src}"; + } + else + { + return $"*({type}*)packPtr({HelperFunctionNames.TranslateAddress}(uvec2({addressLowExpr}, {addressHighExpr}))) = {src}"; + } + } + public static string StoreLocal(CodeGenContext context, AstOperation operation) { return StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index dff5474a1..b6ed4f30c 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -24,6 +24,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public int InputVertices { get; } public Dictionary UniformBuffers { get; } = new Dictionary(); + public Instruction PageTablePointerType { get; set; } public Instruction SupportBuffer { get; set; } public Instruction UniformBuffersArray { get; set; } public Instruction StorageBuffersArray { get; set; } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs index 54b00708b..db44138fb 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -101,7 +101,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv DeclareLocalMemory(context, localMemorySize); } - DeclareSupportBuffer(context); + DeclareSupportBuffer(context, info); DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors()); DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors()); DeclareSamplers(context, context.Config.GetTextureDescriptors()); @@ -133,9 +133,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return variable; } - private static void DeclareSupportBuffer(CodeGenContext context) + private static void DeclareSupportBuffer(CodeGenContext context, StructuredProgramInfo info) { - if (!context.Config.Stage.SupportsRenderScale() && !(context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable())) + if (!context.Config.Stage.SupportsRenderScale() && + (info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) == 0 && + !(context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable())) { return; } @@ -143,17 +145,26 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv var isBgraArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount)); var viewportInverseVectorType = context.TypeVector(context.TypeFP32(), 4); var renderScaleArrayType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount)); + var pageTablePointerVectorType = context.TypeVector(context.TypeU32(), 4); context.Decorate(isBgraArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); context.Decorate(renderScaleArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); - var supportBufferStructType = context.TypeStruct(false, context.TypeU32(), isBgraArrayType, viewportInverseVectorType, context.TypeS32(), renderScaleArrayType); + var supportBufferStructType = context.TypeStruct( + false, + context.TypeU32(), + isBgraArrayType, + viewportInverseVectorType, + context.TypeS32(), + renderScaleArrayType, + pageTablePointerVectorType); context.MemberDecorate(supportBufferStructType, 0, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentAlphaTestOffset); context.MemberDecorate(supportBufferStructType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset); context.MemberDecorate(supportBufferStructType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset); context.MemberDecorate(supportBufferStructType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset); context.MemberDecorate(supportBufferStructType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset); + context.MemberDecorate(supportBufferStructType, 5, Decoration.Offset, (LiteralInteger)SupportBuffer.PageTableBasePointerOffset); context.Decorate(supportBufferStructType, Decoration.Block); var supportBufferPointerType = context.TypePointer(StorageClass.Uniform, supportBufferStructType); @@ -165,6 +176,23 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddGlobalVariable(supportBufferVariable); context.SupportBuffer = supportBufferVariable; + + if ((info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0) + { + var blockArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), 1 << 14)); + var pointerArrayType = context.TypeRuntimeArray(context.TypeVector(context.TypeU32(), 2)); + + context.Decorate(blockArrayType, Decoration.ArrayStride, (LiteralInteger)4); + context.Decorate(pointerArrayType, Decoration.ArrayStride, (LiteralInteger)8); + + var ptStructType = context.TypeStruct(false, blockArrayType, pointerArrayType); + + context.MemberDecorate(ptStructType, 0, Decoration.Offset, (LiteralInteger)0); + context.MemberDecorate(ptStructType, 1, Decoration.Offset, (LiteralInteger)((1 << 14) * sizeof(uint))); + context.Decorate(ptStructType, Decoration.Block); + + context.PageTablePointerType = context.TypePointer(StorageClass.PhysicalStorageBuffer, ptStructType); + } } private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors) diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index d4a3102e2..29f0a9972 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -97,6 +97,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.IsNan, GenerateIsNan); Add(Instruction.LoadAttribute, GenerateLoadAttribute); Add(Instruction.LoadConstant, GenerateLoadConstant); + Add(Instruction.LoadGlobal, GenerateLoadGlobal); Add(Instruction.LoadLocal, GenerateLoadLocal); Add(Instruction.LoadShared, GenerateLoadShared); Add(Instruction.LoadStorage, GenerateLoadStorage); @@ -132,6 +133,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.Sine, GenerateSine); Add(Instruction.SquareRoot, GenerateSquareRoot); Add(Instruction.StoreAttribute, GenerateStoreAttribute); + Add(Instruction.StoreGlobal, GenerateStoreGlobal); + // Add(Instruction.StoreGlobal16, GenerateStoreGlobal16); + // Add(Instruction.StoreGlobal8, GenerateStoreGlobal8); Add(Instruction.StoreLocal, GenerateStoreLocal); Add(Instruction.StoreShared, GenerateStoreShared); Add(Instruction.StoreShared16, GenerateStoreShared16); @@ -954,6 +958,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return new OperationResult(AggregateType.FP32, value); } + private static OperationResult GenerateLoadGlobal(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetGlobalElemPointer(context, operation, context.TypeU32()); + var value = context.Load(context.TypeU32(), elemPointer, MemoryAccessMask.Aligned, 4); + + return new OperationResult(AggregateType.U32, value); + } + private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation) { return GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); @@ -1370,6 +1382,30 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return OperationResult.Invalid; } + private static OperationResult GenerateStoreGlobal(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetGlobalElemPointer(context, operation, context.TypeU32()); + context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2)), MemoryAccessMask.Aligned, 4); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreGlobal16(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetGlobalElemPointer(context, operation, context.TypeInt(16, 0)); + context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2)), MemoryAccessMask.Aligned, 2); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreGlobal8(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetGlobalElemPointer(context, operation, context.TypeInt(8, 0)); + context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2)), MemoryAccessMask.Aligned, 1); + + return OperationResult.Invalid; + } + private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation) { return GenerateStoreLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); @@ -1930,7 +1966,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv SpvInstruction elemPointer; Instruction mr = operation.Inst & Instruction.MrMask; - if (mr == Instruction.MrStorage) + if (mr == Instruction.MrGlobal) + { + elemPointer = GetGlobalElemPointer(context, operation, context.TypeU32()); + } + else if (mr == Instruction.MrStorage) { elemPointer = GetStorageElemPointer(context, operation); } @@ -1958,7 +1998,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv SpvInstruction elemPointer; Instruction mr = operation.Inst & Instruction.MrMask; - if (mr == Instruction.MrStorage) + if (mr == Instruction.MrGlobal) + { + elemPointer = GetGlobalElemPointer(context, operation, context.TypeU32()); + } + else if (mr == Instruction.MrStorage) { elemPointer = GetStorageElemPointer(context, operation); } @@ -2041,6 +2085,60 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddLabel(loopEnd); } + private static SpvInstruction GetGlobalElemPointer(CodeGenContext context, AstOperation operation, SpvInstruction elemType) + { + var vec4UintType = context.TypeVector(context.TypeU32(), 4); + var ptBasePointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, vec4UintType), context.SupportBuffer, context.Constant(context.TypeU32(), 5)); + ptBasePointer = context.Load(vec4UintType, ptBasePointer); + ptBasePointer = context.VectorShuffle(context.TypeVector(context.TypeU32(), 2), ptBasePointer, ptBasePointer, 0, 1); + ptBasePointer = context.Bitcast(context.PageTablePointerType, ptBasePointer); + + var addrLow = context.Get(AggregateType.U32, operation.GetSource(0)); + var addrHigh = context.Get(AggregateType.U32, operation.GetSource(1)); + + // uint l0 = (addrLow >> 12) & 0x3fff; + // uint l1 = ((addrLow >> 26) & 0x3f) | ((addrHigh << 6) & 0x3fc0); + var l0 = ShiftRightAndMask(context, addrLow, 12, 0x3fff); + var l1 = context.BitwiseOr(context.TypeU32(), + ShiftRightAndMask(context, addrLow, 26, 0x3f), + ShiftLeftAndMask(context, addrHigh, 6, 0x3fc0)); + + var blockIndexPointerType = context.TypePointer(StorageClass.PhysicalStorageBuffer, context.TypeU32()); + var blockIndex = context.AccessChain(blockIndexPointerType, ptBasePointer, context.Constant(context.TypeS32(), 0), l1); + blockIndex = context.Load(context.TypeU32(), blockIndex, MemoryAccessMask.Aligned, 4); + + var offset = context.IAdd(context.TypeU32(), blockIndex, l0); + + var vec2UintType = context.TypeVector(context.TypeU32(), 2); + var vec2UintPointerType = context.TypePointer(StorageClass.PhysicalStorageBuffer, vec2UintType); + var hostPointer = context.AccessChain(vec2UintPointerType, ptBasePointer, context.Constant(context.TypeS32(), 1), offset); + hostPointer = context.Load(vec2UintType, hostPointer, MemoryAccessMask.Aligned, 8); + + var pageOffset = context.BitwiseAnd(context.TypeU32(), addrLow, context.Constant(context.TypeU32(), 0xfff)); + + var hostPointerLow = context.IAdd(context.TypeU32(), context.CompositeExtract(context.TypeU32(), hostPointer, 0), pageOffset); + var hostPointerHigh = context.CompositeExtract(context.TypeU32(), hostPointer, 1); + + hostPointer = context.CompositeConstruct(vec2UintType, hostPointerLow, hostPointerHigh); + + var elemStructType = context.TypeStruct(false, elemType); + var elemStructPointerType = context.TypePointer(StorageClass.PhysicalStorageBuffer, elemStructType); + var elemPointerType = context.TypePointer(StorageClass.PhysicalStorageBuffer, elemType); + return context.AccessChain(elemPointerType, context.Bitcast(elemStructPointerType, hostPointer), context.Constant(context.TypeS32(), 0)); + } + + private static SpvInstruction ShiftLeftAndMask(CodeGenContext context, SpvInstruction value, int shift, int mask) + { + value = context.ShiftLeftLogical(context.TypeU32(), value, context.Constant(context.TypeS32(), shift)); + return context.BitwiseAnd(context.TypeU32(), value, context.Constant(context.TypeU32(), mask)); + } + + private static SpvInstruction ShiftRightAndMask(CodeGenContext context, SpvInstruction value, int shift, int mask) + { + value = context.ShiftRightLogical(context.TypeU32(), value, context.Constant(context.TypeS32(), shift)); + return context.BitwiseAnd(context.TypeU32(), value, context.Constant(context.TypeU32(), mask)); + } + private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation) { var sbVariable = context.StorageBuffersArray; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index 6e1db972d..d0064bdc6 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -94,6 +94,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddCapability(Capability.DrawParameters); } + if ((info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0) + { + context.AddCapability(Capability.PhysicalStorageBufferAddresses); + + context.AddExtension("SPV_KHR_physical_storage_buffer"); + } + + context.AddExtension("SPV_KHR_shader_ballot"); + context.AddExtension("SPV_KHR_subgroup_vote"); + Declarations.DeclareAll(context, info); if ((info.HelperFunctionsMask & NeedsInvocationIdMask) != 0) diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 380c425e5..a32691f33 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -234,11 +234,6 @@ namespace Ryujinx.Graphics.Shader.Decoders op = InstTable.GetOp(address, opCode); - if (op.Props.HasFlag(InstProps.TexB)) - { - config.SetUsedFeature(FeatureFlags.Bindless); - } - if (op.Name == InstName.Ald || op.Name == InstName.Ast || op.Name == InstName.Ipa) { SetUserAttributeUses(config, op.Name, opCode); diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index 3434e2a81..a500ed073 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -16,6 +16,8 @@ + + diff --git a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs index bb75b10ae..253714968 100644 --- a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs +++ b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs @@ -11,6 +11,8 @@ namespace Ryujinx.Graphics.Shader public ReadOnlyCollection Images { get; } public ShaderStage Stage { get; } + public bool UsesGlobalMemory { get; } + public bool UsesGlobalMemoryWrite { get; } public bool UsesInstanceId { get; } public bool UsesDrawParameters { get; } public bool UsesRtLayer { get; } @@ -23,6 +25,8 @@ namespace Ryujinx.Graphics.Shader TextureDescriptor[] textures, TextureDescriptor[] images, ShaderStage stage, + bool usesGlobalMemory, + bool usesGlobalMemoryWrite, bool usesInstanceId, bool usesDrawParameters, bool usesRtLayer, @@ -35,6 +39,8 @@ namespace Ryujinx.Graphics.Shader Images = Array.AsReadOnly(images); Stage = stage; + UsesGlobalMemory = usesGlobalMemory; + UsesGlobalMemoryWrite = usesGlobalMemoryWrite; UsesInstanceId = usesInstanceId; UsesDrawParameters = usesDrawParameters; UsesRtLayer = usesRtLayer; diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index d45f8d4ee..d1e738a78 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -7,15 +7,16 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { AtomicMinMaxS32Shared = 1 << 0, AtomicMinMaxS32Storage = 1 << 1, - MultiplyHighS32 = 1 << 2, - MultiplyHighU32 = 1 << 3, - Shuffle = 1 << 4, - ShuffleDown = 1 << 5, - ShuffleUp = 1 << 6, - ShuffleXor = 1 << 7, - StoreSharedSmallInt = 1 << 8, - StoreStorageSmallInt = 1 << 9, - SwizzleAdd = 1 << 10, - FSI = 1 << 11 + GlobalMemory = 1 << 2, + MultiplyHighS32 = 1 << 3, + MultiplyHighU32 = 1 << 4, + Shuffle = 1 << 5, + ShuffleDown = 1 << 6, + ShuffleUp = 1 << 7, + ShuffleXor = 1 << 8, + StoreSharedSmallInt = 1 << 9, + StoreStorageSmallInt = 1 << 10, + SwizzleAdd = 1 << 11, + FSI = 1 << 12 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 7678a4bf6..415fd4437 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -223,6 +223,35 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.AtomicMinS32 | Instruction.MrStorage: context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage; break; + case Instruction.AtomicAdd | Instruction.MrGlobal: + case Instruction.AtomicAnd | Instruction.MrGlobal: + case Instruction.AtomicCompareAndSwap | Instruction.MrGlobal: + case Instruction.AtomicMaxS32 | Instruction.MrGlobal: + case Instruction.AtomicMaxU32 | Instruction.MrGlobal: + case Instruction.AtomicMinS32 | Instruction.MrGlobal: + case Instruction.AtomicMinU32 | Instruction.MrGlobal: + case Instruction.AtomicOr | Instruction.MrGlobal: + case Instruction.AtomicSwap | Instruction.MrGlobal: + case Instruction.AtomicXor | Instruction.MrGlobal: + context.Config.SetUsedFeature(FeatureFlags.GlobalMemory); + context.Config.SetUsedFeature(FeatureFlags.GlobalMemoryWrite); + context.Info.HelperFunctionsMask |= HelperFunctionsMask.GlobalMemory; + break; + case Instruction.LoadGlobal: + case Instruction.StoreGlobal: + case Instruction.StoreGlobal16: + case Instruction.StoreGlobal8: + context.Config.SetUsedFeature(FeatureFlags.GlobalMemory); + + if (operation.Inst == Instruction.StoreGlobal || + operation.Inst == Instruction.StoreGlobal16 || + operation.Inst == Instruction.StoreGlobal8) + { + context.Config.SetUsedFeature(FeatureFlags.GlobalMemoryWrite); + } + + context.Info.HelperFunctionsMask |= HelperFunctionsMask.GlobalMemory; + break; case Instruction.MultiplyHighS32: context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; break; diff --git a/Ryujinx.Graphics.Shader/SupportBuffer.cs b/Ryujinx.Graphics.Shader/SupportBuffer.cs index 5fe993278..9f538f7ff 100644 --- a/Ryujinx.Graphics.Shader/SupportBuffer.cs +++ b/Ryujinx.Graphics.Shader/SupportBuffer.cs @@ -22,6 +22,7 @@ namespace Ryujinx.Graphics.Shader public static int FragmentRenderScaleCountOffset; public static int GraphicsRenderScaleOffset; public static int ComputeRenderScaleOffset; + public static int PageTableBasePointerOffset; public const int FragmentIsBgraCount = 8; // One for the render target, 64 for the textures, and 8 for the images. @@ -45,6 +46,7 @@ namespace Ryujinx.Graphics.Shader FragmentRenderScaleCountOffset = OffsetOf(ref instance, ref instance.FragmentRenderScaleCount); GraphicsRenderScaleOffset = OffsetOf(ref instance, ref instance.RenderScale); ComputeRenderScaleOffset = GraphicsRenderScaleOffset + FieldSize; + PageTableBasePointerOffset = OffsetOf(ref instance, ref instance.PageTableBasePointer); } public Vector4 FragmentAlphaTest; @@ -54,5 +56,6 @@ namespace Ryujinx.Graphics.Shader // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs. public Array73> RenderScale; + public Vector4 PageTableBasePointer; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index c035f212d..467dc2e52 100644 --- a/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -18,10 +18,12 @@ namespace Ryujinx.Graphics.Shader.Translation Bindless = 1 << 2, InstanceId = 1 << 3, DrawParameters = 1 << 4, - RtLayer = 1 << 5, - CbIndexing = 1 << 6, - IaIndexing = 1 << 7, - OaIndexing = 1 << 8, - FixedFuncAttr = 1 << 9 + GlobalMemory = 1 << 5, + GlobalMemoryWrite = 1 << 6, + RtLayer = 1 << 7, + CbIndexing = 1 << 8, + IaIndexing = 1 << 9, + OaIndexing = 1 << 10, + FixedFuncAttr = 1 << 11 } } diff --git a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index c4d2c5d90..a211c3db3 100644 --- a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -60,10 +60,10 @@ namespace Ryujinx.Graphics.Shader.Translation nextNode = node.Next; } - else if (UsesGlobalMemory(operation.Inst)) + /* else if (UsesGlobalMemory(operation.Inst)) { nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode; - } + } */ node = nextNode; } diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 85b56b51f..f82906369 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -277,7 +277,15 @@ namespace Ryujinx.Graphics.Shader.Translation NextInputAttributesComponents = config.ThisInputAttributesComponents; NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch; NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr); - MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch); + MergeOutputUserAttributes(config.UsedInputAttributes | config.PassthroughAttributes, config.UsedInputAttributesPerPatch); + + int passthroughAttributes = config.PassthroughAttributes; + while (passthroughAttributes != 0) + { + int bit = BitOperations.TrailingZeroCount(passthroughAttributes); + NextInputAttributesComponents |= new UInt128(0xf, 0) << (bit * 4); + passthroughAttributes &= ~(1 << bit); + } if (UsedOutputAttributesPerPatch.Count != 0) { @@ -706,6 +714,8 @@ namespace Ryujinx.Graphics.Shader.Translation GetTextureDescriptors(), GetImageDescriptors(), Stage, + UsedFeatures.HasFlag(FeatureFlags.GlobalMemory), + UsedFeatures.HasFlag(FeatureFlags.GlobalMemoryWrite), UsedFeatures.HasFlag(FeatureFlags.InstanceId), UsedFeatures.HasFlag(FeatureFlags.DrawParameters), UsedFeatures.HasFlag(FeatureFlags.RtLayer), diff --git a/Ryujinx.Graphics.Vulkan/BufferHolder.cs b/Ryujinx.Graphics.Vulkan/BufferHolder.cs index 6288f16fb..121ebd690 100644 --- a/Ryujinx.Graphics.Vulkan/BufferHolder.cs +++ b/Ryujinx.Graphics.Vulkan/BufferHolder.cs @@ -136,12 +136,6 @@ namespace Ryujinx.Graphics.Vulkan } } - public BufferHandle GetHandle() - { - var handle = _bufferHandle; - return Unsafe.As(ref handle); - } - public unsafe IntPtr Map(int offset, int mappingSize) { return _map; @@ -182,6 +176,17 @@ namespace Ryujinx.Graphics.Vulkan throw new InvalidOperationException("The buffer is not host mapped."); } + public ulong GetGpuAddress() + { + BufferDeviceAddressInfo info = new BufferDeviceAddressInfo() + { + SType = StructureType.BufferDeviceAddressInfo, + Buffer = GetBuffer().GetUnsafe().Value + }; + + return _gd.Api.GetBufferDeviceAddress(_device, info); + } + public unsafe void SetData(int offset, ReadOnlySpan data, CommandBufferScoped? cbs = null, Action endRenderPass = null) { int dataSize = Math.Min(data.Length, Size - offset); diff --git a/Ryujinx.Graphics.Vulkan/BufferManager.cs b/Ryujinx.Graphics.Vulkan/BufferManager.cs index f32403712..ddc603c8d 100644 --- a/Ryujinx.Graphics.Vulkan/BufferManager.cs +++ b/Ryujinx.Graphics.Vulkan/BufferManager.cs @@ -78,9 +78,17 @@ namespace Ryujinx.Graphics.Vulkan { usage |= BufferUsageFlags.ConditionalRenderingBitExt; } - else if (gd.Capabilities.SupportsIndirectParameters) + else { - usage |= BufferUsageFlags.IndirectBufferBit; + if (gd.Capabilities.SupportsIndirectParameters) + { + usage |= BufferUsageFlags.IndirectBufferBit; + } + + if (gd.Capabilities.SupportsBufferDeviceAddress) + { + usage |= BufferUsageFlags.ShaderDeviceAddressBitExt; + } } var bufferCreateInfo = new BufferCreateInfo() @@ -326,6 +334,16 @@ namespace Ryujinx.Graphics.Vulkan return ReadOnlySpan.Empty; } + public ulong GetBufferGpuAddress(BufferHandle handle) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetGpuAddress(); + } + + return 0; + } + public void SetData(BufferHandle handle, int offset, ReadOnlySpan data) where T : unmanaged { SetData(handle, offset, MemoryMarshal.Cast(data), null, null); diff --git a/Ryujinx.Graphics.Vulkan/Constants.cs b/Ryujinx.Graphics.Vulkan/Constants.cs index f43d815ab..083f33e47 100644 --- a/Ryujinx.Graphics.Vulkan/Constants.cs +++ b/Ryujinx.Graphics.Vulkan/Constants.cs @@ -12,7 +12,7 @@ public const int MaxStorageBuffersPerStage = 16; public const int MaxTexturesPerStage = 64; public const int MaxImagesPerStage = 16; - public const int MaxUniformBufferBindings = MaxUniformBuffersPerStage * MaxShaderStages; + public const int MaxUniformBufferBindings = MaxUniformBuffersPerStage * MaxShaderStages + 1; public const int MaxStorageBufferBindings = MaxStorageBuffersPerStage * MaxShaderStages; public const int MaxTextureBindings = MaxTexturesPerStage * MaxShaderStages; public const int MaxImageBindings = MaxImagesPerStage * MaxShaderStages; diff --git a/Ryujinx.Graphics.Vulkan/DescriptorSetManager.cs b/Ryujinx.Graphics.Vulkan/DescriptorSetManager.cs index a88bb7b12..6f3f2aacf 100644 --- a/Ryujinx.Graphics.Vulkan/DescriptorSetManager.cs +++ b/Ryujinx.Graphics.Vulkan/DescriptorSetManager.cs @@ -26,7 +26,7 @@ namespace Ryujinx.Graphics.Vulkan var poolSizes = new DescriptorPoolSize[] { - new DescriptorPoolSize(DescriptorType.UniformBuffer, (1 + Constants.MaxUniformBufferBindings) * DescriptorPoolMultiplier), + new DescriptorPoolSize(DescriptorType.UniformBuffer, Constants.MaxUniformBufferBindings * DescriptorPoolMultiplier), new DescriptorPoolSize(DescriptorType.StorageBuffer, Constants.MaxStorageBufferBindings * DescriptorPoolMultiplier), new DescriptorPoolSize(DescriptorType.CombinedImageSampler, Constants.MaxTextureBindings * DescriptorPoolMultiplier), new DescriptorPoolSize(DescriptorType.StorageImage, Constants.MaxImageBindings * DescriptorPoolMultiplier), diff --git a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs index 31acfc9b6..3d840a45f 100644 --- a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs +++ b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs @@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.Vulkan public readonly bool SupportsGeometryShaderPassthrough; public readonly bool SupportsSubgroupSizeControl; public readonly bool SupportsShaderInt8; + public readonly bool SupportsBufferDeviceAddress; public readonly bool SupportsConditionalRendering; public readonly bool SupportsExtendedDynamicState; public readonly bool SupportsMultiView; @@ -32,6 +33,7 @@ namespace Ryujinx.Graphics.Vulkan bool supportsGeometryShaderPassthrough, bool supportsSubgroupSizeControl, bool supportsShaderInt8, + bool supportsBufferDeviceAddress, bool supportsConditionalRendering, bool supportsExtendedDynamicState, bool supportsMultiView, @@ -52,6 +54,7 @@ namespace Ryujinx.Graphics.Vulkan SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; SupportsSubgroupSizeControl = supportsSubgroupSizeControl; SupportsShaderInt8 = supportsShaderInt8; + SupportsBufferDeviceAddress = supportsBufferDeviceAddress; SupportsConditionalRendering = supportsConditionalRendering; SupportsExtendedDynamicState = supportsExtendedDynamicState; SupportsMultiView = supportsMultiView; diff --git a/Ryujinx.Graphics.Vulkan/PipelineBase.cs b/Ryujinx.Graphics.Vulkan/PipelineBase.cs index dfcb32d40..044861e77 100644 --- a/Ryujinx.Graphics.Vulkan/PipelineBase.cs +++ b/Ryujinx.Graphics.Vulkan/PipelineBase.cs @@ -1222,6 +1222,11 @@ namespace Ryujinx.Graphics.Vulkan TextureBarrier(); } + public void UpdatePageTableGpuAddress(ulong address) + { + SupportBufferUpdater.UpdatePageTableBasePointer(address); + } + public void UpdateRenderScale(ReadOnlySpan scales, int totalCount, int fragmentCount) { bool changed = false; diff --git a/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs b/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs index 7813bb816..b730758f8 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs @@ -19,6 +19,7 @@ namespace Ryujinx.Graphics.Vulkan public static string[] DesirableExtensions { get; } = new string[] { + ExtBufferDeviceAddress.ExtensionName, ExtConditionalRendering.ExtensionName, ExtExtendedDynamicState.ExtensionName, KhrDrawIndirectCount.ExtensionName, @@ -491,6 +492,20 @@ namespace Ryujinx.Graphics.Vulkan pExtendedFeatures = &featuresSubgroupSizeControl; } + PhysicalDeviceBufferDeviceAddressFeaturesEXT featuresBufferDeviceAddress; + + if (supportedExtensions.Contains(ExtBufferDeviceAddress.ExtensionName)) + { + featuresBufferDeviceAddress = new PhysicalDeviceBufferDeviceAddressFeaturesEXT() + { + SType = StructureType.PhysicalDeviceBufferAddressFeaturesExt, + PNext = pExtendedFeatures, + BufferDeviceAddress = true + }; + + pExtendedFeatures = &featuresBufferDeviceAddress; + } + var enabledExtensions = RequiredExtensions.Union(DesirableExtensions.Intersect(supportedExtensions)).ToArray(); IntPtr* ppEnabledExtensions = stackalloc IntPtr[enabledExtensions.Length]; diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index e94720672..26a4d0b55 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -205,6 +205,7 @@ namespace Ryujinx.Graphics.Vulkan supportedExtensions.Contains("VK_NV_geometry_shader_passthrough"), supportedExtensions.Contains("VK_EXT_subgroup_size_control"), featuresShaderInt8.ShaderInt8, + supportedExtensions.Contains(ExtBufferDeviceAddress.ExtensionName), supportedExtensions.Contains(ExtConditionalRendering.ExtensionName), supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName), features2.Features.MultiViewport, @@ -344,6 +345,11 @@ namespace Ryujinx.Graphics.Vulkan return BufferManager.GetData(buffer, offset, size); } + public ulong GetBufferGpuAddress(BufferHandle buffer) + { + return BufferManager.GetBufferGpuAddress(buffer); + } + public unsafe Capabilities GetCapabilities() { FormatFeatureFlags compressedFormatFeatureFlags = diff --git a/Ryujinx.Memory/Range/RangeList.cs b/Ryujinx.Memory/Range/RangeList.cs index 7278e7eb4..1e6d90270 100644 --- a/Ryujinx.Memory/Range/RangeList.cs +++ b/Ryujinx.Memory/Range/RangeList.cs @@ -427,6 +427,18 @@ namespace Ryujinx.Memory.Range return ~left; } + public T[] ToArray() + { + T[] output = new T[Count]; + + for (int i = 0; i < output.Length; i++) + { + output[i] = _items[i].Value; + } + + return output; + } + public IEnumerator GetEnumerator() { for (int i = 0; i < Count; i++) diff --git a/Spv.Generator/Autogenerated/CoreGrammar.cs b/Spv.Generator/Autogenerated/CoreGrammar.cs index 3b2f6fa65..849aafd0d 100644 --- a/Spv.Generator/Autogenerated/CoreGrammar.cs +++ b/Spv.Generator/Autogenerated/CoreGrammar.cs @@ -710,7 +710,7 @@ namespace Spv.Generator return result; } - public Instruction Load(Instruction resultType, Instruction pointer, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + public Instruction Load(Instruction resultType, Instruction pointer, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue, LiteralInteger operand2 = null) { Instruction result = NewInstruction(Op.OpLoad, GetNewId(), resultType); @@ -718,13 +718,17 @@ namespace Spv.Generator if (memoryAccess != (MemoryAccessMask)int.MaxValue) { result.AddOperand(memoryAccess); + if (operand2 != null) + { + result.AddOperand(operand2); + } } AddToFunctionDefinitions(result); return result; } - public Instruction Store(Instruction pointer, Instruction obj, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + public Instruction Store(Instruction pointer, Instruction obj, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue, LiteralInteger operand2 = null) { Instruction result = NewInstruction(Op.OpStore); @@ -733,6 +737,10 @@ namespace Spv.Generator if (memoryAccess != (MemoryAccessMask)int.MaxValue) { result.AddOperand(memoryAccess); + if (operand2 != null) + { + result.AddOperand(operand2); + } } AddToFunctionDefinitions(result);