From f92921a6d118aa9c6acdb3ecaa3cd61a19fe341e Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 15 Jun 2023 17:31:53 -0300 Subject: [PATCH 1/2] Implement Load/Store Local/Shared and Atomic shared using new instructions (#5241) * Implement Load/Store Local/Shared and Atomic shared using new instructions * Remove now unused code * Fix base offset register overwrite * Fix missing storage buffer set index when generating GLSL for Vulkan * Shader cache version bump * Remove more unused code * Some PR feedback --- .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../CodeGen/Glsl/Declarations.cs | 75 +++----- .../CodeGen/Glsl/DefaultNames.cs | 3 - .../AtomicMinMaxS32Shared.glsl | 21 --- .../HelperFunctions/HelperFunctionNames.cs | 8 - .../HelperFunctions/StoreSharedSmallInt.glsl | 23 --- .../CodeGen/Glsl/Instructions/InstGen.cs | 37 +--- .../Glsl/Instructions/InstGenHelper.cs | 8 - .../Glsl/Instructions/InstGenMemory.cs | 86 ++------- .../CodeGen/Glsl/OperandManager.cs | 15 +- .../CodeGen/Spirv/CodeGenContext.cs | 5 +- .../CodeGen/Spirv/Declarations.cs | 60 ++---- .../CodeGen/Spirv/Instructions.cs | 176 +++--------------- .../Instructions/InstEmitMemory.cs | 109 +++++------ .../IntermediateRepresentation/Instruction.cs | 6 - .../IntermediateRepresentation/StorageKind.cs | 11 +- .../Ryujinx.Graphics.Shader.csproj | 2 - .../StructuredIr/HelperFunctionsMask.cs | 18 +- .../StructuredIr/InstructionInfo.cs | 6 - .../StructuredIr/MemoryDefinition.cs | 18 ++ .../StructuredIr/ShaderProperties.cs | 22 +++ .../StructuredIr/StructuredProgram.cs | 11 -- .../Translation/EmitterContextInsts.cs | 40 +--- .../Translation/HelperFunctionManager.cs | 109 ++++++++++- .../Translation/HelperFunctionName.cs | 4 + .../Optimizations/GlobalToStorage.cs | 32 ++-- .../Translation/ResourceManager.cs | 25 ++- .../Translation/Rewriter.cs | 92 +++++++++ .../Translation/ShaderConfig.cs | 16 +- .../Translation/Translator.cs | 2 +- 30 files changed, 475 insertions(+), 567 deletions(-) delete mode 100644 
src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl delete mode 100644 src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl create mode 100644 src/Ryujinx.Graphics.Shader/StructuredIr/MemoryDefinition.cs diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 153a2e8c1..5cfbfd386 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 5080; + private const uint CodeGenVersion = 5241; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 958f1cef3..08e8eb195 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -71,40 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;"); context.AppendLine(); - if (context.Config.Stage == ShaderStage.Compute) - { - int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4); - - if (localMemorySize != 0) - { - string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize); - - context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];"); - context.AppendLine(); - } - - int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 
4); - - if (sharedMemorySize != 0) - { - string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize); - - context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];"); - context.AppendLine(); - } - } - else if (context.Config.LocalMemorySize != 0) - { - int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4); - - string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize); - - context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];"); - context.AppendLine(); - } - DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); + DeclareMemories(context, context.Config.Properties.LocalMemories.Values, isShared: false); + DeclareMemories(context, context.Config.Properties.SharedMemories.Values, isShared: true); var textureDescriptors = context.Config.GetTextureDescriptors(); if (textureDescriptors.Length != 0) @@ -238,11 +208,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine(); } - if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0) - { - AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl"); - } - if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); @@ -273,11 +238,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl"); } - if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0) - { - AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl"); - } - if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) { AppendHelperFunction(context, 
"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl"); @@ -358,7 +318,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl _ => "std430" }; - context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}"); + string set = string.Empty; + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + set = $"set = {buffer.Set}, "; + } + + context.AppendLine($"layout ({set}binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}"); context.EnterScope(); foreach (StructureField field in buffer.Type.Fields) @@ -391,6 +358,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } + private static void DeclareMemories(CodeGenContext context, IEnumerable memories, bool isShared) + { + string prefix = isShared ? "shared " : string.Empty; + + foreach (MemoryDefinition memory in memories) + { + string typeName = GetVarTypeName(context, memory.Type & ~AggregateType.Array); + + if (memory.ArrayLength > 0) + { + string arraySize = memory.ArrayLength.ToString(CultureInfo.InvariantCulture); + + context.AppendLine($"{prefix}{typeName} {memory.Name}[{arraySize}];"); + } + else + { + context.AppendLine($"{prefix}{typeName} {memory.Name}[];"); + } + } + } + private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) { int arraySize = 0; @@ -717,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl string code = EmbeddedResources.ReadAllText(filename); code = code.Replace("\t", CodeGenContext.Tab); - code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName); if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) { diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index 5ee8259cf..e909dcf04 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -11,9 +11,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public 
const string IAttributePrefix = "in_attr"; public const string OAttributePrefix = "out_attr"; - public const string LocalMemoryName = "local_mem"; - public const string SharedMemoryName = "shared_mem"; - public const string ArgumentNamePrefix = "a"; public const string UndefinedName = "undef"; diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl deleted file mode 100644 index 82b76bccf..000000000 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl +++ /dev/null @@ -1,21 +0,0 @@ -int Helper_AtomicMaxS32(int offset, int value) -{ - uint oldValue, newValue; - do - { - oldValue = $SHARED_MEM$[offset]; - newValue = uint(max(int(oldValue), value)); - } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue); - return int(oldValue); -} - -int Helper_AtomicMinS32(int offset, int value) -{ - uint oldValue, newValue; - do - { - oldValue = $SHARED_MEM$[offset]; - newValue = uint(min(int(oldValue), value)); - } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue); - return int(oldValue); -} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index 54f35b15a..21c435475 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -2,9 +2,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class HelperFunctionNames { - public static string AtomicMaxS32 = "Helper_AtomicMaxS32"; - public static string AtomicMinS32 = "Helper_AtomicMinS32"; - public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; @@ -13,10 +10,5 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Glsl public static string ShuffleUp = "Helper_ShuffleUp"; public static string ShuffleXor = "Helper_ShuffleXor"; public static string SwizzleAdd = "Helper_SwizzleAdd"; - - public static string StoreShared16 = "Helper_StoreShared16"; - public static string StoreShared8 = "Helper_StoreShared8"; - public static string StoreStorage16 = "Helper_StoreStorage16"; - public static string StoreStorage8 = "Helper_StoreStorage8"; } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl deleted file mode 100644 index 2f57b5ff6..000000000 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl +++ /dev/null @@ -1,23 +0,0 @@ -void Helper_StoreShared16(int offset, uint value) -{ - int wordOffset = offset >> 2; - int bitOffset = (offset & 3) * 8; - uint oldValue, newValue; - do - { - oldValue = $SHARED_MEM$[wordOffset]; - newValue = bitfieldInsert(oldValue, value, bitOffset, 16); - } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue); -} - -void Helper_StoreShared8(int offset, uint value) -{ - int wordOffset = offset >> 2; - int bitOffset = (offset & 3) * 8; - uint oldValue, newValue; - do - { - oldValue = $SHARED_MEM$[wordOffset]; - newValue = bitfieldInsert(oldValue, value, bitOffset, 8); - } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue); -} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index 01d8a6e7a..b2577a999 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -68,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions string args = string.Empty; - if (atomic 
&& operation.StorageKind == StorageKind.StorageBuffer) + if (atomic && (operation.StorageKind == StorageKind.StorageBuffer || operation.StorageKind == StorageKind.SharedMemory)) { args = GenerateLoadOrStore(context, operation, isStore: false); @@ -81,23 +81,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions args += ", " + GetSoureExpr(context, operation.GetSource(argIndex), dstType); } } - else if (atomic && operation.StorageKind == StorageKind.SharedMemory) - { - args = LoadShared(context, operation); - - // For shared memory access, the second argument is unused and should be ignored. - // It is there to make both storage and shared access have the same number of arguments. - // For storage, both inputs are consumed when the argument index is 0, so we should skip it here. - - for (int argIndex = 2; argIndex < arity; argIndex++) - { - args += ", "; - - AggregateType dstType = GetSrcVarType(inst, argIndex); - - args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); - } - } else { for (int argIndex = 0; argIndex < arity; argIndex++) @@ -179,12 +162,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.Load: return Load(context, operation); - case Instruction.LoadLocal: - return LoadLocal(context, operation); - - case Instruction.LoadShared: - return LoadShared(context, operation); - case Instruction.Lod: return Lod(context, operation); @@ -200,18 +177,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.Store: return Store(context, operation); - case Instruction.StoreLocal: - return StoreLocal(context, operation); - - case Instruction.StoreShared: - return StoreShared(context, operation); - - case Instruction.StoreShared16: - return StoreShared16(context, operation); - - case Instruction.StoreShared8: - return StoreShared8(context, operation); - case Instruction.TextureSample: return TextureSample(context, operation); diff --git 
a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index f42d98986..8b0b744ad 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -17,9 +17,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); - Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32); Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); - Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32); Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); @@ -83,8 +81,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.ImageAtomic, InstType.Special); Add(Instruction.IsNan, InstType.CallUnary, "isnan"); Add(Instruction.Load, InstType.Special); - Add(Instruction.LoadLocal, InstType.Special); - Add(Instruction.LoadShared, InstType.Special); Add(Instruction.Lod, InstType.Special); Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); @@ -118,10 +114,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Sine, InstType.CallUnary, "sin"); Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); Add(Instruction.Store, InstType.Special); - Add(Instruction.StoreLocal, InstType.Special); - Add(Instruction.StoreShared, InstType.Special); - Add(Instruction.StoreShared16, InstType.Special); - Add(Instruction.StoreShared8, InstType.Special); Add(Instruction.Subtract, 
InstType.OpBinary, "-", 2); Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); Add(Instruction.TextureSample, InstType.Special); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index c8084d9dd..99376ffb2 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -191,25 +191,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return GenerateLoadOrStore(context, operation, isStore: false); } - public static string LoadLocal(CodeGenContext context, AstOperation operation) - { - return LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName); - } - - public static string LoadShared(CodeGenContext context, AstOperation operation) - { - return LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName); - } - - private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName) - { - IAstNode src1 = operation.GetSource(0); - - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - - return $"{arrayName}[{offsetExpr}]"; - } - public static string Lod(CodeGenContext context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -263,58 +244,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return GenerateLoadOrStore(context, operation, isStore: true); } - public static string StoreLocal(CodeGenContext context, AstOperation operation) - { - return StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName); - } - - public static string StoreShared(CodeGenContext context, AstOperation operation) - { - return StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName); - } - - private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, 
string arrayName) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src2); - - string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); - - return $"{arrayName}[{offsetExpr}] = {src}"; - } - - public static string StoreShared16(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src2); - - string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); - - return $"{HelperFunctionNames.StoreShared16}({offsetExpr}, {src})"; - } - - public static string StoreShared8(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src2); - - string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); - - return $"{HelperFunctionNames.StoreShared8}({offsetExpr}, {src})"; - } - public static string TextureSample(CodeGenContext context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -675,6 +604,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions varType = field.Type; break; + case StorageKind.LocalMemory: + case StorageKind.SharedMemory: + if (!(operation.GetSource(srcIndex++) is AstOperand bindingId) || bindingId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + 
MemoryDefinition memory = storageKind == StorageKind.LocalMemory + ? context.Config.Properties.LocalMemories[bindingId.Value] + : context.Config.Properties.SharedMemories[bindingId.Value]; + + varName = memory.Name; + varType = memory.Type; + break; + case StorageKind.Input: case StorageKind.InputPerPatch: case StorageKind.Output: diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index 4fd1d17c4..4f6ca642c 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -113,7 +113,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl if (node is AstOperation operation) { - if (operation.Inst == Instruction.Load) + if (operation.Inst == Instruction.Load || operation.Inst.IsAtomic()) { switch (operation.StorageKind) { @@ -136,6 +136,19 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl return field.Type & AggregateType.ElementTypeMask; + case StorageKind.LocalMemory: + case StorageKind.SharedMemory: + if (!(operation.GetSource(0) is AstOperand bindingId) || bindingId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + MemoryDefinition memory = operation.StorageKind == StorageKind.LocalMemory + ? 
context.Config.Properties.LocalMemories[bindingId.Value] + : context.Config.Properties.SharedMemories[bindingId.Value]; + + return memory.Type & AggregateType.ElementTypeMask; + case StorageKind.Input: case StorageKind.InputPerPatch: case StorageKind.Output: diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index 1f5167e66..a4daaa67e 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -25,8 +25,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public Dictionary ConstantBuffers { get; } = new Dictionary(); public Dictionary StorageBuffers { get; } = new Dictionary(); - public Instruction LocalMemory { get; set; } - public Instruction SharedMemory { get; set; } + public Dictionary LocalMemories { get; } = new Dictionary(); + public Dictionary SharedMemories { get; } = new Dictionary(); public Dictionary SamplersTypes { get; } = new Dictionary(); public Dictionary Samplers { get; } = new Dictionary(); public Dictionary Images { get; } = new Dictionary(); @@ -35,7 +35,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public Dictionary InputsPerPatch { get; } = new Dictionary(); public Dictionary OutputsPerPatch { get; } = new Dictionary(); - public Instruction CoordTemp { get; set; } public StructuredFunction CurrentFunction { get; set; } private readonly Dictionary _locals = new Dictionary(); private readonly Dictionary _localForArgs = new Dictionary(); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs index eb2db514d..59acea4f6 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -6,7 +6,6 @@ using Spv.Generator; using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using 
System.Numerics; using static Spv.Specification; using SpvInstruction = Spv.Generator.Instruction; @@ -44,13 +43,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddLocalVariable(spvLocal); context.DeclareLocal(local, spvLocal); } - - var ivector2Type = context.TypeVector(context.TypeS32(), 2); - var coordTempPointerType = context.TypePointer(StorageClass.Function, ivector2Type); - var coordTemp = context.Variable(coordTempPointerType, StorageClass.Function); - - context.AddLocalVariable(coordTemp); - context.CoordTemp = coordTemp; } public static void DeclareLocalForArgs(CodeGenContext context, List functions) @@ -77,54 +69,30 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info) { - if (context.Config.Stage == ShaderStage.Compute) - { - int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4); - - if (localMemorySize != 0) - { - DeclareLocalMemory(context, localMemorySize); - } - - int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4); - - if (sharedMemorySize != 0) - { - DeclareSharedMemory(context, sharedMemorySize); - } - } - else if (context.Config.LocalMemorySize != 0) - { - int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4); - DeclareLocalMemory(context, localMemorySize); - } - DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); + DeclareMemories(context, context.Config.Properties.LocalMemories, context.LocalMemories, StorageClass.Private); + DeclareMemories(context, context.Config.Properties.SharedMemories, context.SharedMemories, StorageClass.Workgroup); DeclareSamplers(context, context.Config.GetTextureDescriptors()); DeclareImages(context, context.Config.GetImageDescriptors()); DeclareInputsAndOutputs(context, info); } - private 
static void DeclareLocalMemory(CodeGenContext context, int size) + private static void DeclareMemories( + CodeGenContext context, + IReadOnlyDictionary memories, + Dictionary dict, + StorageClass storage) { - context.LocalMemory = DeclareMemory(context, StorageClass.Private, size); - } + foreach ((int id, MemoryDefinition memory) in memories) + { + var pointerType = context.TypePointer(storage, context.GetType(memory.Type, memory.ArrayLength)); + var variable = context.Variable(pointerType, storage); - private static void DeclareSharedMemory(CodeGenContext context, int size) - { - context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size); - } + context.AddGlobalVariable(variable); - private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size) - { - var arrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), size)); - var pointerType = context.TypePointer(storage, arrayType); - var variable = context.Variable(pointerType, storage); - - context.AddGlobalVariable(variable); - - return variable; + dict.Add(id, variable); + } } private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable buffers) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 6c1157525..b451f7a48 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -97,8 +97,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.ImageStore, GenerateImageStore); Add(Instruction.IsNan, GenerateIsNan); Add(Instruction.Load, GenerateLoad); - Add(Instruction.LoadLocal, GenerateLoadLocal); - Add(Instruction.LoadShared, GenerateLoadShared); Add(Instruction.Lod, GenerateLod); Add(Instruction.LogarithmB2, GenerateLogarithmB2); Add(Instruction.LogicalAnd, GenerateLogicalAnd); @@ -132,10 +130,6 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.Sine, GenerateSine); Add(Instruction.SquareRoot, GenerateSquareRoot); Add(Instruction.Store, GenerateStore); - Add(Instruction.StoreLocal, GenerateStoreLocal); - Add(Instruction.StoreShared, GenerateStoreShared); - Add(Instruction.StoreShared16, GenerateStoreShared16); - Add(Instruction.StoreShared8, GenerateStoreShared8); Add(Instruction.Subtract, GenerateSubtract); Add(Instruction.SwizzleAdd, GenerateSwizzleAdd); Add(Instruction.TextureSample, GenerateTextureSample); @@ -871,30 +865,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return GenerateLoadOrStore(context, operation, isStore: false); } - private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation) - { - return GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); - } - - private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation) - { - return GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory); - } - - private static OperationResult GenerateLoadLocalOrShared( - CodeGenContext context, - AstOperation operation, - StorageClass storageClass, - SpvInstruction memory) - { - var offset = context.Get(AggregateType.S32, operation.GetSource(0)); - - var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset); - var value = context.Load(context.TypeU32(), elemPointer); - - return new OperationResult(AggregateType.U32, value); - } - private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -1268,45 +1238,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return GenerateLoadOrStore(context, operation, isStore: true); } - private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation) - { - return GenerateStoreLocalOrShared(context, 
operation, StorageClass.Private, context.LocalMemory); - } - - private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation) - { - return GenerateStoreLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory); - } - - private static OperationResult GenerateStoreLocalOrShared( - CodeGenContext context, - AstOperation operation, - StorageClass storageClass, - SpvInstruction memory) - { - var offset = context.Get(AggregateType.S32, operation.GetSource(0)); - var value = context.Get(AggregateType.U32, operation.GetSource(1)); - - var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset); - context.Store(elemPointer, value); - - return OperationResult.Invalid; - } - - private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation) - { - GenerateStoreSharedSmallInt(context, operation, 16); - - return OperationResult.Invalid; - } - - private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation) - { - GenerateStoreSharedSmallInt(context, operation, 8); - - return OperationResult.Invalid; - } - private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation) { return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub); @@ -1827,55 +1758,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv AstOperation operation, Func emitU) { - var value = context.GetU32(operation.GetSource(operation.SourcesCount - 1)); + SpvInstruction elemPointer = GetStoragePointer(context, operation, out AggregateType varType); - SpvInstruction elemPointer; - - if (operation.StorageKind == StorageKind.StorageBuffer) - { - elemPointer = GetStoragePointer(context, operation, out _); - } - else if (operation.StorageKind == StorageKind.SharedMemory) - { - var offset = context.GetU32(operation.GetSource(0)); - elemPointer = 
context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); - } - else - { - throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); - } + var value = context.Get(varType, operation.GetSource(operation.SourcesCount - 1)); var one = context.Constant(context.TypeU32(), 1); var zero = context.Constant(context.TypeU32(), 0); - return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), elemPointer, one, zero, value)); + return new OperationResult(varType, emitU(context.GetType(varType), elemPointer, one, zero, value)); } private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation) { - var value0 = context.GetU32(operation.GetSource(operation.SourcesCount - 2)); - var value1 = context.GetU32(operation.GetSource(operation.SourcesCount - 1)); + SpvInstruction elemPointer = GetStoragePointer(context, operation, out AggregateType varType); - SpvInstruction elemPointer; - - if (operation.StorageKind == StorageKind.StorageBuffer) - { - elemPointer = GetStoragePointer(context, operation, out _); - } - else if (operation.StorageKind == StorageKind.SharedMemory) - { - var offset = context.GetU32(operation.GetSource(0)); - elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); - } - else - { - throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); - } + var value0 = context.Get(varType, operation.GetSource(operation.SourcesCount - 2)); + var value1 = context.Get(varType, operation.GetSource(operation.SourcesCount - 1)); var one = context.Constant(context.TypeU32(), 1); var zero = context.Constant(context.TypeU32(), 0); - return new OperationResult(AggregateType.U32, context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, value1, value0)); + return new OperationResult(varType, 
context.AtomicCompareExchange(context.GetType(varType), elemPointer, one, zero, zero, value1, value0)); } private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) @@ -1928,6 +1831,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv : context.StorageBuffers[bindingIndex.Value]; break; + case StorageKind.LocalMemory: + case StorageKind.SharedMemory: + if (!(operation.GetSource(srcIndex++) is AstOperand bindingId) || bindingId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + if (storageKind == StorageKind.LocalMemory) + { + storageClass = StorageClass.Private; + varType = context.Config.Properties.LocalMemories[bindingId.Value].Type & AggregateType.ElementTypeMask; + baseObj = context.LocalMemories[bindingId.Value]; + } + else + { + storageClass = StorageClass.Workgroup; + varType = context.Config.Properties.SharedMemories[bindingId.Value].Type & AggregateType.ElementTypeMask; + baseObj = context.SharedMemories[bindingId.Value]; + } + break; + case StorageKind.Input: case StorageKind.InputPerPatch: case StorageKind.Output: @@ -2048,50 +1972,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return context.Load(context.GetType(varType), context.Inputs[ioDefinition]); } - private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize) - { - var offset = context.Get(AggregateType.U32, operation.GetSource(0)); - var value = context.Get(AggregateType.U32, operation.GetSource(1)); - - var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); - var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); - bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); - - var memory = context.SharedMemory; - - var 
elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), memory, wordOffset); - - GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); - } - - private static void GenerateStoreSmallInt( - CodeGenContext context, - SpvInstruction elemPointer, - SpvInstruction bitOffset, - SpvInstruction value, - int bitSize) - { - var loopStart = context.Label(); - var loopEnd = context.Label(); - - context.Branch(loopStart); - context.AddLabel(loopStart); - - var oldValue = context.Load(context.TypeU32(), elemPointer); - var newValue = context.BitFieldInsert(context.TypeU32(), oldValue, value, bitOffset, context.Constant(context.TypeU32(), bitSize)); - - var one = context.Constant(context.TypeU32(), 1); - var zero = context.Constant(context.TypeU32(), 0); - - var result = context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, newValue, oldValue); - var failed = context.INotEqual(context.TypeBool(), result, oldValue); - - context.LoopMerge(loopEnd, loopStart, LoopControlMask.MaskNone); - context.BranchConditional(failed, loopStart, loopEnd); - - context.AddLabel(loopEnd); - } - private static OperationResult GetZeroOperationResult( CodeGenContext context, AstTextureOperation texOp, diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 9aa738200..99d7bec97 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -10,12 +10,6 @@ namespace Ryujinx.Graphics.Shader.Instructions { static partial class InstEmit { - private enum MemoryRegion - { - Local, - Shared - } - public static void Atom(EmitterContext context) { InstAtom op = context.GetOp(); @@ -51,7 +45,8 @@ namespace Ryujinx.Graphics.Shader.Instructions _ => AtomSize.U32 }; - Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value); + 
Operand id = Const(context.Config.ResourceManager.SharedMemoryId); + Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, id, offset, value); context.Copy(GetDest(op.Dest), res); } @@ -114,14 +109,14 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstLdl op = context.GetOp(); - EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); + EmitLoad(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); } public static void Lds(EmitterContext context) { InstLds op = context.GetOp(); - EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); + EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); } public static void Red(EmitterContext context) @@ -144,14 +139,14 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstStl op = context.GetOp(); - EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); + EmitStore(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); } public static void Sts(EmitterContext context) { InstSts op = context.GetOp(); - EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); + EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); } private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset) @@ -192,8 +187,8 @@ namespace Ryujinx.Graphics.Shader.Instructions StorageKind storageKind, AtomOp op, AtomSize type, - Operand addrLow, - Operand addrHigh, + Operand e0, + Operand e1, Operand value) { Operand res = Const(0); @@ -203,7 +198,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.Add: if (type == AtomSize.S32 || type == 
AtomSize.U32) { - res = context.AtomicAdd(storageKind, addrLow, addrHigh, value); + res = context.AtomicAdd(storageKind, e0, e1, value); } else { @@ -213,7 +208,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.And: if (type == AtomSize.S32 || type == AtomSize.U32) { - res = context.AtomicAnd(storageKind, addrLow, addrHigh, value); + res = context.AtomicAnd(storageKind, e0, e1, value); } else { @@ -223,7 +218,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.Xor: if (type == AtomSize.S32 || type == AtomSize.U32) { - res = context.AtomicXor(storageKind, addrLow, addrHigh, value); + res = context.AtomicXor(storageKind, e0, e1, value); } else { @@ -233,7 +228,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.Or: if (type == AtomSize.S32 || type == AtomSize.U32) { - res = context.AtomicOr(storageKind, addrLow, addrHigh, value); + res = context.AtomicOr(storageKind, e0, e1, value); } else { @@ -243,11 +238,11 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.Max: if (type == AtomSize.S32) { - res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value); + res = context.AtomicMaxS32(storageKind, e0, e1, value); } else if (type == AtomSize.U32) { - res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value); + res = context.AtomicMaxU32(storageKind, e0, e1, value); } else { @@ -257,11 +252,11 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomOp.Min: if (type == AtomSize.S32) { - res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value); + res = context.AtomicMinS32(storageKind, e0, e1, value); } else if (type == AtomSize.U32) { - res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value); + res = context.AtomicMinU32(storageKind, e0, e1, value); } else { @@ -275,7 +270,7 @@ namespace Ryujinx.Graphics.Shader.Instructions private static void EmitLoad( EmitterContext context, - MemoryRegion region, + StorageKind storageKind, LsSize2 size, Operand srcA, int rd, @@ -287,19 +282,19 @@ 
namespace Ryujinx.Graphics.Shader.Instructions return; } + int id = storageKind == StorageKind.LocalMemory + ? context.Config.ResourceManager.LocalMemoryId + : context.Config.ResourceManager.SharedMemoryId; bool isSmallInt = size < LsSize2.B32; - int count = 1; - - switch (size) + int count = size switch { - case LsSize2.B64: count = 2; break; - case LsSize2.B128: count = 4; break; - } + LsSize2.B64 => 2, + LsSize2.B128 => 4, + _ => 1 + }; - Operand baseOffset = context.IAdd(srcA, Const(offset)); - Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes). - Operand bitOffset = GetBitOffset(context, baseOffset); + Operand baseOffset = context.Copy(srcA); for (int index = 0; index < count; index++) { @@ -310,14 +305,10 @@ namespace Ryujinx.Graphics.Shader.Instructions break; } - Operand elemOffset = context.IAdd(wordOffset, Const(index)); - Operand value = null; - - switch (region) - { - case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break; - case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break; - } + Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4)); + Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes). + Operand bitOffset = GetBitOffset(context, byteOffset); + Operand value = context.Load(storageKind, id, wordOffset); if (isSmallInt) { @@ -360,7 +351,7 @@ namespace Ryujinx.Graphics.Shader.Instructions private static void EmitStore( EmitterContext context, - MemoryRegion region, + StorageKind storageKind, LsSize2 size, Operand srcA, int rd, @@ -372,52 +363,54 @@ namespace Ryujinx.Graphics.Shader.Instructions return; } + int id = storageKind == StorageKind.LocalMemory + ? 
context.Config.ResourceManager.LocalMemoryId + : context.Config.ResourceManager.SharedMemoryId; bool isSmallInt = size < LsSize2.B32; - int count = 1; - - switch (size) + int count = size switch { - case LsSize2.B64: count = 2; break; - case LsSize2.B128: count = 4; break; - } + LsSize2.B64 => 2, + LsSize2.B128 => 4, + _ => 1 + }; - Operand baseOffset = context.IAdd(srcA, Const(offset)); - Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); - Operand bitOffset = GetBitOffset(context, baseOffset); + Operand baseOffset = context.Copy(srcA); for (int index = 0; index < count; index++) { bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex; Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr); - Operand elemOffset = context.IAdd(wordOffset, Const(index)); + Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4)); + Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2)); + Operand bitOffset = GetBitOffset(context, byteOffset); - if (isSmallInt && region == MemoryRegion.Local) + if (isSmallInt && storageKind == StorageKind.LocalMemory) { - Operand word = context.LoadLocal(elemOffset); + Operand word = context.Load(storageKind, id, wordOffset); value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value); } - if (region == MemoryRegion.Local) + if (storageKind == StorageKind.LocalMemory) { - context.StoreLocal(elemOffset, value); + context.Store(storageKind, id, wordOffset, value); } - else if (region == MemoryRegion.Shared) + else if (storageKind == StorageKind.SharedMemory) { switch (size) { case LsSize2.U8: case LsSize2.S8: - context.StoreShared8(baseOffset, value); + context.Store(StorageKind.SharedMemory8, id, byteOffset, value); break; case LsSize2.U16: case LsSize2.S16: - context.StoreShared16(baseOffset, value); + context.Store(StorageKind.SharedMemory16, id, byteOffset, value); break; default: - context.StoreShared(elemOffset, value); + context.Store(storageKind, id, wordOffset, value); break; 
} } diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index aecb67249..de41a2cf7 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -79,8 +79,6 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation ImageAtomic, IsNan, Load, - LoadLocal, - LoadShared, Lod, LogarithmB2, LogicalAnd, @@ -115,10 +113,6 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation Sine, SquareRoot, Store, - StoreLocal, - StoreShared, - StoreShared16, - StoreShared8, Subtract, SwizzleAdd, TextureSample, diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs index 2b5dd1dec..20576a454 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs @@ -11,12 +11,13 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation StorageBuffer, LocalMemory, SharedMemory, + SharedMemory8, // TODO: Remove this and store type as a field on the Operation class itself. + SharedMemory16, // TODO: Remove this and store type as a field on the Operation class itself. GlobalMemory, - // TODO: Remove those and store type as a field on the Operation class itself. - GlobalMemoryS8, - GlobalMemoryS16, - GlobalMemoryU8, - GlobalMemoryU16 + GlobalMemoryS8, // TODO: Remove this and store type as a field on the Operation class itself. + GlobalMemoryS16, // TODO: Remove this and store type as a field on the Operation class itself. + GlobalMemoryU8, // TODO: Remove this and store type as a field on the Operation class itself. + GlobalMemoryU16 // TODO: Remove this and store type as a field on the Operation class itself. 
} static class StorageKindExtensions diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index 86de2e755..b1f1fb963 100644 --- a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -10,14 +10,12 @@ - - diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index c348b5d93..ed910f96d 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -5,15 +5,13 @@ namespace Ryujinx.Graphics.Shader.StructuredIr [Flags] enum HelperFunctionsMask { - AtomicMinMaxS32Shared = 1 << 0, - MultiplyHighS32 = 1 << 2, - MultiplyHighU32 = 1 << 3, - Shuffle = 1 << 4, - ShuffleDown = 1 << 5, - ShuffleUp = 1 << 6, - ShuffleXor = 1 << 7, - StoreSharedSmallInt = 1 << 8, - SwizzleAdd = 1 << 10, - FSI = 1 << 11 + MultiplyHighS32 = 1 << 2, + MultiplyHighU32 = 1 << 3, + Shuffle = 1 << 4, + ShuffleDown = 1 << 5, + ShuffleUp = 1 << 6, + ShuffleXor = 1 << 7, + SwizzleAdd = 1 << 10, + FSI = 1 << 11 } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index 6e2013501..b08478ad3 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -90,8 +90,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.ImageAtomic, AggregateType.S32); Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar); Add(Instruction.Load, AggregateType.FP32); - Add(Instruction.LoadLocal, AggregateType.U32, AggregateType.S32); - Add(Instruction.LoadShared, AggregateType.U32, AggregateType.S32); Add(Instruction.Lod, AggregateType.FP32); Add(Instruction.LogarithmB2, AggregateType.Scalar, 
AggregateType.Scalar); Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool); @@ -121,10 +119,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.Store, AggregateType.Void); - Add(Instruction.StoreLocal, AggregateType.Void, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreShared, AggregateType.Void, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreShared16, AggregateType.Void, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreShared8, AggregateType.Void, AggregateType.S32, AggregateType.U32); Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32); Add(Instruction.TextureSample, AggregateType.FP32); diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/MemoryDefinition.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/MemoryDefinition.cs new file mode 100644 index 000000000..c0bb750e7 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/MemoryDefinition.cs @@ -0,0 +1,18 @@ +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.Shader.StructuredIr +{ + readonly struct MemoryDefinition + { + public string Name { get; } + public AggregateType Type { get; } + public int ArrayLength { get; } + + public MemoryDefinition(string name, AggregateType type, int arrayLength = 1) + { + Name = name; + Type = type; + ArrayLength = arrayLength; + } + } +} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs index 157c5937d..c6132ef8c 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs @@ -6,14 
+6,20 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { private readonly Dictionary _constantBuffers; private readonly Dictionary _storageBuffers; + private readonly Dictionary _localMemories; + private readonly Dictionary _sharedMemories; public IReadOnlyDictionary ConstantBuffers => _constantBuffers; public IReadOnlyDictionary StorageBuffers => _storageBuffers; + public IReadOnlyDictionary LocalMemories => _localMemories; + public IReadOnlyDictionary SharedMemories => _sharedMemories; public ShaderProperties() { _constantBuffers = new Dictionary(); _storageBuffers = new Dictionary(); + _localMemories = new Dictionary(); + _sharedMemories = new Dictionary(); } public void AddConstantBuffer(int binding, BufferDefinition definition) @@ -25,5 +31,21 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { _storageBuffers[binding] = definition; } + + public int AddLocalMemory(MemoryDefinition definition) + { + int id = _localMemories.Count; + _localMemories.Add(id, definition); + + return id; + } + + public int AddSharedMemory(MemoryDefinition definition) + { + int id = _sharedMemories.Count; + _sharedMemories.Add(id, definition); + + return id; + } } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index a8f132766..9d12a73cd 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -274,13 +274,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // decide which helper functions are needed on the final generated code. 
switch (operation.Inst) { - case Instruction.AtomicMaxS32: - case Instruction.AtomicMinS32: - if (operation.StorageKind == StorageKind.SharedMemory) - { - context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared; - } - break; case Instruction.MultiplyHighS32: context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; break; @@ -299,10 +292,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.ShuffleXor: context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor; break; - case Instruction.StoreShared16: - case Instruction.StoreShared8: - context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt; - break; case Instruction.SwizzleAdd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; break; diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index be0cba809..0ba26107c 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -67,6 +67,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value); } + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand compare, Operand value) + { + return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, compare, value); + } + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value) { return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value); @@ -661,16 +666,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex); } - public static Operand 
LoadLocal(this EmitterContext context, Operand a) - { - return context.Add(Instruction.LoadLocal, Local(), a); - } - - public static Operand LoadShared(this EmitterContext context, Operand a) - { - return context.Add(Instruction.LoadShared, Local(), a); - } - public static Operand MemoryBarrier(this EmitterContext context) { return context.Add(Instruction.MemoryBarrier); @@ -753,6 +748,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.Store, storageKind, null, e0, e1, value); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, value); + } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) { return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value); @@ -797,26 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value); } - public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreLocal, null, a, b); - } - - public static Operand StoreShared(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared, null, a, b); - } - - public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared16, null, a, b); - } - - public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared8, null, a, b); - } - public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a) { return UnpackDouble2x32(context, a, 1); diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs 
b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs index 6958b86f2..51a396821 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs @@ -9,13 +9,13 @@ namespace Ryujinx.Graphics.Shader.Translation class HelperFunctionManager { private readonly List _functionList; - private readonly Dictionary _functionIds; + private readonly Dictionary _functionIds; private readonly ShaderStage _stage; public HelperFunctionManager(List functionList, ShaderStage stage) { _functionList = functionList; - _functionIds = new Dictionary(); + _functionIds = new Dictionary(); _stage = stage; } @@ -29,14 +29,30 @@ namespace Ryujinx.Graphics.Shader.Translation public int GetOrCreateFunctionId(HelperFunctionName functionName) { - if (_functionIds.TryGetValue(functionName, out int functionId)) + if (_functionIds.TryGetValue((int)functionName, out int functionId)) { return functionId; } Function function = GenerateFunction(functionName); functionId = AddFunction(function); - _functionIds.Add(functionName, functionId); + _functionIds.Add((int)functionName, functionId); + + return functionId; + } + + public int GetOrCreateFunctionId(HelperFunctionName functionName, int id) + { + int key = (int)functionName | (id << 16); + + if (_functionIds.TryGetValue(key, out int functionId)) + { + return functionId; + } + + Function function = GenerateFunction(functionName, id); + functionId = AddFunction(function); + _functionIds.Add(key, functionId); return functionId; } @@ -140,6 +156,67 @@ namespace Ryujinx.Graphics.Shader.Translation return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2); } + private static Function GenerateFunction(HelperFunctionName functionName, int id) + { + return functionName switch + { + HelperFunctionName.SharedAtomicMaxS32 => GenerateSharedAtomicSigned(id, isMin: false), + HelperFunctionName.SharedAtomicMinS32 => 
GenerateSharedAtomicSigned(id, isMin: true), + HelperFunctionName.SharedStore8 => GenerateSharedStore8(id), + HelperFunctionName.SharedStore16 => GenerateSharedStore16(id), + _ => throw new ArgumentException($"Invalid function name {functionName}") + }; + } + + private static Function GenerateSharedAtomicSigned(int id, bool isMin) + { + EmitterContext context = new EmitterContext(); + + Operand wordOffset = Argument(0); + Operand value = Argument(1); + + Operand result = GenerateSharedAtomicCasLoop(context, wordOffset, id, (memValue) => + { + return isMin + ? context.IMinimumS32(memValue, value) + : context.IMaximumS32(memValue, value); + }); + + context.Return(result); + + return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, $"SharedAtomic{(isMin ? "Min" : "Max")}_{id}", true, 2, 0); + } + + private static Function GenerateSharedStore8(int id) + { + return GenerateSharedStore(id, 8); + } + + private static Function GenerateSharedStore16(int id) + { + return GenerateSharedStore(id, 16); + } + + private static Function GenerateSharedStore(int id, int bitSize) + { + EmitterContext context = new EmitterContext(); + + Operand offset = Argument(0); + Operand value = Argument(1); + + Operand wordOffset = context.ShiftRightU32(offset, Const(2)); + Operand bitOffset = GetBitOffset(context, offset); + + GenerateSharedAtomicCasLoop(context, wordOffset, id, (memValue) => + { + return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize)); + }); + + context.Return(); + + return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, $"SharedStore{bitSize}_{id}", false, 2, 0); + } + private Function GenerateTexelFetchScaleFunction() { EmitterContext context = new EmitterContext(); @@ -226,5 +303,29 @@ namespace Ryujinx.Graphics.Shader.Translation return context.IAdd(Const(1), index); } } + + public static Operand GetBitOffset(EmitterContext context, Operand offset) + { + return context.ShiftLeft(context.BitwiseAnd(offset, 
Const(3)), Const(3)); + } + + private static Operand GenerateSharedAtomicCasLoop(EmitterContext context, Operand wordOffset, int id, Func opCallback) + { + Operand lblLoopHead = Label(); + + context.MarkLabel(lblLoopHead); + + Operand oldValue = context.Load(StorageKind.SharedMemory, id, wordOffset); + Operand newValue = opCallback(oldValue); + + Operand casResult = context.AtomicCompareAndSwap(StorageKind.SharedMemory, id, wordOffset, oldValue, newValue); + + Operand casFail = context.ICompareNotEqual(casResult, oldValue); + + context.BranchIfTrue(lblLoopHead, casFail); + + return oldValue; + } + } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs index 8c37c34c7..984f2d047 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs @@ -4,6 +4,10 @@ namespace Ryujinx.Graphics.Shader.Translation { ConvertDoubleToFloat, ConvertFloatToDouble, + SharedAtomicMaxS32, + SharedAtomicMinS32, + SharedStore8, + SharedStore16, TexelFetchScale, TextureSizeUnscale } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 14904b260..9d260c678 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -244,7 +244,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations node = nextNode; } } - else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) + else if (operation.Inst == Instruction.Store && + (operation.StorageKind == StorageKind.SharedMemory || + operation.StorageKind == StorageKind.LocalMemory)) { // The NVIDIA compiler can sometimes use shared or local memory as temporary // storage to place the base address 
and size on, so we need @@ -874,7 +876,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (bitSize < 32) { - Operand bitOffset = GetBitOffset(context, offset); + Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset); GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => { @@ -892,7 +894,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (IsSmallInt(storageKind)) { - Operand bitOffset = GetBitOffset(context, offset); + Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset); switch (storageKind) { @@ -921,11 +923,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return true; } - private static Operand GetBitOffset(EmitterContext context, Operand offset) - { - return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); - } - private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func opCallback) { Operand lblLoopHead = Label(); @@ -1070,15 +1067,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { baseOffset = null; - if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) + if (operation.Inst == Instruction.Load || operation.Inst == Instruction.Store) { - type = LsMemoryType.Shared; - return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); - } - else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) - { - type = LsMemoryType.Local; - return TryGetLocalMemoryOffset(operation, out constOffset); + if (operation.StorageKind == StorageKind.SharedMemory) + { + type = LsMemoryType.Shared; + return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); + } + else if (operation.StorageKind == StorageKind.LocalMemory) + { + type = LsMemoryType.Local; + return TryGetLocalMemoryOffset(operation, out constOffset); + } } type = default; diff --git 
a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index 2d19a5a70..c58e4828b 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common; using Ryujinx.Graphics.Shader.StructuredIr; using System; using System.Collections.Generic; @@ -22,9 +23,12 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly HashSet _usedConstantBufferBindings; + public int LocalMemoryId { get; } + public int SharedMemoryId { get; } + public ShaderProperties Properties => _properties; - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties, int localMemorySize) { _gpuAccessor = gpuAccessor; _properties = properties; @@ -41,6 +45,25 @@ namespace Ryujinx.Graphics.Shader.Translation _usedConstantBufferBindings = new HashSet(); properties.AddConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType())); + + LocalMemoryId = -1; + SharedMemoryId = -1; + + if (localMemorySize != 0) + { + var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(localMemorySize, sizeof(uint))); + + LocalMemoryId = properties.AddLocalMemory(lmem); + } + + int sharedMemorySize = stage == ShaderStage.Compute ? 
gpuAccessor.QueryComputeSharedMemorySize() : 0; + + if (sharedMemorySize != 0) + { + var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(sharedMemorySize, sizeof(uint))); + + SharedMemoryId = properties.AddSharedMemory(smem); + } } public int GetConstantBufferBinding(int slot) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index baa88251b..f5a524a0f 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -1,6 +1,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation.Optimizations; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; @@ -70,6 +72,15 @@ namespace Ryujinx.Graphics.Shader.Translation } } } + else + { + node = InsertSharedStoreSmallInt(hfm, node); + + if (config.Options.TargetLanguage != TargetLanguage.Spirv) + { + node = InsertSharedAtomicSigned(hfm, node); + } + } } } } @@ -171,6 +182,87 @@ namespace Ryujinx.Graphics.Shader.Translation operation.TurnIntoCopy(result); } + private static LinkedListNode InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.StorageKind == StorageKind.SharedMemory8) + { + name = HelperFunctionName.SharedStore8; + } + else if (operation.StorageKind == StorageKind.SharedMemory16) + { + name = HelperFunctionName.SharedStore16; + } + else + { + return node; + } + + if (operation.Inst != Instruction.Store) + { + return node; + } + + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + 
+ int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + + private static LinkedListNode InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.Inst == Instruction.AtomicMaxS32) + { + name = HelperFunctionName.SharedAtomicMaxS32; + } + else if (operation.Inst == Instruction.AtomicMinS32) + { + name = HelperFunctionName.SharedAtomicMinS32; + } + else + { + return node; + } + + if (operation.StorageKind != StorageKind.SharedMemory) + { + return node; + } + + Operand result = operation.Dest; + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + + int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + private static LinkedListNode InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode node, ShaderConfig config) { TextureOperation texOp = (TextureOperation)node.Value; diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 534bda70e..fa1250022 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -124,7 +124,7 @@ namespace Ryujinx.Graphics.Shader.Translation private TextureDescriptor[] _cachedTextureDescriptors; private 
TextureDescriptor[] _cachedImageDescriptors; - public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options) + public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize) { Stage = stage; GpuAccessor = gpuAccessor; @@ -143,7 +143,7 @@ namespace Ryujinx.Graphics.Shader.Translation _usedTextures = new Dictionary(); _usedImages = new Dictionary(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); + ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties(), localMemorySize); if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) { @@ -176,14 +176,17 @@ namespace Ryujinx.Graphics.Shader.Translation OutputTopology outputTopology, int maxOutputVertices, IGpuAccessor gpuAccessor, - TranslationOptions options) : this(stage, gpuAccessor, options) + TranslationOptions options) : this(stage, gpuAccessor, options, 0) { ThreadsPerInputPrimitive = 1; OutputTopology = outputTopology; MaxOutputVertices = maxOutputVertices; } - public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options) + public ShaderConfig( + ShaderHeader header, + IGpuAccessor gpuAccessor, + TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header)) { GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; @@ -197,6 +200,11 @@ namespace Ryujinx.Graphics.Shader.Translation LastInVertexPipeline = header.Stage < ShaderStage.Fragment; } + private static int GetLocalMemorySize(ShaderHeader header) + { + return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); + } + private void EnsureTransformFeedbackInitialized() { if (HasTransformFeedbackOutputs() && 
_transformFeedbackOutputs == null) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index c0212a5bc..b44d6daaa 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -107,7 +107,7 @@ namespace Ryujinx.Graphics.Shader.Translation if (options.Flags.HasFlag(TranslationFlags.Compute)) { - config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options); + config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize()); program = Decoder.Decode(config, address); } From f9a538bb0f02b4665f8cccbde0730e08da208024 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 17 Jun 2023 16:28:27 -0300 Subject: [PATCH 2/2] Ensure shader local and shared memory sizes are not zero (#5321) --- .../Decoders/Decoder.cs | 11 ++++ .../Instructions/InstEmitMemory.cs | 18 +++++++ .../Translation/FeatureFlags.cs | 4 +- .../Translation/ResourceManager.cs | 51 ++++++++++++++----- .../Translation/ShaderConfig.cs | 10 ++-- .../Translation/TranslatorContext.cs | 12 +++++ 6 files changed, 88 insertions(+), 18 deletions(-) diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index c619b9bbc..4e6c6a5df 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -247,6 +247,17 @@ namespace Ryujinx.Graphics.Shader.Decoders { block.AddPushOp(op); } + else if (op.Name == InstName.Ldl || op.Name == InstName.Stl) + { + config.SetUsedFeature(FeatureFlags.LocalMemory); + } + else if (op.Name == InstName.Atoms || + op.Name == InstName.AtomsCas || + op.Name == InstName.Lds || + op.Name == InstName.Sts) + { + config.SetUsedFeature(FeatureFlags.SharedMemory); + } block.OpCodes.Add(op); diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs 
b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 99d7bec97..40312f4a4 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -27,6 +27,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Atoms(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + context.Config.GpuAccessor.Log($"Atoms instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstAtoms op = context.GetOp(); Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2)); @@ -114,6 +120,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Lds(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + context.Config.GpuAccessor.Log($"Lds instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstLds op = context.GetOp(); EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); @@ -144,6 +156,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Sts(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + context.Config.GpuAccessor.Log($"Sts instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstSts op = context.GetOp(); EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index e55ed13da..59d35d906 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -21,6 +21,8 @@ namespace Ryujinx.Graphics.Shader.Translation RtLayer = 1 << 5, IaIndexing = 1 << 7, OaIndexing = 1 << 8, - FixedFuncAttr = 1 << 9 + FixedFuncAttr = 1 << 9, + LocalMemory = 1 << 10, + 
SharedMemory = 1 << 11 } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index c58e4828b..3a46f6e4e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -8,6 +8,11 @@ namespace Ryujinx.Graphics.Shader.Translation { class ResourceManager { + // Those values are used if the shader has local or shared memory access, + // but for some reason the supplied size was 0. + private const int DefaultLocalMemorySize = 128; + private const int DefaultSharedMemorySize = 4096; + private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; private readonly IGpuAccessor _gpuAccessor; @@ -23,12 +28,12 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly HashSet _usedConstantBufferBindings; - public int LocalMemoryId { get; } - public int SharedMemoryId { get; } + public int LocalMemoryId { get; private set; } + public int SharedMemoryId { get; private set; } public ShaderProperties Properties => _properties; - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties, int localMemorySize) + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) { _gpuAccessor = gpuAccessor; _properties = properties; @@ -48,21 +53,43 @@ namespace Ryujinx.Graphics.Shader.Translation LocalMemoryId = -1; SharedMemoryId = -1; + } - if (localMemorySize != 0) + public void SetCurrentLocalMemory(int size, bool isUsed) + { + if (isUsed) { - var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(localMemorySize, sizeof(uint))); + if (size <= 0) + { + size = DefaultLocalMemorySize; + } - LocalMemoryId = properties.AddLocalMemory(lmem); + var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(size, 
sizeof(uint))); + + LocalMemoryId = Properties.AddLocalMemory(lmem); } - - int sharedMemorySize = stage == ShaderStage.Compute ? gpuAccessor.QueryComputeSharedMemorySize() : 0; - - if (sharedMemorySize != 0) + else { - var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(sharedMemorySize, sizeof(uint))); + LocalMemoryId = -1; + } + } - SharedMemoryId = properties.AddSharedMemory(smem); + public void SetCurrentSharedMemory(int size, bool isUsed) + { + if (isUsed) + { + if (size <= 0) + { + size = DefaultSharedMemorySize; + } + + var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(size, sizeof(uint))); + + SharedMemoryId = Properties.AddSharedMemory(smem); + } + else + { + SharedMemoryId = -1; } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index fa1250022..e50c9a845 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -126,9 +126,10 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize) { - Stage = stage; - GpuAccessor = gpuAccessor; - Options = options; + Stage = stage; + GpuAccessor = gpuAccessor; + Options = options; + LocalMemorySize = localMemorySize; _transformFeedbackDefinitions = new Dictionary(); @@ -143,7 +144,7 @@ namespace Ryujinx.Graphics.Shader.Translation _usedTextures = new Dictionary(); _usedImages = new Dictionary(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties(), localMemorySize); + ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) { @@ -192,7 +193,6 @@ namespace Ryujinx.Graphics.Shader.Translation 
ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; OutputTopology = header.OutputTopology; MaxOutputVertices = header.MaxOutputVertexCount; - LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); ImapTypes = header.ImapTypes; OmapTargets = header.OmapTargets; OmapSampleMask = header.OmapSampleMask; diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 9647b13f1..13c5e0e40 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -149,6 +149,17 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderProgram Translate(TranslatorContext other = null) { + bool usesLocalMemory = _config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + + _config.ResourceManager.SetCurrentLocalMemory(_config.LocalMemorySize, usesLocalMemory); + + if (_config.Stage == ShaderStage.Compute) + { + bool usesSharedMemory = _config.UsedFeatures.HasFlag(FeatureFlags.SharedMemory); + + _config.ResourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); + } + FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _); if (other != null) @@ -157,6 +168,7 @@ namespace Ryujinx.Graphics.Shader.Translation // We need to share the resource manager since both shaders accesses the same constant buffers. other._config.ResourceManager = _config.ResourceManager; + other._config.ResourceManager.SetCurrentLocalMemory(other._config.LocalMemorySize, other._config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory)); FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);