From 2cdcfe46d8959b0cbd8aea3b4439b30a55d47f00 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 8 Jun 2023 17:43:16 -0300 Subject: [PATCH] Remove barrier on Intel if control flow is potentially divergent (#5044) * Remove barrier on Intel if control flow is potentially divergent * Shader cache version bump --- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 ++ .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../Shader/GpuAccessorBase.cs | 2 ++ src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 2 ++ .../CodeGen/Glsl/GlslGenerator.cs | 32 ++++++++++++++++--- .../CodeGen/Spirv/CodeGenContext.cs | 7 +++- .../CodeGen/Spirv/Instructions.cs | 12 +++++++ .../CodeGen/Spirv/SpirvGenerator.cs | 2 +- src/Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9 ++++++ src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 1 + 10 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index f2dd0963f..3b6e6b906 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsCubemapView; public readonly bool SupportsNonConstantTextureOffset; public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureShadowLod; public readonly bool SupportsViewportIndexVertexTessellation; @@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL bool supportsCubemapView, bool supportsNonConstantTextureOffset, bool supportsShaderBallot, + bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureShadowLod, bool supportsViewportIndexVertexTessellation, @@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL SupportsCubemapView = supportsCubemapView; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; SupportsShaderBallot = supportsShaderBallot; + 
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureShadowLod = supportsTextureShadowLod; SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 9419ea92c..f35b542a2 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 5159; + private const uint CodeGenVersion = 5044; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index a60564e0e..57e79ac7f 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; + public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence; + public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64; public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat; diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 234340e5f..81faa00ef 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ 
b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL public Capabilities GetCapabilities() { bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows; + bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix; bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows; return new Capabilities( @@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL supportsCubemapView: true, supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset, supportsShaderBallot: HwCapabilities.SupportsShaderBallot, + supportsShaderBarrierDivergence: !(intelWindows || intelUnix), supportsShaderFloat64: true, supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray, diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs index 751d03507..fe0d275b6 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs @@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl for (int i = 1; i < info.Functions.Count; i++) { - PrintFunction(context, info, info.Functions[i]); + PrintFunction(context, info.Functions[i]); context.AppendLine(); } } - PrintFunction(context, info, info.Functions[0], MainFunctionName); + PrintFunction(context, info.Functions[0], MainFunctionName); return context.GetCode(); } - private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null) + private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null) { context.CurrentFunction = function; @@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl Declarations.DeclareLocals(context, function); - 
PrintBlock(context, function.MainBlock); + PrintBlock(context, function.MainBlock, funcName == MainFunctionName); context.LeaveScope(); } @@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})"; } - private static void PrintBlock(CodeGenContext context, AstBlock block) + private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction) { AstBlockVisitor visitor = new AstBlockVisitor(block); @@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } }; + bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence(); + bool mayHaveReturned = false; + foreach (IAstNode node in visitor.Visit()) { if (node is AstOperation operation) { + if (!supportsBarrierDivergence) + { + if (operation.Inst == IntermediateRepresentation.Instruction.Barrier) + { + // Barrier on divergent control flow paths may cause the GPU to hang, + // so skip emitting the barrier for those cases. 
+ if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction) + { + context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed."); + + continue; + } + } + else if (operation.Inst == IntermediateRepresentation.Instruction.Return) + { + mayHaveReturned = true; + } + } + string expr = InstGen.GetExpression(context, operation); if (expr != null) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index c1bfa0883..1f5167e66 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public SpirvDelegates Delegates { get; } + public bool IsMainFunction { get; private set; } + public bool MayHaveReturned { get; set; } + public CodeGenContext( StructuredProgramInfo info, ShaderConfig config, @@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Delegates = new SpirvDelegates(this); } - public void StartFunction() + public void StartFunction(bool isMainFunction) { + IsMainFunction = isMainFunction; + MayHaveReturned = false; _locals.Clear(); _localForArgs.Clear(); _funcArgs.Clear(); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 4be0c62be..6c1157525 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) { + // Barrier on divergent control flow paths may cause the GPU to hang, + // so skip emitting the barrier for those cases. 
+ if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() && + (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction)) + { + context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed."); + + return OperationResult.Invalid; + } + context.ControlBarrier( context.Constant(context.TypeU32(), Scope.Workgroup), context.Constant(context.TypeU32(), Scope.Workgroup), @@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation) { + context.MayHaveReturned = true; + if (operation.SourcesCount != 0) { context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0))); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index a55e09fd3..5c736b605 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.CurrentFunction = function; context.AddFunction(spvFunc); - context.StartFunction(); + context.StartFunction(isMainFunction: funcIndex == 0); Declarations.DeclareParameters(context, function); diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index d4f99e11c..d3794cddd 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader return true; } + /// <summary> + /// Queries host GPU shader support for barrier instructions on divergent control flow paths. 
+ /// </summary> + /// <returns>True if the GPU supports barriers on divergent control flow paths, false otherwise</returns> + bool QueryHostSupportsShaderBarrierDivergence() + { + return true; + } + /// <summary> /// Queries host GPU support for 64-bit floating point (double precision) operations on the shader. /// </summary> diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 0daec00c3..a059d683a 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan supportsCubemapView: !IsAmdGcn, supportsNonConstantTextureOffset: false, supportsShaderBallot: false, + supportsShaderBarrierDivergence: Vendor != Vendor.Intel, supportsShaderFloat64: Capabilities.SupportsShaderFloat64, supportsTextureShadowLod: false, supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,