Remove barrier on Intel if control flow is potentially divergent (#5044)

* Remove barrier on Intel if control flow is potentially divergent

* Shader cache version bump
This commit is contained in:
gdkchan 2023-06-08 17:43:16 -03:00 committed by GitHub
parent fe30c03cac
commit 2cdcfe46d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 64 additions and 8 deletions

View file

@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsCubemapView; public readonly bool SupportsCubemapView;
public readonly bool SupportsNonConstantTextureOffset; public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot; public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64; public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureShadowLod; public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportIndexVertexTessellation; public readonly bool SupportsViewportIndexVertexTessellation;
@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsCubemapView, bool supportsCubemapView,
bool supportsNonConstantTextureOffset, bool supportsNonConstantTextureOffset,
bool supportsShaderBallot, bool supportsShaderBallot,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64, bool supportsShaderFloat64,
bool supportsTextureShadowLod, bool supportsTextureShadowLod,
bool supportsViewportIndexVertexTessellation, bool supportsViewportIndexVertexTessellation,
@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL
SupportsCubemapView = supportsCubemapView; SupportsCubemapView = supportsCubemapView;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot; SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64; SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureShadowLod = supportsTextureShadowLod; SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation; SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2; private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 5159; private const uint CodeGenVersion = 5044;
private const string SharedTocFileName = "shared.toc"; private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data"; private const string SharedDataFileName = "shared.data";

View file

@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64; public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat; public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;

View file

@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL
public Capabilities GetCapabilities() public Capabilities GetCapabilities()
{ {
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows; bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows; bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
return new Capabilities( return new Capabilities(
@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsCubemapView: true, supportsCubemapView: true,
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset, supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot, supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true, supportsShaderFloat64: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray, supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,

View file

@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
for (int i = 1; i < info.Functions.Count; i++) for (int i = 1; i < info.Functions.Count; i++)
{ {
PrintFunction(context, info, info.Functions[i]); PrintFunction(context, info.Functions[i]);
context.AppendLine(); context.AppendLine();
} }
} }
PrintFunction(context, info, info.Functions[0], MainFunctionName); PrintFunction(context, info.Functions[0], MainFunctionName);
return context.GetCode(); return context.GetCode();
} }
private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null) private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null)
{ {
context.CurrentFunction = function; context.CurrentFunction = function;
@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
Declarations.DeclareLocals(context, function); Declarations.DeclareLocals(context, function);
PrintBlock(context, function.MainBlock); PrintBlock(context, function.MainBlock, funcName == MainFunctionName);
context.LeaveScope(); context.LeaveScope();
} }
@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})"; return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
} }
private static void PrintBlock(CodeGenContext context, AstBlock block) private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
{ {
AstBlockVisitor visitor = new AstBlockVisitor(block); AstBlockVisitor visitor = new AstBlockVisitor(block);
@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
} }
}; };
bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence();
bool mayHaveReturned = false;
foreach (IAstNode node in visitor.Visit()) foreach (IAstNode node in visitor.Visit())
{ {
if (node is AstOperation operation) if (node is AstOperation operation)
{ {
if (!supportsBarrierDivergence)
{
if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
{
// Barrier on divergent control flow paths may cause the GPU to hang,
// so skip emitting the barrier for those cases.
if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction)
{
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
continue;
}
}
else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
{
mayHaveReturned = true;
}
}
string expr = InstGen.GetExpression(context, operation); string expr = InstGen.GetExpression(context, operation);
if (expr != null) if (expr != null)

View file

@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
public SpirvDelegates Delegates { get; } public SpirvDelegates Delegates { get; }
public bool IsMainFunction { get; private set; }
public bool MayHaveReturned { get; set; }
public CodeGenContext( public CodeGenContext(
StructuredProgramInfo info, StructuredProgramInfo info,
ShaderConfig config, ShaderConfig config,
@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
Delegates = new SpirvDelegates(this); Delegates = new SpirvDelegates(this);
} }
public void StartFunction() public void StartFunction(bool isMainFunction)
{ {
IsMainFunction = isMainFunction;
MayHaveReturned = false;
_locals.Clear(); _locals.Clear();
_localForArgs.Clear(); _localForArgs.Clear();
_funcArgs.Clear(); _funcArgs.Clear();

View file

@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
{ {
// Barrier on divergent control flow paths may cause the GPU to hang,
// so skip emitting the barrier for those cases.
if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() &&
(context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
{
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
return OperationResult.Invalid;
}
context.ControlBarrier( context.ControlBarrier(
context.Constant(context.TypeU32(), Scope.Workgroup), context.Constant(context.TypeU32(), Scope.Workgroup),
context.Constant(context.TypeU32(), Scope.Workgroup), context.Constant(context.TypeU32(), Scope.Workgroup),
@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation) private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
{ {
context.MayHaveReturned = true;
if (operation.SourcesCount != 0) if (operation.SourcesCount != 0)
{ {
context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0))); context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));

View file

@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.CurrentFunction = function; context.CurrentFunction = function;
context.AddFunction(spvFunc); context.AddFunction(spvFunc);
context.StartFunction(); context.StartFunction(isMainFunction: funcIndex == 0);
Declarations.DeclareParameters(context, function); Declarations.DeclareParameters(context, function);

View file

@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
return true; return true;
} }
/// <summary>
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// </summary>
/// <returns>True if the GPU supports barriers on divergent control flow paths, false otherwise</returns>
bool QueryHostSupportsShaderBarrierDivergence()
{
return true;
}
/// <summary> /// <summary>
/// Queries host GPU support for 64-bit floating point (double precision) operations on the shader. /// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
/// </summary> /// </summary>

View file

@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsCubemapView: !IsAmdGcn, supportsCubemapView: !IsAmdGcn,
supportsNonConstantTextureOffset: false, supportsNonConstantTextureOffset: false,
supportsShaderBallot: false, supportsShaderBallot: false,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64, supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureShadowLod: false, supportsTextureShadowLod: false,
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex, supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,