Shader: Bias textureGather instructions on AMD/Intel (#4703)

* Experimental (GLSL, forced)

* SPIR-V attempt

* Add capability

* Fix pCount == 1 on glsl

* Fix typo
This commit is contained in:
riperiperi 2023-04-22 22:02:39 +01:00 committed by GitHub
parent e27f5522e2
commit 8d9d508dc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 84 additions and 9 deletions

View file

@ -48,6 +48,8 @@ namespace Ryujinx.Graphics.GAL
public readonly float MaximumSupportedAnisotropy; public readonly float MaximumSupportedAnisotropy;
public readonly int StorageBufferOffsetAlignment; public readonly int StorageBufferOffsetAlignment;
public readonly int GatherBiasPrecision;
public Capabilities( public Capabilities(
TargetApi api, TargetApi api,
string vendorName, string vendorName,
@ -87,7 +89,8 @@ namespace Ryujinx.Graphics.GAL
uint maximumImagesPerStage, uint maximumImagesPerStage,
int maximumComputeSharedMemorySize, int maximumComputeSharedMemorySize,
float maximumSupportedAnisotropy, float maximumSupportedAnisotropy,
int storageBufferOffsetAlignment) int storageBufferOffsetAlignment,
int gatherBiasPrecision)
{ {
Api = api; Api = api;
VendorName = vendorName; VendorName = vendorName;
@ -128,6 +131,7 @@ namespace Ryujinx.Graphics.GAL
MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize; MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
MaximumSupportedAnisotropy = maximumSupportedAnisotropy; MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
StorageBufferOffsetAlignment = storageBufferOffsetAlignment; StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
GatherBiasPrecision = gatherBiasPrecision;
} }
} }
} }

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2; private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 4404; private const uint CodeGenVersion = 4703;
private const string SharedTocFileName = "shared.toc"; private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data"; private const string SharedDataFileName = "shared.data";

View file

@ -112,6 +112,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
}; };
} }
/// <summary>
/// Queries the gather bias precision reported by the current context capabilities.
/// </summary>
/// <returns>The <c>GatherBiasPrecision</c> value of the context capabilities</returns>
public int QueryHostGatherBiasPrecision()
{
    return _context.Capabilities.GatherBiasPrecision;
}
public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision; public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;
public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug; public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;

View file

@ -103,11 +103,14 @@ namespace Ryujinx.Graphics.OpenGL
public Capabilities GetCapabilities() public Capabilities GetCapabilities()
{ {
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
return new Capabilities( return new Capabilities(
api: TargetApi.OpenGL, api: TargetApi.OpenGL,
vendorName: GpuVendor, vendorName: GpuVendor,
hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows, hasFrontFacingBug: intelWindows,
hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows, hasVectorIndexingBug: amdWindows,
needsFragmentOutputSpecialization: false, needsFragmentOutputSpecialization: false,
reduceShaderPrecision: false, reduceShaderPrecision: false,
supportsAstcCompression: HwCapabilities.SupportsAstcCompression, supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
@ -142,7 +145,8 @@ namespace Ryujinx.Graphics.OpenGL
maximumImagesPerStage: 8, maximumImagesPerStage: 8,
maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize, maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy, maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment); storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment,
gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan.
} }
public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data) public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)

View file

@ -677,7 +677,28 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return vector; return vector;
} }
Append(ApplyScaling(AssemblePVector(pCount))); string ApplyBias(string vector)
{
// Returns the coordinate expression with a small positive offset appended when this is a
// gather operation and the host reports a non-zero gather bias precision; otherwise the
// expression is returned unchanged.
int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
if (isGather && gatherBiasPrecision != 0)
{
// GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
// Offset by the gather precision divided by 2 to correct for rounding.
// The emitted offset is 1.0 / (textureSize * 2^(gatherBiasPrecision + 1)).
if (pCount == 1)
{
// Single coordinate: the texture size is converted with a scalar float() cast.
vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))";
}
else
{
// Multiple coordinates: the first pCount components of the texture size are converted to a vecN.
// NOTE(review): "xyz".Substring(0, pCount) throws if pCount exceeds 3 - confirm gather never reaches this path with four coordinates.
vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))";
}
}
return vector;
}
Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));
string AssembleDerivativesVector(int count) string AssembleDerivativesVector(int count)
{ {

View file

@ -4,6 +4,7 @@ using Ryujinx.Graphics.Shader.Translation;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics; using System.Diagnostics;
using System.Linq;
using System.Numerics; using System.Numerics;
using static Spv.Specification; using static Spv.Specification;
@ -1556,6 +1557,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
} }
} }
// Adds a small positive offset to the gather coordinates when the host reports a non-zero
// gather bias precision. Non-gather operations, or hosts reporting zero, get the input back untouched.
SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image)
{
    int precisionBits = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();

    if (!isGather || precisionBits == 0)
    {
        return vector;
    }

    // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
    // Offset by the gather precision divided by 2 to correct for rounding.
    bool isScalar = pCount == 1;

    // NOTE(review): for pCount == 1 the float type below is a scalar and is still passed to
    // CompositeConstruct as the result type - confirm gather never reaches here with one coordinate.
    var intSizeType = isScalar ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount);
    var floatCoordType = isScalar ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount);

    // Splat 2^(precisionBits + 1) across every coordinate component.
    var scale = context.Constant(context.TypeFP32(), (float)(1 << (precisionBits + 1)));
    var scaleSplat = context.CompositeConstruct(floatCoordType, Enumerable.Repeat(scale, pCount).ToArray());

    // Splat 1.0 across every coordinate component.
    var unit = context.Constant(context.TypeFP32(), 1f);
    var unitSplat = context.CompositeConstruct(floatCoordType, Enumerable.Repeat(unit, pCount).ToArray());

    // offset = 1 / (float(imageSize) * scale)
    var imageSize = context.ImageQuerySize(intSizeType, image);
    var imageSizeFloat = context.ConvertSToF(floatCoordType, imageSize);
    var scaledSize = context.FMul(floatCoordType, imageSizeFloat, scaleSplat);
    var offset = context.FDiv(floatCoordType, unitSplat, scaledSize);

    return context.FAdd(floatCoordType, vector, offset);
}
SpvInstruction pCoords = AssemblePVector(pCount); SpvInstruction pCoords = AssemblePVector(pCount);
pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount); pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);
@ -1716,6 +1744,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
image = context.Image(imageType, image); image = context.Image(imageType, image);
} }
pCoords = ApplyBias(pCoords, image);
var operands = operandsList.ToArray(); var operands = operandsList.ToArray();
SpvInstruction result; SpvInstruction result;

View file

@ -196,6 +196,15 @@ namespace Ryujinx.Graphics.Shader
return false; return false;
} }
/// <summary>
/// Queries how many bits of gather operation precision the host wants used when biasing gather coordinates.
/// A value of zero means that no bias is applied.
/// </summary>
/// <returns>Bits of gather operation precision to use for the coordinate bias; this default returns zero</returns>
int QueryHostGatherBiasPrecision() => 0;
/// <summary> /// <summary>
/// Queries host about whether to reduce precision to improve performance. /// Queries host about whether to reduce precision to improve performance.
/// </summary> /// </summary>

View file

@ -46,6 +46,7 @@ namespace Ryujinx.Graphics.Vulkan
public readonly SampleCountFlags SupportedSampleCounts; public readonly SampleCountFlags SupportedSampleCounts;
public readonly PortabilitySubsetFlags PortabilitySubset; public readonly PortabilitySubsetFlags PortabilitySubset;
public readonly uint VertexBufferAlignment; public readonly uint VertexBufferAlignment;
public readonly uint SubTexelPrecisionBits;
public HardwareCapabilities( public HardwareCapabilities(
bool supportsIndexTypeUint8, bool supportsIndexTypeUint8,
@ -77,7 +78,8 @@ namespace Ryujinx.Graphics.Vulkan
ShaderStageFlags requiredSubgroupSizeStages, ShaderStageFlags requiredSubgroupSizeStages,
SampleCountFlags supportedSampleCounts, SampleCountFlags supportedSampleCounts,
PortabilitySubsetFlags portabilitySubset, PortabilitySubsetFlags portabilitySubset,
uint vertexBufferAlignment) uint vertexBufferAlignment,
uint subTexelPrecisionBits)
{ {
SupportsIndexTypeUint8 = supportsIndexTypeUint8; SupportsIndexTypeUint8 = supportsIndexTypeUint8;
SupportsCustomBorderColor = supportsCustomBorderColor; SupportsCustomBorderColor = supportsCustomBorderColor;
@ -109,6 +111,7 @@ namespace Ryujinx.Graphics.Vulkan
SupportedSampleCounts = supportedSampleCounts; SupportedSampleCounts = supportedSampleCounts;
PortabilitySubset = portabilitySubset; PortabilitySubset = portabilitySubset;
VertexBufferAlignment = vertexBufferAlignment; VertexBufferAlignment = vertexBufferAlignment;
SubTexelPrecisionBits = subTexelPrecisionBits;
} }
} }
} }

View file

@ -311,7 +311,8 @@ namespace Ryujinx.Graphics.Vulkan
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages, propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
supportedSampleCounts, supportedSampleCounts,
portabilityFlags, portabilityFlags,
vertexBufferAlignment); vertexBufferAlignment,
properties.Limits.SubTexelPrecisionBits);
IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice); IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice);
@ -576,7 +577,8 @@ namespace Ryujinx.Graphics.Vulkan
maximumImagesPerStage: Constants.MaxImagesPerStage, maximumImagesPerStage: Constants.MaxImagesPerStage,
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize, maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy, maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment); storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
} }
public HardwareInfo GetHardwareInfo() public HardwareInfo GetHardwareInfo()