From c95cdc853eccc92d3ee40b46b63426a8de866341 Mon Sep 17 00:00:00 2001 From: gdk Date: Mon, 11 Apr 2022 12:17:33 -0300 Subject: [PATCH] Enable shader cache on Vulkan and implement MultiplyHighS32/U32 on SPIR-V (missed those before) --- .../Shader/Cache/Migration.cs | 6 +- .../DiskCache/BackgroundDiskCacheWriter.cs | 2 +- .../Shader/DiskCache/DiskCacheGpuAccessor.cs | 54 ++++++- .../Shader/DiskCache/DiskCacheGuestStorage.cs | 6 +- .../Shader/DiskCache/DiskCacheHostStorage.cs | 150 ++++++++++++------ .../Shader/DiskCache/GuestCodeAndCbData.cs | 31 ++++ .../DiskCache/ParallelDiskCacheLoader.cs | 137 +++++++++------- .../DiskCache/ShaderBinarySerializer.cs | 49 ++++++ Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 40 +++-- .../Shader/ShaderSpecializationState.cs | 60 ++++++- .../CodeGen/Spirv/Instructions.cs | 26 +++ Ryujinx.Graphics.Vulkan/ShaderCollection.cs | 2 +- .../VulkanGraphicsDevice.cs | 4 +- Ryujinx/Ui/MainWindow.cs | 2 +- 14 files changed, 441 insertions(+), 128 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs index 49acd3add..92f15139c 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs @@ -105,7 +105,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache entry.Header.GpuAccessorHeader.ComputeLocalMemorySize, entry.Header.GpuAccessorHeader.ComputeSharedMemorySize); - ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState); foreach (var td in entry.TextureDescriptors) { @@ -198,7 +198,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache } } - ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew); + ProgramPipelineState pipelineState = default; + + ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipelineState, tfdNew); for (int i = 0; i < entries.Length; i++) { diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs index 5c5e41c69..98655ed68 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs @@ -83,7 +83,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { _context = context; _hostStorage = hostStorage; - _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "Gpu.BackgroundDiskCacheWriter"); + _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "GPU.BackgroundDiskCacheWriter"); } /// diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs index 1ab9e8655..81569080b 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -1,6 +1,8 @@ using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Runtime.InteropServices; @@ -16,7 +18,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private readonly ShaderSpecializationState _oldSpecState; private readonly ShaderSpecializationState _newSpecState; private readonly int _stageIndex; - private ResourceCounts _resourceCounts; + private readonly bool _isVulkan; + private readonly ResourceCounts _resourceCounts; /// /// Creates a new instance of the cached GPU state accessor for shader translation. @@ -41,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _oldSpecState = oldSpecState; _newSpecState = newSpecState; _stageIndex = stageIndex; + _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; _resourceCounts = counts; } @@ -67,6 +71,36 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return MemoryMarshal.Cast(_data.Span.Slice((int)address)); } + /// + public AlphaTestOp QueryAlphaTestCompare() + { + if (!_isVulkan || !_oldSpecState.GraphicsState.AlphaTestEnable) + { + return AlphaTestOp.Always; + } + + return _oldSpecState.GraphicsState.AlphaTestCompare switch + { + CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never, + CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less, + CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal, + CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual, + CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater, + CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual, + CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual, + _ => AlphaTestOp.Always + }; + } + + /// + public float QueryAlphaTestReference() => _oldSpecState.GraphicsState.AlphaTestReference; + + /// + public AttributeType QueryAttributeType(int location) + { + return _oldSpecState.GraphicsState.AttributeTypes[location]; + } + /// public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX; @@ -96,6 +130,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode); } + /// + public bool QueryProgramPointSize() + { + return _oldSpecState.GraphicsState.ProgramPointSizeEnable; + } + + /// + public float QueryPointSize() + { + return _oldSpecState.GraphicsState.PointSize; + } + /// public bool QueryTessCw() { @@ -136,6 +182,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); } + /// + public bool QueryTransformDepthMinusOneToOne() + { + return _oldSpecState.GraphicsState.DepthMode; + } + /// public bool QueryTransformFeedbackEnabled() { diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs index 4e338094f..b31428281 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs @@ -193,8 +193,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Guest TOC file stream /// Guest data file stream /// Guest shader index - /// Tuple with the guest code and constant buffer 1 data, respectively - public (byte[], byte[]) LoadShader(Stream tocFileStream, Stream dataFileStream, int index) + /// Guest code and constant buffer 1 data + public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index) { if (_cache == null || index >= _cache.Length) { @@ -226,7 +226,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _cache[index] = (guestCode, cb1Data); } - return (guestCode, cb1Data); + return new GuestCodeAndCbData(guestCode, cb1Data); } /// diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 5d99957f0..799838ec1 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.IO; using System.Numerics; @@ -19,7 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24); private const ushort FileFormatVersionMajor = 1; - private const ushort FileFormatVersionMinor = 1; + private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint CodeGenVersion = 1; @@ -77,9 +78,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache public ulong Offset; /// - /// Size. + /// Size of uncompressed data. /// - public uint Size; + public uint UncompressedSize; + + /// + /// Size of compressed data. + /// + public uint CompressedSize; } /// @@ -196,6 +202,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private static string GetHostFileName(GpuContext context) { string apiName = context.Capabilities.Api.ToString().ToLowerInvariant(); + + // We are just storing SPIR-V directly on Vulkan, so the code won't change per vendor. + // We can just have a single file for all vendors. + if (context.Capabilities.Api == TargetApi.Vulkan) + { + return apiName; + } + string vendorName = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant()); return $"{apiName}_{vendorName}"; } @@ -324,7 +338,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache stagesBitMask = 1; } - CachedShaderStage[] shaders = new CachedShaderStage[isCompute ? 1 : Constants.ShaderStages + 1]; + GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1]; DataEntryPerStage stageEntry = new DataEntryPerStage(); @@ -334,15 +348,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataReader.Read(ref stageEntry); - ShaderProgramInfo info = stageIndex != 0 || isCompute ? ReadShaderProgramInfo(ref dataReader) : null; - - (byte[] guestCode, byte[] cb1Data) = _guestStorage.LoadShader( + guestShaders[stageIndex] = _guestStorage.LoadShader( guestTocFileStream, guestDataFileStream, stageEntry.GuestCodeIndex); - shaders[stageIndex] = new CachedShaderStage(info, guestCode, cb1Data); - stagesBitMask &= ~(1u << stageIndex); } @@ -351,17 +361,38 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (loadHostCache) { - byte[] hostCode = ReadHostCode(context, ref hostTocFileStream, ref hostDataFileStream, programIndex); + (byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode( + context, + ref hostTocFileStream, + ref hostDataFileStream, + guestShaders, + programIndex); if (hostCode != null) { bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null; int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1; - IProgram hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, new ShaderInfo(fragmentOutputMap)); + + ShaderInfo shaderInfo = specState.PipelineState.HasValue + ? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value) + : new ShaderInfo(fragmentOutputMap); + + IProgram hostProgram; + + if (context.Capabilities.Api == TargetApi.Vulkan) + { + ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode, isCompute); + + hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo); + } + else + { + hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo); + } CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders); - loader.QueueHostProgram(program, hostProgram, programIndex, isCompute); + loader.QueueHostProgram(program, hostCode, programIndex, isCompute); } else { @@ -371,7 +402,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (!loadHostCache) { - loader.QueueGuestProgram(shaders, specState, programIndex, isCompute); + loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute); } loader.CheckCompilation(); @@ -393,9 +424,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// GPU context /// Host TOC file stream, intialized if needed /// Host data file stream, initialized if needed + /// Guest shader code for each active stage /// Index of the program on the cache /// Host binary code, or null if not found - private byte[] ReadHostCode(GpuContext context, ref Stream tocFileStream, ref Stream dataFileStream, int programIndex) + private (byte[], CachedShaderStage[]) ReadHostCode( + GpuContext context, + ref Stream tocFileStream, + ref Stream dataFileStream, + GuestCodeAndCbData?[] guestShaders, + int programIndex) { if (tocFileStream == null && dataFileStream == null) { @@ -404,7 +441,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath)) { - return null; + return (null, null); } tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false); @@ -414,7 +451,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache int offset = Unsafe.SizeOf() + programIndex * Unsafe.SizeOf(); if (offset + Unsafe.SizeOf() > tocFileStream.Length) { - return null; + return (null, null); } if ((ulong)offset >= (ulong)dataFileStream.Length) @@ -436,11 +473,33 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin); - byte[] hostCode = new byte[offsetAndSize.Size]; + byte[] hostCode = new byte[offsetAndSize.UncompressedSize]; BinarySerializer.ReadCompressed(dataFileStream, hostCode); - return hostCode; + CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; + BinarySerializer dataReader = new BinarySerializer(dataFileStream); + + dataFileStream.Seek((long)(offsetAndSize.Offset + offsetAndSize.CompressedSize), SeekOrigin.Begin); + + dataReader.BeginCompression(); + + for (int index = 0; index < guestShaders.Length; index++) + { + if (!guestShaders[index].HasValue) + { + continue; + } + + GuestCodeAndCbData guestShader = guestShaders[index].Value; + ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null; + + shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data); + } + + dataReader.EndCompression(); + + return (hostCode, shaders); } /// @@ -519,8 +578,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data); dataWriter.Write(ref stageEntry); - - WriteShaderProgramInfo(ref dataWriter, shader.Info); } program.SpecializationState.Write(ref dataWriter); @@ -537,7 +594,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return; } - WriteHostCode(context, hostCode, -1, streams); + WriteHostCode(context, hostCode, program.Shaders, streams); } /// @@ -574,29 +631,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataFileStream.SetLength(0); } - /// - /// Adds a host binary shader to the host cache. - /// - /// - /// This only modifies the host cache. The shader must already exist in the other caches. - /// This method should only be used for rebuilding the host cache after a clear. - /// - /// GPU context - /// Host binary code - /// Index of the program in the cache - public void AddHostShader(GpuContext context, ReadOnlySpan hostCode, int programIndex) - { - WriteHostCode(context, hostCode, programIndex); - } - /// /// Writes the host binary code on the host cache. /// /// GPU context /// Host binary code - /// Index of the program in the cache + /// Shader stages to be added to the host cache /// Output streams to use - private void WriteHostCode(GpuContext context, ReadOnlySpan hostCode, int programIndex, DiskCacheOutputStreams streams = null) + private void WriteHostCode(GpuContext context, ReadOnlySpan hostCode, CachedShaderStage[] shaders, DiskCacheOutputStreams streams = null) { var tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true); var dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true); @@ -607,26 +649,36 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache CreateToc(tocFileStream, ref header, TochMagic, 0); } - if (programIndex == -1) - { - tocFileStream.Seek(0, SeekOrigin.End); - } - else - { - tocFileStream.Seek(Unsafe.SizeOf() + (programIndex * Unsafe.SizeOf()), SeekOrigin.Begin); - } - + tocFileStream.Seek(0, SeekOrigin.End); dataFileStream.Seek(0, SeekOrigin.End); BinarySerializer tocWriter = new BinarySerializer(tocFileStream); + BinarySerializer dataWriter = new BinarySerializer(dataFileStream); OffsetAndSize offsetAndSize = new OffsetAndSize(); offsetAndSize.Offset = (ulong)dataFileStream.Position; - offsetAndSize.Size = (uint)hostCode.Length; - tocWriter.Write(ref offsetAndSize); + offsetAndSize.UncompressedSize = (uint)hostCode.Length; + + long dataStartPosition = dataFileStream.Position; BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm()); + offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition); + + tocWriter.Write(ref offsetAndSize); + + dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm()); + + for (int index = 0; index < shaders.Length; index++) + { + if (shaders[index] != null) + { + WriteShaderProgramInfo(ref dataWriter, shaders[index].Info); + } + } + + dataWriter.EndCompression(); + if (streams == null) { tocFileStream.Dispose(); diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs new file mode 100644 index 000000000..b1ac819e6 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs @@ -0,0 +1,31 @@ +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Guest shader code and constant buffer data accessed by the shader. + /// + struct GuestCodeAndCbData + { + /// + /// Maxwell binary shader code. + /// + public byte[] Code { get; } + + /// + /// Constant buffer 1 data accessed by the shader. + /// + public byte[] Cb1Data { get; } + + /// + /// Creates a new instance of the guest shader code and constant buffer data. + /// + /// Maxwell binary shader code + /// Constant buffer 1 data accessed by the shader + public GuestCodeAndCbData(byte[] code, byte[] cb1Data) + { + Code = code; + Cb1Data = cb1Data; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index 803b06766..825119688 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -45,9 +45,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache public readonly CachedShaderProgram CachedProgram; /// - /// Host program. + /// Optional binary code. If not null, it is used instead of the backend host binary. /// - public readonly IProgram HostProgram; + public readonly byte[] BinaryCode; /// /// Program index. @@ -68,19 +68,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Creates a new program validation entry. /// /// Cached shader program - /// Host program /// Program index /// Indicates if the program is a compute shader /// Indicates if the program is a host binary shader public ProgramEntry( CachedShaderProgram cachedProgram, - IProgram hostProgram, + byte[] binaryCode, int programIndex, bool isCompute, bool isBinary) { CachedProgram = cachedProgram; - HostProgram = hostProgram; + BinaryCode = binaryCode; ProgramIndex = programIndex; IsCompute = isCompute; IsBinary = isBinary; @@ -146,9 +145,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private struct AsyncProgramTranslation { /// - /// Cached shader stages. + /// Guest code for each active stage. /// - public readonly CachedShaderStage[] Shaders; + public readonly GuestCodeAndCbData?[] GuestShaders; /// /// Specialization state. @@ -168,17 +167,17 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Creates a new program translation entry. /// - /// Cached shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader public AsyncProgramTranslation( - CachedShaderStage[] shaders, + GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { - Shaders = shaders; + GuestShaders = guestShaders; SpecializationState = specState; ProgramIndex = programIndex; IsCompute = isCompute; @@ -188,7 +187,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private readonly Queue _validationQueue; private readonly ConcurrentQueue _compilationQueue; private readonly BlockingCollection _asyncTranslationQueue; - private readonly SortedList _programList; + private readonly SortedList _programList; private int _backendParallelCompileThreads; private int _compiledCount; @@ -220,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _validationQueue = new Queue(); _compilationQueue = new ConcurrentQueue(); _asyncTranslationQueue = new BlockingCollection(ThreadCount); - _programList = new SortedList(); + _programList = new SortedList(); _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code. } @@ -235,7 +234,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { workThreads[index] = new Thread(ProcessAsyncQueue) { - Name = $"Gpu.AsyncTranslationThread.{index}" + Name = $"GPU.AsyncTranslationThread.{index}" }; } @@ -287,7 +286,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache CheckCompilationBlocking(); - if (_needsHostRegen) + if (_needsHostRegen && Active) { // Rebuild both shared and host cache files. // Rebuilding shared is required because the shader information returned by the translator @@ -310,8 +309,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache break; } - CachedShaderProgram program = kv.Value; - _hostStorage.AddShader(_context, program, program.HostProgram.GetBinary(), streams); + (CachedShaderProgram program, byte[] binaryCode) = kv.Value; + _hostStorage.AddShader(_context, program, binaryCode, streams); } Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully."); @@ -342,24 +341,31 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Enqueues a host program for compilation. /// /// Cached program - /// Host program to be compiled + /// Host binary code /// Program index /// Indicates if the program is a compute shader - public void QueueHostProgram(CachedShaderProgram cachedProgram, IProgram hostProgram, int programIndex, bool isCompute) + public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute) { - EnqueueForValidation(new ProgramEntry(cachedProgram, hostProgram, programIndex, isCompute, isBinary: true)); + EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true)); } /// /// Enqueues a guest program for compilation. /// - /// Cached shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader - public void QueueGuestProgram(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { - _asyncTranslationQueue.Add(new AsyncProgramTranslation(shaders, specState, programIndex, isCompute)); + try + { + AsyncProgramTranslation asyncTranslation = new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute); + _asyncTranslationQueue.Add(asyncTranslation, _cancellationToken); + } + catch (OperationCanceledException) + { + } } /// @@ -374,7 +380,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation. while (_validationQueue.TryPeek(out ProgramEntry entry)) { - ProgramLinkStatus result = entry.HostProgram.CheckProgramLink(false); + ProgramLinkStatus result = entry.CachedProgram.HostProgram.CheckProgramLink(false); if (result != ProgramLinkStatus.Incomplete) { @@ -398,7 +404,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active) { - ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false); } } @@ -427,7 +433,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _needsHostRegen = true; } - _programList.Add(entry.ProgramIndex, entry.CachedProgram); + _programList.Add(entry.ProgramIndex, (entry.CachedProgram, entry.BinaryCode)); SignalCompiled(); } else if (entry.IsBinary) @@ -436,13 +442,25 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // we still have a chance to recompile from the guest binary. CachedShaderProgram program = entry.CachedProgram; + GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[program.Shaders.Length]; + + for (int index = 0; index < program.Shaders.Length; index++) + { + CachedShaderStage shader = program.Shaders[index]; + + if (shader != null) + { + guestShaders[index] = new GuestCodeAndCbData(shader.Code, shader.Cb1Data); + } + } + if (asyncCompile) { - QueueGuestProgram(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + QueueGuestProgram(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); } else { - RecompileFromGuestCode(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + RecompileFromGuestCode(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); ProcessCompilationQueue(); } } @@ -476,10 +494,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache } } - IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, new ShaderInfo(fragmentOutputMap)); + ShaderInfo shaderInfo = compilation.SpecializationState.PipelineState.HasValue + ? new ShaderInfo(fragmentOutputMap, compilation.SpecializationState.PipelineState.Value) + : new ShaderInfo(fragmentOutputMap); + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, shaderInfo); CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders); - EnqueueForValidation(new ProgramEntry(program, hostProgram, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); + byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(shaderSources) : hostProgram.GetBinary(); + + EnqueueForValidation(new ProgramEntry(program, binaryCode, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); } } @@ -496,7 +520,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // Submitting more seems to cause NVIDIA OpenGL driver to crash. if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry)) { - ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false); } } @@ -513,7 +537,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct)) { RecompileFromGuestCode( - asyncCompilation.Shaders, + asyncCompilation.GuestShaders, asyncCompilation.SpecializationState, asyncCompilation.ProgramIndex, asyncCompilation.IsCompute); @@ -527,21 +551,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a program from guest code. /// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader - private void RecompileFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { try { if (isCompute) { - RecompileComputeFromGuestCode(shaders, specState, programIndex); + RecompileComputeFromGuestCode(guestShaders, specState, programIndex); } else { - RecompileGraphicsFromGuestCode(shaders, specState, programIndex); + RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex); } } catch (DiskCacheLoadException diskCacheLoadException) @@ -556,12 +580,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a graphics program from guest code. /// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index - private void RecompileGraphicsFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex) { - ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.GraphicsState, specState.TransformFeedbackDescriptors); + ShaderSpecializationState newSpecState = new ShaderSpecializationState( + ref specState.GraphicsState, + specState.PipelineState, + specState.TransformFeedbackDescriptors); + ResourceCounts counts = new ResourceCounts(); TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; @@ -571,10 +599,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) { - CachedShaderStage shader = shaders[stageIndex + 1]; - - if (shader != null) + if (guestShaders[stageIndex + 1].HasValue) { + GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value; + byte[] guestCode = shader.Code; byte[] cb1Data = shader.Cb1Data; @@ -586,10 +614,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache currentStage.SetNextStage(nextStage); } - if (stageIndex == 0 && shaders[0] != null) + if (stageIndex == 0 && guestShaders[0].HasValue) { - byte[] guestCodeA = shaders[0].Code; - byte[] cb1DataA = shaders[0].Cb1Data; + byte[] guestCodeA = guestShaders[0].Value.Code; + byte[] cb1DataA = guestShaders[0].Value.Cb1Data; DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0); translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0); @@ -600,6 +628,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache } } + CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; List translatedStages = new List(); for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) @@ -610,15 +639,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { ShaderProgram program; - byte[] guestCode = shaders[stageIndex + 1].Code; - byte[] cb1Data = shaders[stageIndex + 1].Cb1Data; + byte[] guestCode = guestShaders[stageIndex + 1].Value.Code; + byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data; - if (stageIndex == 0 && shaders[0] != null) + if (stageIndex == 0 && guestShaders[0].HasValue) { program = currentStage.Translate(translatorContexts[0]); - byte[] guestCodeA = shaders[0].Code; - byte[] cb1DataA = shaders[0].Cb1Data; + byte[] guestCodeA = guestShaders[0].Value.Code; + byte[] cb1DataA = guestShaders[0].Value.Cb1Data; shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA); shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data); @@ -643,21 +672,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a compute program from guest code. /// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index - private void RecompileComputeFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex) { - CachedShaderStage shader = shaders[0]; + GuestCodeAndCbData shader = guestShaders[0].Value; ResourceCounts counts = new ResourceCounts(); - ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.ComputeState); + ShaderSpecializationState newSpecState = new ShaderSpecializationState(ref specState.ComputeState); DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0); ShaderProgram program = translatorContext.Translate(); - shaders[0] = new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data); + CachedShaderStage[] shaders = new[] { new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data) }; _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true)); } diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs new file mode 100644 index 000000000..11e54220f --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs @@ -0,0 +1,49 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader.Translation; +using System.Collections.Generic; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + static class ShaderBinarySerializer + { + public static byte[] Pack(ShaderSource[] sources) + { + using MemoryStream output = new MemoryStream(); + using BinaryWriter writer = new BinaryWriter(output); + + for (int i = 0; i < sources.Length; i++) + { + writer.Write(sources[i].BinaryCode.Length); + writer.Write(sources[i].BinaryCode); + } + + return output.ToArray(); + } + + public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code, bool compute) + { + using MemoryStream input = new MemoryStream(code); + using BinaryReader reader = new BinaryReader(input); + + List output = new List(); + + for (int i = compute ? 0 : 1; i < stages.Length; i++) + { + CachedShaderStage stage = stages[i]; + + if (stage == null) + { + continue; + } + + int binaryCodeLength = reader.ReadInt32(); + byte[] binaryCode = reader.ReadBytes(binaryCodeLength); + + output.Add(new ShaderSource(binaryCode, ShaderCache.GetBindings(stage.Info), stage.Info.Stage, TargetLanguage.Spirv)); + } + + return output.ToArray(); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 33d3c48f9..cba7edfe0 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -62,11 +62,13 @@ namespace Ryujinx.Graphics.Gpu.Shader { public readonly CachedShaderProgram CachedProgram; public readonly IProgram HostProgram; + public readonly byte[] BinaryCode; - public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram) + public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode) { CachedProgram = cachedProgram; HostProgram = hostProgram; + BinaryCode = binaryCode; } } @@ -126,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { if (result == ProgramLinkStatus.Success) { - _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary()); + _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary()); } _programsToSaveQueue.Dequeue(); @@ -146,7 +148,9 @@ namespace Ryujinx.Graphics.Gpu.Shader { if (_diskCacheHostStorage.CacheEnabled) { - if (!_diskCacheHostStorage.CacheExists()) + // Migration disabled as Vulkan added a lot of new state, + // most migrated shaders would be unused due to the state not matching. + /* if (!_diskCacheHostStorage.CacheExists()) { // If we don't have a shader cache on the new format, try to perform migration from the old shader cache. Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache..."); @@ -154,7 +158,7 @@ namespace Ryujinx.Graphics.Gpu.Shader int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage); Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders."); - } + } */ ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader( _context, @@ -213,7 +217,7 @@ namespace Ryujinx.Graphics.Gpu.Shader return cpShader; } - ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState); GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState); GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState); @@ -221,12 +225,14 @@ namespace Ryujinx.Graphics.Gpu.Shader TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); - IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1)); + ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) }; + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(-1)); cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); _computeShaderCache.Add(cpShader); - EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram)); + EnqueueProgramToSave(cpShader, hostProgram, shaderSourcesArray); _cpPrograms[gpuVa] = cpShader; return cpShader; @@ -307,7 +313,7 @@ namespace Ryujinx.Graphics.Gpu.Shader TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state); - ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors); + ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors); GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors); ReadOnlySpan addressesSpan = addresses.AsSpan(); @@ -385,13 +391,15 @@ namespace Ryujinx.Graphics.Gpu.Shader UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel); + ShaderSource[] shaderSourcesArray = shaderSources.ToArray(); + int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1; - IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap, pipeline)); + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline)); gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); _graphicsShaderCache.Add(gpShaders); - EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram)); + EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray); _gpPrograms[addresses] = gpShaders; return gpShaders; @@ -413,9 +421,15 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// This will not do anything if disk shader cache is disabled. /// - /// Program to be saved on disk - private void EnqueueProgramToSave(ProgramToSave programToSave) + /// Cached shader program + /// Host program + /// Source for each shader stage + private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources) { + byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(sources) : null; + + ProgramToSave programToSave = new ProgramToSave(program, hostProgram, binaryCode); + if (_diskCacheHostStorage.CacheEnabled) { _programsToSaveQueue.Enqueue(programToSave); @@ -646,7 +660,7 @@ namespace Ryujinx.Graphics.Gpu.Shader }; } - private static ShaderBindings GetBindings(ShaderProgramInfo info) + public static ShaderBindings GetBindings(ShaderProgramInfo info) { static bool IsBuffer(Graphics.Shader.TextureDescriptor descriptor) { diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs index 172ce14ce..3df9d1199 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -1,6 +1,7 @@ using Ryujinx.Common.Memory; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Shader.DiskCache; using Ryujinx.Graphics.Shader; using System; @@ -19,6 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Shader private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24); private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24); private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | ((byte)'P' << 16) | ((byte)'S' << 24); /// /// Flags indicating GPU state that is used by the shader. @@ -51,6 +53,11 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public Array5 ConstantBufferUse; + /// + /// Optional pipeline state captured at the time of the shader use. + /// + public ProgramPipelineState? PipelineState; + /// /// Transform feedback buffers active at the time the shader was compiled. /// @@ -179,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Creates a new instance of the shader specialization state. /// /// Current compute engine state - public ShaderSpecializationState(GpuChannelComputeState state) : this() + public ShaderSpecializationState(ref GpuChannelComputeState state) : this() { ComputeState = state; _compute = true; @@ -190,7 +197,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Current 3D engine state /// Optional transform feedback buffers in use, if any - public ShaderSpecializationState(GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() + private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() { GraphicsState = state; _compute = false; @@ -244,6 +251,34 @@ namespace Ryujinx.Graphics.Gpu.Shader } } + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Current program pipeline state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState( + ref GpuChannelGraphicsState state, + ref ProgramPipelineState pipelineState, + TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors) + { + PipelineState = pipelineState; + } + + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Current program pipeline state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState( + ref GpuChannelGraphicsState state, + ProgramPipelineState? pipelineState, + TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors) + { + PipelineState = pipelineState; + } + /// /// Indicates that the shader accesses the early Z force state. /// @@ -697,6 +732,17 @@ namespace Ryujinx.Graphics.Gpu.Shader constantBufferUsePerStageMask &= ~(1 << index); } + bool hasPipelineState = false; + + dataReader.Read(ref hasPipelineState); + + if (hasPipelineState) + { + ProgramPipelineState pipelineState = default; + dataReader.ReadWithMagicAndSize(ref pipelineState, PgpsMagic); + specState.PipelineState = pipelineState; + } + if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) { ushort tfCount = 0; @@ -755,6 +801,16 @@ namespace Ryujinx.Graphics.Gpu.Shader constantBufferUsePerStageMask &= ~(1 << index); } + bool hasPipelineState = PipelineState.HasValue; + + dataWriter.Write(ref hasPipelineState); + + if (hasPipelineState) + { + ProgramPipelineState pipelineState = PipelineState.Value; + dataWriter.WriteWithMagicAndSize(ref pipelineState, PgpsMagic); + } + if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) { ushort tfCount = (ushort)TransformFeedbackDescriptors.Length; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index e219c3ed1..d160671ca 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -107,6 +107,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.Minimum, GenerateMinimum); Add(Instruction.MinimumU32, GenerateMinimumU32); Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.MultiplyHighS32, GenerateMultiplyHighS32); + Add(Instruction.MultiplyHighU32, GenerateMultiplyHighU32); Add(Instruction.Negate, GenerateNegate); Add(Instruction.PackDouble2x32, GeneratePackDouble2x32); Add(Instruction.PackHalf2x16, GeneratePackHalf2x16); @@ -1090,6 +1092,30 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul); } + private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var resultType = context.TypeStruct(false, context.TypeS32(), context.TypeS32()); + var result = context.SMulExtended(resultType, context.GetS32(src1), context.GetS32(src2)); + result = context.CompositeExtract(context.TypeS32(), result, 1); + + return new OperationResult(AggregateType.S32, result); + } + + private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var resultType = context.TypeStruct(false, context.TypeU32(), context.TypeU32()); + var result = context.UMulExtended(resultType, context.GetU32(src1), context.GetU32(src2)); + result = context.CompositeExtract(context.TypeU32(), result, 1); + + return new OperationResult(AggregateType.U32, result); + } + private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation) { return GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate); diff --git a/Ryujinx.Graphics.Vulkan/ShaderCollection.cs b/Ryujinx.Graphics.Vulkan/ShaderCollection.cs index 5fcb91ea2..2373d31f8 100644 --- a/Ryujinx.Graphics.Vulkan/ShaderCollection.cs +++ b/Ryujinx.Graphics.Vulkan/ShaderCollection.cs @@ -300,7 +300,7 @@ namespace Ryujinx.Graphics.Vulkan public byte[] GetBinary() { - throw new System.NotImplementedException(); + return null; } public void AddComputePipeline(Auto pipeline) diff --git a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs index 8c0834006..66b6fa462 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs @@ -226,7 +226,9 @@ namespace Ryujinx.Graphics.Vulkan public IProgram CreateProgram(IShader[] shaders, ShaderInfo info) { - if (info.BackgroundCompile && info.State.HasValue && VulkanConfiguration.UseDynamicState) + bool isCompute = shaders.Length == 1 && ((Shader)shaders[0]).StageFlags == ShaderStageFlags.ShaderStageComputeBit; + + if (info.BackgroundCompile && (info.State.HasValue || isCompute) && VulkanConfiguration.UseDynamicState) { return new ShaderCollection(this, _device, shaders, info.State.Value); } diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs index 51297bb77..be7956437 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -1037,7 +1037,7 @@ namespace Ryujinx.Ui Graphics.Gpu.GraphicsConfig.ResScale = (resScale == -1) ? resScaleCustom : resScale; Graphics.Gpu.GraphicsConfig.MaxAnisotropy = ConfigurationState.Instance.Graphics.MaxAnisotropy; Graphics.Gpu.GraphicsConfig.ShadersDumpPath = ConfigurationState.Instance.Graphics.ShadersDumpPath; - Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache && ConfigurationState.Instance.Graphics.GraphicsBackend != GraphicsBackend.Vulkan; + Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache; } public void SaveConfig()