From c95cdc853eccc92d3ee40b46b63426a8de866341 Mon Sep 17 00:00:00 2001
From: gdk <gab.dark.100@gmail.com>
Date: Mon, 11 Apr 2022 12:17:33 -0300
Subject: [PATCH] Enable shader cache on Vulkan and implement
 MultiplyHighS32/U32 on SPIR-V (missed those before)

---
 .../Shader/Cache/Migration.cs                 |   6 +-
 .../DiskCache/BackgroundDiskCacheWriter.cs    |   2 +-
 .../Shader/DiskCache/DiskCacheGpuAccessor.cs  |  54 ++++++-
 .../Shader/DiskCache/DiskCacheGuestStorage.cs |   6 +-
 .../Shader/DiskCache/DiskCacheHostStorage.cs  | 150 ++++++++++++------
 .../Shader/DiskCache/GuestCodeAndCbData.cs    |  31 ++++
 .../DiskCache/ParallelDiskCacheLoader.cs      | 137 +++++++++-------
 .../DiskCache/ShaderBinarySerializer.cs       |  49 ++++++
 Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs    |  40 +++--
 .../Shader/ShaderSpecializationState.cs       |  60 ++++++-
 .../CodeGen/Spirv/Instructions.cs             |  26 +++
 Ryujinx.Graphics.Vulkan/ShaderCollection.cs   |   2 +-
 .../VulkanGraphicsDevice.cs                   |   4 +-
 Ryujinx/Ui/MainWindow.cs                      |   2 +-
 14 files changed, 441 insertions(+), 128 deletions(-)
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs

diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs
index 49acd3add..92f15139c 100644
--- a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs
@@ -105,7 +105,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
                             entry.Header.GpuAccessorHeader.ComputeLocalMemorySize,
                             entry.Header.GpuAccessorHeader.ComputeSharedMemorySize);
 
-                        ShaderSpecializationState specState = new ShaderSpecializationState(computeState);
+                        ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState);
 
                         foreach (var td in entry.TextureDescriptors)
                         {
@@ -198,7 +198,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
                             }
                         }
 
-                        ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew);
+                        ProgramPipelineState pipelineState = default;
+
+                        ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipelineState, tfdNew);
 
                         for (int i = 0; i < entries.Length; i++)
                         {
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
index 5c5e41c69..98655ed68 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
@@ -83,7 +83,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         {
             _context = context;
             _hostStorage = hostStorage;
-            _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(ProcessTask, "Gpu.BackgroundDiskCacheWriter");
+            _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(ProcessTask, "GPU.BackgroundDiskCacheWriter");
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
index 1ab9e8655..81569080b 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
@@ -1,6 +1,8 @@
 using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Runtime.InteropServices;
 
@@ -16,7 +18,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private readonly ShaderSpecializationState _oldSpecState;
         private readonly ShaderSpecializationState _newSpecState;
         private readonly int _stageIndex;
-        private ResourceCounts _resourceCounts;
+        private readonly bool _isVulkan;
+        private readonly ResourceCounts _resourceCounts;
 
         /// <summary>
         /// Creates a new instance of the cached GPU state accessor for shader translation.
@@ -41,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             _oldSpecState = oldSpecState;
             _newSpecState = newSpecState;
             _stageIndex = stageIndex;
+            _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
             _resourceCounts = counts;
         }
 
@@ -67,6 +71,36 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             return MemoryMarshal.Cast<byte, ulong>(_data.Span.Slice((int)address));
         }
 
+        /// <inheritdoc/>
+        public AlphaTestOp QueryAlphaTestCompare()
+        {
+            if (!_isVulkan || !_oldSpecState.GraphicsState.AlphaTestEnable)
+            {
+                return AlphaTestOp.Always;
+            }
+
+            return _oldSpecState.GraphicsState.AlphaTestCompare switch
+            {
+                CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
+                CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
+                CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
+                CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
+                CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
+                CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
+                CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
+                _ => AlphaTestOp.Always
+            };
+        }
+
+        /// <inheritdoc/>
+        public float QueryAlphaTestReference() => _oldSpecState.GraphicsState.AlphaTestReference;
+
+        /// <inheritdoc/>
+        public AttributeType QueryAttributeType(int location)
+        {
+            return _oldSpecState.GraphicsState.AttributeTypes[location];
+        }
+
         /// <inheritdoc/>
         public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX;
 
@@ -96,6 +130,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode);
         }
 
+        /// <inheritdoc/>
+        public bool QueryProgramPointSize()
+        {
+            return _oldSpecState.GraphicsState.ProgramPointSizeEnable;
+        }
+
+        /// <inheritdoc/>
+        public float QueryPointSize()
+        {
+            return _oldSpecState.GraphicsState.PointSize;
+        }
+
         /// <inheritdoc/>
         public bool QueryTessCw()
         {
@@ -136,6 +182,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot);
         }
 
+        /// <inheritdoc/>
+        public bool QueryTransformDepthMinusOneToOne()
+        {
+            return _oldSpecState.GraphicsState.DepthMode;
+        }
+
         /// <inheritdoc/>
         public bool QueryTransformFeedbackEnabled()
         {
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs
index 4e338094f..b31428281 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs
@@ -193,8 +193,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// <param name="tocFileStream">Guest TOC file stream</param>
         /// <param name="dataFileStream">Guest data file stream</param>
         /// <param name="index">Guest shader index</param>
-        /// <returns>Tuple with the guest code and constant buffer 1 data, respectively</returns>
-        public (byte[], byte[]) LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
+        /// <returns>Guest code and constant buffer 1 data</returns>
+        public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
         {
             if (_cache == null || index >= _cache.Length)
             {
@@ -226,7 +226,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 _cache[index] = (guestCode, cb1Data);
             }
 
-            return (guestCode, cb1Data);
+            return new GuestCodeAndCbData(guestCode, cb1Data);
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 5d99957f0..799838ec1 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -1,5 +1,6 @@
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.IO;
 using System.Numerics;
@@ -19,7 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24);
 
         private const ushort FileFormatVersionMajor = 1;
-        private const ushort FileFormatVersionMinor = 1;
+        private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
         private const uint CodeGenVersion = 1;
 
@@ -77,9 +78,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             public ulong Offset;
 
             /// <summary>
-            /// Size.
+            /// Size of uncompressed data.
             /// </summary>
-            public uint Size;
+            public uint UncompressedSize;
+
+            /// <summary>
+            /// Size of compressed data.
+            /// </summary>
+            public uint CompressedSize;
         }
 
         /// <summary>
@@ -196,6 +202,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private static string GetHostFileName(GpuContext context)
         {
             string apiName = context.Capabilities.Api.ToString().ToLowerInvariant();
+
+            // We are just storing SPIR-V directly on Vulkan, so the code won't change per vendor.
+            // We can just have a single file for all vendors.
+            if (context.Capabilities.Api == TargetApi.Vulkan)
+            {
+                return apiName;
+            }
+
             string vendorName = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant());
             return $"{apiName}_{vendorName}";
         }
@@ -324,7 +338,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                         stagesBitMask = 1;
                     }
 
-                    CachedShaderStage[] shaders = new CachedShaderStage[isCompute ? 1 : Constants.ShaderStages + 1];
+                    GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1];
 
                     DataEntryPerStage stageEntry = new DataEntryPerStage();
 
@@ -334,15 +348,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                         dataReader.Read(ref stageEntry);
 
-                        ShaderProgramInfo info = stageIndex != 0 || isCompute ? ReadShaderProgramInfo(ref dataReader) : null;
-
-                        (byte[] guestCode, byte[] cb1Data) = _guestStorage.LoadShader(
+                        guestShaders[stageIndex] = _guestStorage.LoadShader(
                             guestTocFileStream,
                             guestDataFileStream,
                             stageEntry.GuestCodeIndex);
 
-                        shaders[stageIndex] = new CachedShaderStage(info, guestCode, cb1Data);
-
                         stagesBitMask &= ~(1u << stageIndex);
                     }
 
@@ -351,17 +361,38 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                     if (loadHostCache)
                     {
-                        byte[] hostCode = ReadHostCode(context, ref hostTocFileStream, ref hostDataFileStream, programIndex);
+                        (byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode(
+                            context,
+                            ref hostTocFileStream,
+                            ref hostDataFileStream,
+                            guestShaders,
+                            programIndex);
 
                         if (hostCode != null)
                         {
                             bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null;
                             int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1;
-                            IProgram hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, new ShaderInfo(fragmentOutputMap));
+
+                            ShaderInfo shaderInfo = specState.PipelineState.HasValue
+                                ? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value)
+                                : new ShaderInfo(fragmentOutputMap);
+
+                            IProgram hostProgram;
+
+                            if (context.Capabilities.Api == TargetApi.Vulkan)
+                            {
+                                ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode, isCompute);
+
+                                hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo);
+                            }
+                            else
+                            {
+                                hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo);
+                            }
 
                             CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);
 
-                            loader.QueueHostProgram(program, hostProgram, programIndex, isCompute);
+                            loader.QueueHostProgram(program, hostCode, programIndex, isCompute);
                         }
                         else
                         {
@@ -371,7 +402,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                     if (!loadHostCache)
                     {
-                        loader.QueueGuestProgram(shaders, specState, programIndex, isCompute);
+                        loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute);
                     }
 
                     loader.CheckCompilation();
@@ -393,9 +424,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// <param name="context">GPU context</param>
         /// <param name="tocFileStream">Host TOC file stream, intialized if needed</param>
         /// <param name="dataFileStream">Host data file stream, initialized if needed</param>
+        /// <param name="guestShaders">Guest shader code for each active stage</param>
         /// <param name="programIndex">Index of the program on the cache</param>
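-        /// <returns>Host binary code, or null if not found</returns>
+        /// <returns>Tuple with the host binary code and the cached shader stages, respectively, or null for both if not found</returns>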
-        private byte[] ReadHostCode(GpuContext context, ref Stream tocFileStream, ref Stream dataFileStream, int programIndex)
+        private (byte[], CachedShaderStage[]) ReadHostCode(
+            GpuContext context,
+            ref Stream tocFileStream,
+            ref Stream dataFileStream,
+            GuestCodeAndCbData?[] guestShaders,
+            int programIndex)
         {
             if (tocFileStream == null && dataFileStream == null)
             {
@@ -404,7 +441,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                 if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath))
                 {
-                    return null;
+                    return (null, null);
                 }
 
                 tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false);
@@ -414,7 +451,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             int offset = Unsafe.SizeOf<TocHeader>() + programIndex * Unsafe.SizeOf<OffsetAndSize>();
             if (offset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
             {
-                return null;
+                return (null, null);
             }
 
             if ((ulong)offset >= (ulong)dataFileStream.Length)
@@ -436,11 +473,33 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
             dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);
 
-            byte[] hostCode = new byte[offsetAndSize.Size];
+            byte[] hostCode = new byte[offsetAndSize.UncompressedSize];
 
             BinarySerializer.ReadCompressed(dataFileStream, hostCode);
 
-            return hostCode;
+            CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
+            BinarySerializer dataReader = new BinarySerializer(dataFileStream);
+
+            dataFileStream.Seek((long)(offsetAndSize.Offset + offsetAndSize.CompressedSize), SeekOrigin.Begin);
+
+            dataReader.BeginCompression();
+
+            for (int index = 0; index < guestShaders.Length; index++)
+            {
+                if (!guestShaders[index].HasValue)
+                {
+                    continue;
+                }
+
+                GuestCodeAndCbData guestShader = guestShaders[index].Value;
+                ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null;
+
+                shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data);
+            }
+
+            dataReader.EndCompression();
+
+            return (hostCode, shaders);
         }
 
         /// <summary>
@@ -519,8 +578,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);
 
                 dataWriter.Write(ref stageEntry);
-
-                WriteShaderProgramInfo(ref dataWriter, shader.Info);
             }
 
             program.SpecializationState.Write(ref dataWriter);
@@ -537,7 +594,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 return;
             }
 
-            WriteHostCode(context, hostCode, -1, streams);
+            WriteHostCode(context, hostCode, program.Shaders, streams);
         }
 
         /// <summary>
@@ -574,29 +631,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             dataFileStream.SetLength(0);
         }
 
-        /// <summary>
-        /// Adds a host binary shader to the host cache.
-        /// </summary>
-        /// <remarks>
-        /// This only modifies the host cache. The shader must already exist in the other caches.
-        /// This method should only be used for rebuilding the host cache after a clear.
-        /// </remarks>
-        /// <param name="context">GPU context</param>
-        /// <param name="hostCode">Host binary code</param>
-        /// <param name="programIndex">Index of the program in the cache</param>
-        public void AddHostShader(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex)
-        {
-            WriteHostCode(context, hostCode, programIndex);
-        }
-
         /// <summary>
         /// Writes the host binary code on the host cache.
         /// </summary>
         /// <param name="context">GPU context</param>
         /// <param name="hostCode">Host binary code</param>
-        /// <param name="programIndex">Index of the program in the cache</param>
+        /// <param name="shaders">Shader stages to be added to the host cache</param>
         /// <param name="streams">Output streams to use</param>
-        private void WriteHostCode(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex, DiskCacheOutputStreams streams = null)
+        private void WriteHostCode(GpuContext context, ReadOnlySpan<byte> hostCode, CachedShaderStage[] shaders, DiskCacheOutputStreams streams = null)
         {
             var tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
             var dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);
@@ -607,26 +649,36 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 CreateToc(tocFileStream, ref header, TochMagic, 0);
             }
 
-            if (programIndex == -1)
-            {
-                tocFileStream.Seek(0, SeekOrigin.End);
-            }
-            else
-            {
-                tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (programIndex * Unsafe.SizeOf<OffsetAndSize>()), SeekOrigin.Begin);
-            }
-
+            tocFileStream.Seek(0, SeekOrigin.End);
             dataFileStream.Seek(0, SeekOrigin.End);
 
             BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
+            BinarySerializer dataWriter = new BinarySerializer(dataFileStream);
 
             OffsetAndSize offsetAndSize = new OffsetAndSize();
             offsetAndSize.Offset = (ulong)dataFileStream.Position;
-            offsetAndSize.Size = (uint)hostCode.Length;
-            tocWriter.Write(ref offsetAndSize);
+            offsetAndSize.UncompressedSize = (uint)hostCode.Length;
+
+            long dataStartPosition = dataFileStream.Position;
 
             BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());
 
+            offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition);
+
+            tocWriter.Write(ref offsetAndSize);
+
+            dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
+
+            for (int index = 0; index < shaders.Length; index++)
+            {
+                if (shaders[index] != null)
+                {
+                    WriteShaderProgramInfo(ref dataWriter, shaders[index].Info);
+                }
+            }
+
+            dataWriter.EndCompression();
+
             if (streams == null)
             {
                 tocFileStream.Dispose();
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
new file mode 100644
index 000000000..b1ac819e6
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
@@ -0,0 +1,31 @@
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Immutable pair of guest shader code and the constant buffer data accessed by the shader.
+    /// </summary>
+    readonly struct GuestCodeAndCbData
+    {
+        /// <summary>
+        /// Maxwell binary shader code.
+        /// </summary>
+        public byte[] Code { get; }
+
+        /// <summary>
+        /// Constant buffer 1 data accessed by the shader.
+        /// </summary>
+        public byte[] Cb1Data { get; }
+
+        /// <summary>
+        /// Creates a new instance of the guest shader code and constant buffer data.
+        /// </summary>
+        /// <param name="code">Maxwell binary shader code</param>
+        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
+        public GuestCodeAndCbData(byte[] code, byte[] cb1Data)
+        {
+            Code = code;
+            Cb1Data = cb1Data;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
index 803b06766..825119688 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
@@ -45,9 +45,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             public readonly CachedShaderProgram CachedProgram;
 
             /// <summary>
-            /// Host program.
+            /// Optional binary code. If not null, it is used instead of the backend host binary.
             /// </summary>
-            public readonly IProgram HostProgram;
+            public readonly byte[] BinaryCode;
 
             /// <summary>
             /// Program index.
@@ -68,19 +68,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             /// Creates a new program validation entry.
             /// </summary>
             /// <param name="cachedProgram">Cached shader program</param>
-            /// <param name="hostProgram">Host program</param>
             /// <param name="programIndex">Program index</param>
             /// <param name="isCompute">Indicates if the program is a compute shader</param>
             /// <param name="isBinary">Indicates if the program is a host binary shader</param>
             public ProgramEntry(
                 CachedShaderProgram cachedProgram,
-                IProgram hostProgram,
+                byte[] binaryCode,
                 int programIndex,
                 bool isCompute,
                 bool isBinary)
             {
                 CachedProgram = cachedProgram;
-                HostProgram = hostProgram;
+                BinaryCode = binaryCode;
                 ProgramIndex = programIndex;
                 IsCompute = isCompute;
                 IsBinary = isBinary;
@@ -146,9 +145,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private struct AsyncProgramTranslation
         {
             /// <summary>
-            /// Cached shader stages.
+            /// Guest code for each active stage.
             /// </summary>
-            public readonly CachedShaderStage[] Shaders;
+            public readonly GuestCodeAndCbData?[] GuestShaders;
 
             /// <summary>
             /// Specialization state.
@@ -168,17 +167,17 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             /// <summary>
             /// Creates a new program translation entry.
             /// </summary>
-            /// <param name="shaders">Cached shader stages</param>
+            /// <param name="guestShaders">Guest code for each active stage</param>
             /// <param name="specState">Specialization state</param>
             /// <param name="programIndex">Program index</param>
             /// <param name="isCompute">Indicates if the program is a compute shader</param>
             public AsyncProgramTranslation(
-                CachedShaderStage[] shaders,
+                GuestCodeAndCbData?[] guestShaders,
                 ShaderSpecializationState specState,
                 int programIndex,
                 bool isCompute)
             {
-                Shaders = shaders;
+                GuestShaders = guestShaders;
                 SpecializationState = specState;
                 ProgramIndex = programIndex;
                 IsCompute = isCompute;
@@ -188,7 +187,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private readonly Queue<ProgramEntry> _validationQueue;
         private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;
         private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;
-        private readonly SortedList<int, CachedShaderProgram> _programList;
+        private readonly SortedList<int, (CachedShaderProgram, byte[])> _programList;
 
         private int _backendParallelCompileThreads;
         private int _compiledCount;
@@ -220,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             _validationQueue = new Queue<ProgramEntry>();
             _compilationQueue = new ConcurrentQueue<ProgramCompilation>();
             _asyncTranslationQueue = new BlockingCollection<AsyncProgramTranslation>(ThreadCount);
-            _programList = new SortedList<int, CachedShaderProgram>();
+            _programList = new SortedList<int, (CachedShaderProgram, byte[])>();
             _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code.
         }
 
@@ -235,7 +234,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             {
                 workThreads[index] = new Thread(ProcessAsyncQueue)
                 {
-                    Name = $"Gpu.AsyncTranslationThread.{index}"
+                    Name = $"GPU.AsyncTranslationThread.{index}"
                 };
             }
 
@@ -287,7 +286,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
             CheckCompilationBlocking();
 
-            if (_needsHostRegen)
+            if (_needsHostRegen && Active)
             {
                 // Rebuild both shared and host cache files.
                 // Rebuilding shared is required because the shader information returned by the translator
@@ -310,8 +309,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                                 break;
                             }
 
-                            CachedShaderProgram program = kv.Value;
-                            _hostStorage.AddShader(_context, program, program.HostProgram.GetBinary(), streams);
+                            (CachedShaderProgram program, byte[] binaryCode) = kv.Value;
+                            _hostStorage.AddShader(_context, program, binaryCode, streams);
                         }
 
                         Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully.");
@@ -342,24 +341,31 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// Enqueues a host program for compilation.
         /// </summary>
         /// <param name="cachedProgram">Cached program</param>
-        /// <param name="hostProgram">Host program to be compiled</param>
+        /// <param name="binaryCode">Host binary code</param>
         /// <param name="programIndex">Program index</param>
         /// <param name="isCompute">Indicates if the program is a compute shader</param>
-        public void QueueHostProgram(CachedShaderProgram cachedProgram, IProgram hostProgram, int programIndex, bool isCompute)
+        public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute)
         {
-            EnqueueForValidation(new ProgramEntry(cachedProgram, hostProgram, programIndex, isCompute, isBinary: true));
+            EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true));
         }
 
         /// <summary>
         /// Enqueues a guest program for compilation.
         /// </summary>
-        /// <param name="shaders">Cached shader stages</param>
+        /// <param name="guestShaders">Guest code for each active stage</param>
         /// <param name="specState">Specialization state</param>
         /// <param name="programIndex">Program index</param>
         /// <param name="isCompute">Indicates if the program is a compute shader</param>
-        public void QueueGuestProgram(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
+        public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
         {
-            _asyncTranslationQueue.Add(new AsyncProgramTranslation(shaders, specState, programIndex, isCompute));
+            try
+            {
+                _asyncTranslationQueue.Add(new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute), _cancellationToken);
+            }
+            catch (OperationCanceledException)
+            {
+                // The cancellation token fired before the entry could be added to the bounded queue; the program is intentionally dropped.
+            }
         }
 
         /// <summary>
@@ -374,7 +380,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation.
             while (_validationQueue.TryPeek(out ProgramEntry entry))
             {
-                ProgramLinkStatus result = entry.HostProgram.CheckProgramLink(false);
+                ProgramLinkStatus result = entry.CachedProgram.HostProgram.CheckProgramLink(false);
 
                 if (result != ProgramLinkStatus.Incomplete)
                 {
@@ -398,7 +404,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
             while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active)
             {
-                ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false);
+                ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false);
             }
         }
 
@@ -427,7 +433,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                     _needsHostRegen = true;
                 }
 
-                _programList.Add(entry.ProgramIndex, entry.CachedProgram);
+                _programList.Add(entry.ProgramIndex, (entry.CachedProgram, entry.BinaryCode));
                 SignalCompiled();
             }
             else if (entry.IsBinary)
@@ -436,13 +442,25 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 // we still have a chance to recompile from the guest binary.
                 CachedShaderProgram program = entry.CachedProgram;
 
+                GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[program.Shaders.Length];
+
+                for (int index = 0; index < program.Shaders.Length; index++)
+                {
+                    CachedShaderStage shader = program.Shaders[index];
+
+                    if (shader != null)
+                    {
+                        guestShaders[index] = new GuestCodeAndCbData(shader.Code, shader.Cb1Data);
+                    }
+                }
+
                 if (asyncCompile)
                 {
-                    QueueGuestProgram(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
+                    QueueGuestProgram(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
                 }
                 else
                 {
-                    RecompileFromGuestCode(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
+                    RecompileFromGuestCode(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
                     ProcessCompilationQueue();
                 }
             }
@@ -476,10 +494,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                     }
                 }
 
-                IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, new ShaderInfo(fragmentOutputMap));
+                ShaderInfo shaderInfo = compilation.SpecializationState.PipelineState.HasValue
+                    ? new ShaderInfo(fragmentOutputMap, compilation.SpecializationState.PipelineState.Value)
+                    : new ShaderInfo(fragmentOutputMap);
+
+                IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, shaderInfo);
                 CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders);
 
-                EnqueueForValidation(new ProgramEntry(program, hostProgram, compilation.ProgramIndex, compilation.IsCompute, isBinary: false));
+                byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(shaderSources) : hostProgram.GetBinary();
+
+                EnqueueForValidation(new ProgramEntry(program, binaryCode, compilation.ProgramIndex, compilation.IsCompute, isBinary: false));
             }
         }
 
@@ -496,7 +520,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             // Submitting more seems to cause NVIDIA OpenGL driver to crash.
             if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry))
             {
-                ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false);
+                ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false);
             }
         }
 
@@ -513,7 +537,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct))
                 {
                     RecompileFromGuestCode(
-                        asyncCompilation.Shaders,
+                        asyncCompilation.GuestShaders,
                         asyncCompilation.SpecializationState,
                         asyncCompilation.ProgramIndex,
                         asyncCompilation.IsCompute);
@@ -527,21 +551,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// <summary>
         /// Recompiles a program from guest code.
         /// </summary>
-        /// <param name="shaders">Shader stages</param>
+        /// <param name="guestShaders">Guest code for each active stage</param>
         /// <param name="specState">Specialization state</param>
         /// <param name="programIndex">Program index</param>
         /// <param name="isCompute">Indicates if the program is a compute shader</param>
-        private void RecompileFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
+        private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
         {
             try
             {
                 if (isCompute)
                 {
-                    RecompileComputeFromGuestCode(shaders, specState, programIndex);
+                    RecompileComputeFromGuestCode(guestShaders, specState, programIndex);
                 }
                 else
                 {
-                    RecompileGraphicsFromGuestCode(shaders, specState, programIndex);
+                    RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex);
                 }
             }
             catch (DiskCacheLoadException diskCacheLoadException)
@@ -556,12 +580,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// <summary>
         /// Recompiles a graphics program from guest code.
         /// </summary>
-        /// <param name="shaders">Shader stages</param>
+        /// <param name="guestShaders">Guest code for each active stage</param>
         /// <param name="specState">Specialization state</param>
         /// <param name="programIndex">Program index</param>
-        private void RecompileGraphicsFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex)
+        private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
         {
-            ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.GraphicsState, specState.TransformFeedbackDescriptors);
+            ShaderSpecializationState newSpecState = new ShaderSpecializationState(
+                ref specState.GraphicsState,
+                specState.PipelineState,
+                specState.TransformFeedbackDescriptors);
+
             ResourceCounts counts = new ResourceCounts();
 
             TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
@@ -571,10 +599,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
             for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
             {
-                CachedShaderStage shader = shaders[stageIndex + 1];
-
-                if (shader != null)
+                if (guestShaders[stageIndex + 1].HasValue)
                 {
+                    GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value;
+
                     byte[] guestCode = shader.Code;
                     byte[] cb1Data = shader.Cb1Data;
 
@@ -586,10 +614,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                         currentStage.SetNextStage(nextStage);
                     }
 
-                    if (stageIndex == 0 && shaders[0] != null)
+                    if (stageIndex == 0 && guestShaders[0].HasValue)
                     {
-                        byte[] guestCodeA = shaders[0].Code;
-                        byte[] cb1DataA = shaders[0].Cb1Data;
+                        byte[] guestCodeA = guestShaders[0].Value.Code;
+                        byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
 
                         DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0);
                         translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
@@ -600,6 +628,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 }
             }
 
+            CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
             List<ShaderProgram> translatedStages = new List<ShaderProgram>();
 
             for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
@@ -610,15 +639,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 {
                     ShaderProgram program;
 
-                    byte[] guestCode = shaders[stageIndex + 1].Code;
-                    byte[] cb1Data = shaders[stageIndex + 1].Cb1Data;
+                    byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
+                    byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data;
 
-                    if (stageIndex == 0 && shaders[0] != null)
+                    if (stageIndex == 0 && guestShaders[0].HasValue)
                     {
                         program = currentStage.Translate(translatorContexts[0]);
 
-                        byte[] guestCodeA = shaders[0].Code;
-                        byte[] cb1DataA = shaders[0].Cb1Data;
+                        byte[] guestCodeA = guestShaders[0].Value.Code;
+                        byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
 
                         shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA);
                         shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
@@ -643,21 +672,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         /// <summary>
         /// Recompiles a compute program from guest code.
         /// </summary>
-        /// <param name="shaders">Shader stages</param>
+        /// <param name="guestShaders">Guest code for each active stage</param>
         /// <param name="specState">Specialization state</param>
         /// <param name="programIndex">Program index</param>
-        private void RecompileComputeFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex)
+        private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
         {
-            CachedShaderStage shader = shaders[0];
+            GuestCodeAndCbData shader = guestShaders[0].Value;
             ResourceCounts counts = new ResourceCounts();
-            ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.ComputeState);
+            ShaderSpecializationState newSpecState = new ShaderSpecializationState(ref specState.ComputeState);
             DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0);
 
             TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0);
 
             ShaderProgram program = translatorContext.Translate();
 
-            shaders[0] = new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data);
+            CachedShaderStage[] shaders = new[] { new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data) };
 
             _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true));
         }
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs
new file mode 100644
index 000000000..11e54220f
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs
@@ -0,0 +1,79 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Packs the host binary code of every stage of a program into a single blob, and unpacks it back.
+    /// </summary>
+    static class ShaderBinarySerializer
+    {
+        /// <summary>
+        /// Packs the binary code of all the shader sources into a single blob.
+        /// </summary>
+        /// <remarks>
+        /// Each source is stored as a 32-bit length followed by the binary code itself, in array order.
+        /// </remarks>
+        /// <param name="sources">Shader sources with the binary code to be packed</param>
+        /// <returns>Blob with the length prefixed binary code of all sources</returns>
+        public static byte[] Pack(ShaderSource[] sources)
+        {
+            using MemoryStream output = new MemoryStream();
+            using BinaryWriter writer = new BinaryWriter(output);
+
+            for (int i = 0; i < sources.Length; i++)
+            {
+                writer.Write(sources[i].BinaryCode.Length);
+                writer.Write(sources[i].BinaryCode);
+            }
+
+            return output.ToArray();
+        }
+
+        /// <summary>
+        /// Rebuilds the shader sources of a program from a blob created by <see cref="Pack"/>.
+        /// </summary>
+        /// <remarks>
+        /// One entry is read from the blob for each non-null stage, starting at index 0 for compute programs
+        /// and at index 1 otherwise, so the blob is expected to have one entry for each of those stages.
+        /// </remarks>
+        /// <param name="stages">Cached shader stages of the program</param>
+        /// <param name="code">Blob with the packed binary code</param>
+        /// <param name="compute">Indicates if the program is a compute shader</param>
+        /// <returns>Shader source for each non-null stage, targeting SPIR-V</returns>
+        /// <exception cref="EndOfStreamException">Thrown when the blob ends before all the binary code could be read</exception>
+        public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code, bool compute)
+        {
+            using MemoryStream input = new MemoryStream(code);
+            using BinaryReader reader = new BinaryReader(input);
+
+            List<ShaderSource> output = new List<ShaderSource>();
+
+            for (int i = compute ? 0 : 1; i < stages.Length; i++)
+            {
+                CachedShaderStage stage = stages[i];
+
+                if (stage == null)
+                {
+                    continue;
+                }
+
+                int binaryCodeLength = reader.ReadInt32();
+                byte[] binaryCode = reader.ReadBytes(binaryCodeLength);
+
+                // ReadBytes returns fewer bytes than requested if the stream ends early,
+                // a truncated blob must not be handed to the host as if it was complete.
+                if (binaryCode.Length != binaryCodeLength)
+                {
+                    throw new EndOfStreamException();
+                }
+
+                output.Add(new ShaderSource(binaryCode, ShaderCache.GetBindings(stage.Info), stage.Info.Stage, TargetLanguage.Spirv));
+            }
+
+            return output.ToArray();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 33d3c48f9..cba7edfe0 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -62,11 +62,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             public readonly CachedShaderProgram CachedProgram;
             public readonly IProgram HostProgram;
+            public readonly byte[] BinaryCode;
 
-            public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram)
+            public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode)
             {
                 CachedProgram = cachedProgram;
                 HostProgram = hostProgram;
+                BinaryCode = binaryCode;
             }
         }
 
@@ -126,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 {
                     if (result == ProgramLinkStatus.Success)
                     {
-                        _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary());
+                        _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary());
                     }
 
                     _programsToSaveQueue.Dequeue();
@@ -146,7 +148,9 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             if (_diskCacheHostStorage.CacheEnabled)
             {
-                if (!_diskCacheHostStorage.CacheExists())
+                // Migration disabled as Vulkan added a lot of new state,
+                // most migrated shaders would be unused due to the state not matching.
+                /* if (!_diskCacheHostStorage.CacheExists())
                 {
                     // If we don't have a shader cache on the new format, try to perform migration from the old shader cache.
                     Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache...");
@@ -154,7 +158,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage);
 
                     Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders.");
-                }
+                } */
 
                 ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
                     _context,
@@ -213,7 +217,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 return cpShader;
             }
 
-            ShaderSpecializationState specState = new ShaderSpecializationState(computeState);
+            ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState);
             GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState);
             GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState);
 
@@ -221,12 +225,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
 
-            IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1));
+            ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) };
+
+            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(-1));
 
             cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);
 
             _computeShaderCache.Add(cpShader);
-            EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram));
+            EnqueueProgramToSave(cpShader, hostProgram, shaderSourcesArray);
             _cpPrograms[gpuVa] = cpShader;
 
             return cpShader;
@@ -307,7 +313,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);
 
-            ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors);
+            ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors);
             GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);
 
             ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
@@ -385,13 +391,15 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel);
 
+            ShaderSource[] shaderSourcesArray = shaderSources.ToArray();
+
             int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
-            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap, pipeline));
+            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline));
 
             gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
 
             _graphicsShaderCache.Add(gpShaders);
-            EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram));
+            EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
             _gpPrograms[addresses] = gpShaders;
 
             return gpShaders;
@@ -413,9 +421,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <remarks>
         /// This will not do anything if disk shader cache is disabled.
         /// </remarks>
-        /// <param name="programToSave">Program to be saved on disk</param>
-        private void EnqueueProgramToSave(ProgramToSave programToSave)
+        /// <param name="program">Cached shader program</param>
+        /// <param name="hostProgram">Host program</param>
+        /// <param name="sources">Source for each shader stage</param>
+        private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources)
         {
             if (_diskCacheHostStorage.CacheEnabled)
             {
+                // On Vulkan the binary code is packed from the sources, for other APIs it is left null here.
+                // This is done only when the cache is enabled, to avoid copying all the code for nothing.
+                byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(sources) : null;
+                ProgramToSave programToSave = new ProgramToSave(program, hostProgram, binaryCode);
+
                 _programsToSaveQueue.Enqueue(programToSave);
@@ -646,7 +660,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             };
         }
 
-        private static ShaderBindings GetBindings(ShaderProgramInfo info)
+        public static ShaderBindings GetBindings(ShaderProgramInfo info)
         {
             static bool IsBuffer(Graphics.Shader.TextureDescriptor descriptor)
             {
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
index 172ce14ce..3df9d1199 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
@@ -1,6 +1,7 @@
 using Ryujinx.Common.Memory;
 using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Shader.DiskCache;
 using Ryujinx.Graphics.Shader;
 using System;
@@ -19,6 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24);
         private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24);
         private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24);
+        private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | ((byte)'P' << 16) | ((byte)'S' << 24);
 
         /// <summary>
         /// Flags indicating GPU state that is used by the shader.
@@ -51,6 +53,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// </summary>
         public Array5<uint> ConstantBufferUse;
 
+        /// <summary>
+        /// Optional pipeline state captured at the time of the shader use.
+        /// </summary>
+        public ProgramPipelineState? PipelineState;
+
         /// <summary>
         /// Transform feedback buffers active at the time the shader was compiled.
         /// </summary>
@@ -179,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Creates a new instance of the shader specialization state.
         /// </summary>
         /// <param name="state">Current compute engine state</param>
-        public ShaderSpecializationState(GpuChannelComputeState state) : this()
+        public ShaderSpecializationState(ref GpuChannelComputeState state) : this()
         {
             ComputeState = state;
             _compute = true;
@@ -190,7 +197,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// </summary>
         /// <param name="state">Current 3D engine state</param>
         /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
-        public ShaderSpecializationState(GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this()
+        private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this()
         {
             GraphicsState = state;
             _compute = false;
@@ -244,6 +251,34 @@ namespace Ryujinx.Graphics.Gpu.Shader
             }
         }
 
+        /// <summary>
+        /// Creates a new instance of the shader specialization state for graphics, with pipeline state.
+        /// </summary>
+        /// <param name="state">Current 3D engine state</param>
+        /// <param name="pipelineState">Current program pipeline state</param>
+        /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
+        public ShaderSpecializationState(
+            ref GpuChannelGraphicsState state,
+            ref ProgramPipelineState pipelineState,
+            TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
+        {
+            PipelineState = pipelineState;
+        }
+
+        /// <summary>
+        /// Creates a new instance of the shader specialization state for graphics, with optional pipeline state.
+        /// </summary>
+        /// <param name="state">Current 3D engine state</param>
+        /// <param name="pipelineState">Optional program pipeline state, null if not available</param>
+        /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
+        public ShaderSpecializationState(
+            ref GpuChannelGraphicsState state,
+            ProgramPipelineState? pipelineState,
+            TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
+        {
+            PipelineState = pipelineState;
+        }
+
         /// <summary>
         /// Indicates that the shader accesses the early Z force state.
         /// </summary>
@@ -697,6 +732,17 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 constantBufferUsePerStageMask &= ~(1 << index);
             }
 
+            bool hasPipelineState = false;
+
+            dataReader.Read(ref hasPipelineState);
+
+            if (hasPipelineState)
+            {
+                ProgramPipelineState pipelineState = default;
+                dataReader.ReadWithMagicAndSize(ref pipelineState, PgpsMagic);
+                specState.PipelineState = pipelineState;
+            }
+
             if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
             {
                 ushort tfCount = 0;
@@ -755,6 +801,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 constantBufferUsePerStageMask &= ~(1 << index);
             }
 
+            bool hasPipelineState = PipelineState.HasValue;
+
+            dataWriter.Write(ref hasPipelineState);
+
+            if (hasPipelineState)
+            {
+                ProgramPipelineState pipelineState = PipelineState.Value;
+                dataWriter.WriteWithMagicAndSize(ref pipelineState, PgpsMagic);
+            }
+
             if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
             {
                 ushort tfCount = (ushort)TransformFeedbackDescriptors.Length;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
index e219c3ed1..d160671ca 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -107,6 +107,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             Add(Instruction.Minimum,                  GenerateMinimum);
             Add(Instruction.MinimumU32,               GenerateMinimumU32);
             Add(Instruction.Multiply,                 GenerateMultiply);
+            Add(Instruction.MultiplyHighS32,          GenerateMultiplyHighS32);
+            Add(Instruction.MultiplyHighU32,          GenerateMultiplyHighU32);
             Add(Instruction.Negate,                   GenerateNegate);
             Add(Instruction.PackDouble2x32,           GeneratePackDouble2x32);
             Add(Instruction.PackHalf2x16,             GeneratePackHalf2x16);
@@ -1090,6 +1092,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             return GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul);
         }
 
+        private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation)
+        {
+            // The extended multiply produces a struct of two members, with the high order bits at member 1.
+            var productType = context.TypeStruct(false, context.TypeS32(), context.TypeS32());
+
+            var lhs = context.GetS32(operation.GetSource(0));
+            var rhs = context.GetS32(operation.GetSource(1));
+
+            var product = context.SMulExtended(productType, lhs, rhs);
+            var highBits = context.CompositeExtract(context.TypeS32(), product, 1);
+
+            return new OperationResult(AggregateType.S32, highBits);
+        }
+
+        private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation)
+        {
+            // The extended multiply produces a struct of two members, with the high order bits at member 1.
+            var productType = context.TypeStruct(false, context.TypeU32(), context.TypeU32());
+
+            var lhs = context.GetU32(operation.GetSource(0));
+            var rhs = context.GetU32(operation.GetSource(1));
+
+            var product = context.UMulExtended(productType, lhs, rhs);
+            var highBits = context.CompositeExtract(context.TypeU32(), product, 1);
+
+            return new OperationResult(AggregateType.U32, highBits);
+        }
+
         private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation)
         {
             return GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate);
diff --git a/Ryujinx.Graphics.Vulkan/ShaderCollection.cs b/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
index 5fcb91ea2..2373d31f8 100644
--- a/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
+++ b/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
@@ -300,7 +300,7 @@ namespace Ryujinx.Graphics.Vulkan
 
         public byte[] GetBinary()
         {
-            throw new System.NotImplementedException();
+            return null;
         }
 
         public void AddComputePipeline(Auto<DisposablePipeline> pipeline)
diff --git a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs
index 8c0834006..66b6fa462 100644
--- a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs
+++ b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs
@@ -226,7 +226,11 @@ namespace Ryujinx.Graphics.Vulkan
 
         public IProgram CreateProgram(IShader[] shaders, ShaderInfo info)
         {
-            if (info.BackgroundCompile && info.State.HasValue && VulkanConfiguration.UseDynamicState)
+            bool isCompute = shaders.Length == 1 && ((Shader)shaders[0]).StageFlags == ShaderStageFlags.ShaderStageComputeBit;
+
+            if (info.BackgroundCompile && (info.State.HasValue || isCompute) && VulkanConfiguration.UseDynamicState)
             {
-                return new ShaderCollection(this, _device, shaders, info.State.Value);
+                // Compute programs are accepted without pipeline state, so State can be null here.
+                // Accessing Value would throw in that case, fall back to the default state instead.
+                return new ShaderCollection(this, _device, shaders, info.State.GetValueOrDefault());
             }
diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs
index 51297bb77..be7956437 100644
--- a/Ryujinx/Ui/MainWindow.cs
+++ b/Ryujinx/Ui/MainWindow.cs
@@ -1037,7 +1037,7 @@ namespace Ryujinx.Ui
             Graphics.Gpu.GraphicsConfig.ResScale          = (resScale == -1) ? resScaleCustom : resScale;
             Graphics.Gpu.GraphicsConfig.MaxAnisotropy     = ConfigurationState.Instance.Graphics.MaxAnisotropy;
             Graphics.Gpu.GraphicsConfig.ShadersDumpPath   = ConfigurationState.Instance.Graphics.ShadersDumpPath;
-            Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache && ConfigurationState.Instance.Graphics.GraphicsBackend != GraphicsBackend.Vulkan;
+            Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache;
         }
 
         public void SaveConfig()