diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index b96b71be4..39331f965 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction
 
         public static void Frecpe_S(AILEmitterCtx Context)
         {
-            EmitFrecpe(Context, 0, Scalar: true);
+            EmitScalarUnaryOpF(Context, () =>
+            {
+                EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
+            });
         }
 
         public static void Frecpe_V(AILEmitterCtx Context)
         {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
-            int SizeF = Op.Size & 1;
-
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
-
-            for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
+            EmitVectorUnaryOpF(Context, () =>
             {
-                EmitFrecpe(Context, Index, Scalar: false);
-            }
-
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
-            {
-                EmitVectorZeroUpper(Context, Op.Rd);
-            }
-        }
-
-        private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar)
-        {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
-            int SizeF = Op.Size & 1;
-
-            if (SizeF == 0)
-            {
-                Context.EmitLdc_R4(1);
-            }
-            else /* if (SizeF == 1) */
-            {
-                Context.EmitLdc_R8(1);
-            }
-
-            EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
-
-            Context.Emit(OpCodes.Div);
-
-            if (Scalar)
-            {
-                EmitVectorZeroAll(Context, Op.Rd);
-            }
-
-            EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
+                EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
+            });
         }
 
         public static void Frecps_S(AILEmitterCtx Context)
         {
-            EmitFrecps(Context, 0, Scalar: true);
+            EmitScalarBinaryOpF(Context, () =>
+            {
+                EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
+            });
         }
 
         public static void Frecps_V(AILEmitterCtx Context)
         {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
-            int SizeF = Op.Size & 1;
-
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
-
-            for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
+            EmitVectorBinaryOpF(Context, () =>
             {
-                EmitFrecps(Context, Index, Scalar: false);
-            }
-
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
-            {
-                EmitVectorZeroUpper(Context, Op.Rd);
-            }
-        }
-
-        private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar)
-        {
-            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
-
-            int SizeF = Op.Size & 1;
-
-            if (SizeF == 0)
-            {
-                Context.EmitLdc_R4(2);
-            }
-            else /* if (SizeF == 1) */
-            {
-                Context.EmitLdc_R8(2);
-            }
-
-            EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
-            EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
-
-            Context.Emit(OpCodes.Mul);
-            Context.Emit(OpCodes.Sub);
-
-            if (Scalar)
-            {
-                EmitVectorZeroAll(Context, Op.Rd);
-            }
-
-            EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
+                EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
+            });
         }
 
         public static void Frinta_S(AILEmitterCtx Context)
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 0f6ea42ce..d895ec9c7 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction
             Context.EmitCall(MthdInfo);
         }
 
+        public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name)
+        {
+            //Emits a call to the two operand ASoftFloat overload called Name, picking
+            //the float or double version from the size field of the current opcode.
+            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
+
+            //Bit 0 of the size field clear means float, set means double.
+            bool IsDouble = (Op.Size & 1) != 0;
+
+            Type ArgType = IsDouble ? typeof(double) : typeof(float);
+
+            Type[] ArgTypes = new Type[] { ArgType, ArgType };
+
+            //GetMethod resolves the overload whose parameter list matches ArgTypes.
+            MethodInfo MthdInfo = typeof(ASoftFloat).GetMethod(Name, ArgTypes);
+
+            //GetMethod yields null when no such overload exists; it is passed on unchanged.
+            Context.EmitCall(MthdInfo);
+        }
+
         public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
         {
             AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;
diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs
index 7bee69bae..e63c82bee 100644
--- a/ChocolArm64/Instruction/ASoftFloat.cs
+++ b/ChocolArm64/Instruction/ASoftFloat.cs
@@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction
         static ASoftFloat()
         {
             InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
+            RecipEstimateTable = BuildRecipEstimateTable();
         }
 
+        private static readonly byte[] RecipEstimateTable;
         private static readonly byte[] InvSqrtEstimateTable;
 
         private static byte[] BuildInvSqrtEstimateTable()
@@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction
             return Table;
         }
 
+        private static byte[] BuildRecipEstimateTable()
+        {
+            // Entry i is the low byte of (q + 1) >> 1, where q = 0x80000 / (2 * (256 + i) + 1);
+            // the stored values run from 255 at entry 0 down to 0 at entry 255.
+            byte[] table = new byte[256];
+
+            for (int entry = 0; entry < table.Length; entry++)
+            {
+                ulong divisor = (((ulong)entry | 0x100) << 1) + 1;
+                ulong halved = (0x80000 / divisor + 1) >> 1;
+
+                table[entry] = (byte)(halved & 0xFF);
+            }
+            return table;
+        }
+
         public static float InvSqrtEstimate(float x)
         {
             return (float)InvSqrtEstimate((double)x);
@@ -50,14 +68,8 @@ namespace ChocolArm64.Instruction
             long x_exp = (long)((x_bits >> 52) & 0x7FF);
             ulong scaled = x_bits & ((1ul << 52) - 1);
 
-            if (x_exp == 0x7ff)
+            if (x_exp == 0x7FF && scaled != 0)
             {
-                if (scaled == 0)
-                {
-                    // Infinity -> Zero
-                    return BitConverter.Int64BitsToDouble((long)x_sign);
-                }
-
                 // NaN
                 return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000));
             }
@@ -79,6 +91,18 @@ namespace ChocolArm64.Instruction
                 scaled <<= 1;
             }
 
+            if (x_sign != 0)
+            {
+                // Negative -> NaN
+                return BitConverter.Int64BitsToDouble((long)0x7ff8000000000000);
+            }
+
+            if (x_exp == 0x7ff && scaled == 0)
+            {
+                // Infinity -> Zero
+                return BitConverter.Int64BitsToDouble((long)x_sign);
+            }
+
             if (((ulong)x_exp & 1) == 1)
             {
                 scaled >>= 45;
@@ -99,5 +123,107 @@ namespace ChocolArm64.Instruction
             ulong result = x_sign | (result_exp << 52) | fraction;
             return BitConverter.Int64BitsToDouble((long)result);
         }
+
+        public static float RecipEstimate(float x)
+        {
+            return (float)RecipEstimate((double)x); // Widen, use the double overload, narrow the result.
+        }
+
+        public static double RecipEstimate(double x)
+        {
+            ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); // Raw bit pattern of x.
+            ulong x_sign = x_bits & 0x8000000000000000; // Bit 63.
+            ulong x_exp = (x_bits >> 52) & 0x7FF; // Bits 62..52.
+            ulong scaled = x_bits & ((1ul << 52) - 1); // Bits 51..0.
+            // Special inputs are answered directly; the rest index RecipEstimateTable.
+            if (x_exp >= 2045) // 2045 and 2046 are finite, 0x7ff is Infinity or NaN.
+            {
+                if (x_exp == 0x7ff && scaled != 0)
+                {
+                    // NaN: returned with bit 51 set and every other bit of x kept
+                    return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000));
+                }
+
+                // Infinity, or Out of range -> Zero carrying the sign of x
+                return BitConverter.Int64BitsToDouble((long)x_sign);
+            }
+
+            if (x_exp == 0)
+            {
+                if (scaled == 0)
+                {
+                    // Zero -> Infinity carrying the sign of x
+                    return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000));
+                }
+
+                // Denormal: the fraction is shifted left by two when bit 51 is clear, else by one
+                if ((scaled & (1ul << 51)) == 0)
+                {
+                    x_exp = ~0ul; // All ones; the subtraction from 2045 below wraps to 2046.
+                    scaled <<= 2;
+                }
+                else
+                {
+                    scaled <<= 1;
+                }
+            }
+
+            scaled >>= 44; // Drop the low 44 bits.
+            scaled &= 0xFF; // Keeps the index inside the 256 entry table.
+
+            ulong result_exp = (2045 - x_exp) & 0x7FF; // Kept to 11 bits.
+            ulong estimate = (ulong)RecipEstimateTable[scaled];
+            ulong fraction = estimate << 44; // Table byte lands in bits 51..44.
+            // NOTE(review): x_exp is 0..2044 or all ones here, so result_exp is 1..2046 and both branches below look unreachable; confirm.
+            if (result_exp == 0)
+            {
+                fraction >>= 1;
+                fraction |= 1ul << 51;
+            }
+            else if (result_exp == 0x7FF)
+            {
+                result_exp = 0;
+                fraction >>= 2;
+                fraction |= 1ul << 50;
+            }
+
+            ulong result = x_sign | (result_exp << 52) | fraction; // Reassemble sign, exponent and fraction.
+            return BitConverter.Int64BitsToDouble((long)result);
+        }
+
+        public static float RecipStep(float op1, float op2)
+        {
+            return (float)RecipStep((double)op1, (double)op2); // Widen both operands, use the double overload, narrow the result.
+        }
+
+        public static double RecipStep(double op1, double op2)
+        {
+            // Computes 2.0 - op1 * op2, with Infinity * Zero defined to give 2.0.
+            op1 = -op1;
+
+            // A NaN operand must produce NaN. Without this check Infinity paired with
+            // NaN reached the infinity branch below and came back as Infinity.
+            if (double.IsNaN(op1) || double.IsNaN(op2))
+            {
+                return op1 + op2;
+            }
+
+            bool inf1 = double.IsInfinity(op1);
+            bool inf2 = double.IsInfinity(op2);
+            bool zero1 = op1 == 0.0;
+            bool zero2 = op2 == 0.0;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                return 2.0;
+            }
+            else if (inf1 || inf2)
+            {
+                // Infinity, negative when exactly one of the negated op1 and op2 is negative
+                return (op1 < 0.0) != (op2 < 0.0) ? double.NegativeInfinity : double.PositiveInfinity;
+            }
+
+            return 2.0 + op1 * op2;
+        }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs
index c02bf172f..da5cf0074 100644
--- a/ChocolArm64/Memory/AMemory.cs
+++ b/ChocolArm64/Memory/AMemory.cs
@@ -33,19 +33,25 @@ namespace ChocolArm64.Memory
 
         private byte* RamPtr;
 
+        private int HostPageSize;
+
         public AMemory()
         {
             Manager = new AMemoryMgr();
 
             Monitors = new Dictionary<int, ArmMonitor>();
 
+            IntPtr Size = (IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize;
+
             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
-                Ram = AMemoryWin32.Allocate((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize);
+                Ram = AMemoryWin32.Allocate(Size);
+
+                HostPageSize = AMemoryWin32.GetPageSize(Ram, Size);
             }
             else
             {
-                Ram = Marshal.AllocHGlobal((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize);
+                Ram = Marshal.AllocHGlobal(Size);
             }
 
             RamPtr = (byte*)Ram;
@@ -149,49 +155,53 @@ namespace ChocolArm64.Memory
             }
         }
 
-        public long GetHostPageSize()
+        public int GetHostPageSize()
         {
-            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
-            {
-                return AMemoryMgr.PageSize;
-            }
-
-            IntPtr MemAddress = new IntPtr(RamPtr);
-            IntPtr MemSize    = new IntPtr(AMemoryMgr.RamSize);
-
-            long PageSize = AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: false);
-
-            if (PageSize < 1)
-            {
-                throw new InvalidOperationException();
-            }
-
-            return PageSize;
+            return HostPageSize != 0 ? HostPageSize : AMemoryMgr.PageSize; //HostPageSize is only assigned on the Windows path, fall back as before elsewhere.
         }
 
-        public bool IsRegionModified(long Position, long Size)
+        public bool[] IsRegionModified(long Position, long Size)
         {
             if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
-                return true;
+                return null;
             }
 
             long EndPos = Position + Size;
 
             if ((ulong)EndPos < (ulong)Position)
             {
-                return false;
+                return null;
             }
 
             if ((ulong)EndPos > AMemoryMgr.RamSize)
             {
-                return false;
+                return null;
             }
 
             IntPtr MemAddress = new IntPtr(RamPtr + Position);
             IntPtr MemSize    = new IntPtr(Size);
 
-            return AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: true) != 0;
+            int HostPageMask = HostPageSize - 1;
+
+            Position &= ~HostPageMask;
+
+            Size = EndPos - Position;
+
+            IntPtr[] Addresses  = new IntPtr[(Size + HostPageMask) / HostPageSize];
+
+            AMemoryWin32.IsRegionModified(MemAddress, MemSize, Addresses, out int Count);
+
+            bool[] Modified = new bool[Addresses.Length];
+
+            for (int Index = 0; Index < Count; Index++)
+            {
+                long VA = Addresses[Index].ToInt64() - Ram.ToInt64();
+
+                Modified[(VA - Position) / HostPageSize] = true;
+            }
+
+            return Modified;
         }
 
         public sbyte ReadSByte(long Position)
diff --git a/ChocolArm64/Memory/AMemoryWin32.cs b/ChocolArm64/Memory/AMemoryWin32.cs
index d097dc871..387ca32c2 100644
--- a/ChocolArm64/Memory/AMemoryWin32.cs
+++ b/ChocolArm64/Memory/AMemoryWin32.cs
@@ -49,7 +49,7 @@ namespace ChocolArm64.Memory
             VirtualFree(Address, IntPtr.Zero, MEM_RELEASE);
         }
 
-        public unsafe static long IsRegionModified(IntPtr Address, IntPtr Size, bool Reset)
+        public unsafe static int GetPageSize(IntPtr Address, IntPtr Size)
         {
             IntPtr[] Addresses = new IntPtr[1];
 
@@ -57,17 +57,36 @@ namespace ChocolArm64.Memory
 
             long Granularity;
 
-            int Flags = Reset ? WRITE_WATCH_FLAG_RESET : 0;
-
             GetWriteWatch(
-                Flags,
+                0,
                 Address,
                 Size,
                 Addresses,
                 &Count,
                 &Granularity);
 
-            return Count != 0 ? Granularity : 0;
+            return (int)Granularity;
+        }
+
+        public unsafe static void IsRegionModified(
+            IntPtr   Address,   //Base address of the region to query.
+            IntPtr   Size,      //Size of the region to query.
+            IntPtr[] Addresses, //Receives the addresses reported by GetWriteWatch.
+            out int  AddrCount) //Set to the entry count GetWriteWatch leaves in Count.
+        {
+            long Count = Addresses.Length; //Starts as the capacity of Addresses, updated through the pointer below.
+
+            long Granularity; //Passed because the call takes it; its value is not read afterwards.
+            //NOTE(review): the result of GetWriteWatch is ignored, so Count is used even if the call failed.
+            GetWriteWatch(
+                WRITE_WATCH_FLAG_RESET, //Unlike GetPageSize, which passes 0 here.
+                Address,
+                Size,
+                Addresses,
+                &Count,
+                &Granularity);
+
+            AddrCount = (int)Count;
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs
index 586eae6ba..2598efb61 100644
--- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs
+++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs
@@ -36,6 +36,12 @@ namespace Ryujinx.Graphics.Gal
 
         void SetClearStencil(int Stencil);
 
+        void EnablePrimitiveRestart();
+
+        void DisablePrimitiveRestart();
+
+        void SetPrimitiveRestartIndex(uint Index);
+
         void CreateVbo(long Key, byte[] Buffer);
 
         void CreateIbo(long Key, byte[] Buffer);
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs
index b98857117..a4ec7f87c 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs
@@ -184,6 +184,21 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             GL.ClearStencil(Stencil);
         }
 
+        //Turns on GL primitive restart, so that the restart index
+        //set through SetPrimitiveRestartIndex takes effect.
+        public void EnablePrimitiveRestart() =>
+            GL.Enable(EnableCap.PrimitiveRestart);
+
+        //Turns off GL primitive restart; index values are then
+        //no longer treated as a restart marker.
+        public void DisablePrimitiveRestart() =>
+            GL.Disable(EnableCap.PrimitiveRestart);
+
+        //Sets the index value that GL treats as the restart marker
+        //while primitive restart is enabled.
+        public void SetPrimitiveRestartIndex(uint Index) =>
+            GL.PrimitiveRestartIndex(Index);
+
         public void CreateVbo(long Key, byte[] Buffer)
         {
             int Handle = GL.GenBuffer();
diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
index d46f5089a..b9f9cc497 100644
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
@@ -85,6 +85,7 @@ namespace Ryujinx.HLE.Gpu.Engines
             SetDepth();
             SetStencil();
             SetAlphaBlending();
+            SetPrimitiveRestart();
 
             UploadTextures(Vmm, Keys);
             UploadUniforms(Vmm);
@@ -389,6 +390,29 @@ namespace Ryujinx.HLE.Gpu.Engines
             }
         }
 
+        private void SetPrimitiveRestart()
+        {
+            //Applies the primitive restart state held in the 3d engine registers
+            //to the rasterizer. Only bit 0 of the enable register is looked at.
+            int EnableReg = ReadRegister(NvGpuEngine3dReg.PrimRestartEnable);
+
+            if ((EnableReg & 1) == 0)
+            {
+                //The restart index register is left unread while disabled.
+                Gpu.Renderer.Rasterizer.DisablePrimitiveRestart();
+
+                return;
+            }
+
+            Gpu.Renderer.Rasterizer.EnablePrimitiveRestart();
+
+            //The register value is cast to uint before
+            //it is forwarded to the rasterizer.
+            int RestartIndex = ReadRegister(NvGpuEngine3dReg.PrimRestartIndex);
+
+            Gpu.Renderer.Rasterizer.SetPrimitiveRestartIndex((uint)RestartIndex);
+        }
+
         private void UploadTextures(NvGpuVmm Vmm, long[] Keys)
         {
             long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
@@ -440,19 +464,17 @@ namespace Ryujinx.HLE.Gpu.Engines
 
             GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition);
 
-            long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;
+            long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;
 
-            long Key = TextureAddress;
+            Key = Vmm.GetPhysicalAddress(Key);
 
-            TextureAddress = Vmm.GetPhysicalAddress(TextureAddress);
-
-            if (IsFrameBufferPosition(TextureAddress))
+            if (IsFrameBufferPosition(Key))
             {
                 //This texture is a frame buffer texture,
                 //we shouldn't read anything from memory and bind
                 //the frame buffer texture instead, since we're not
                 //really writing anything to memory.
-                Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex);
+                Gpu.Renderer.FrameBuffer.BindTexture(Key, TexIndex);
             }
             else
             {
@@ -520,6 +542,8 @@ namespace Ryujinx.HLE.Gpu.Engines
         {
             long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
 
+            long IboKey = Vmm.GetPhysicalAddress(IndexPosition);
+
             int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
             int IndexFirst    = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
             int IndexCount    = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
@@ -537,16 +561,16 @@ namespace Ryujinx.HLE.Gpu.Engines
             {
                 int IbSize = IndexCount * IndexEntrySize;
 
-                bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize);
+                bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize);
 
-                if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index))
+                if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index))
                 {
                     byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize);
 
-                    Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data);
+                    Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data);
                 }
 
-                Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat);
+                Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat);
             }
 
             List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];
@@ -595,20 +619,22 @@ namespace Ryujinx.HLE.Gpu.Engines
                     continue;
                 }
 
+                long VboKey = Vmm.GetPhysicalAddress(VertexPosition);
+
                 int Stride = Control & 0xfff;
 
                 long VbSize = (VertexEndPos - VertexPosition) + 1;
 
-                bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize);
+                bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
 
-                if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex))
+                if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex))
                 {
                     byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize);
 
-                    Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data);
+                    Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data);
                 }
 
-                Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray());
+                Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray());
             }
 
             GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
@@ -617,7 +643,7 @@ namespace Ryujinx.HLE.Gpu.Engines
             {
                 int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase);
 
-                Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, VertexBase, PrimType);
+                Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType);
             }
             else
             {
diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
index 9eb2966d9..3de2885ef 100644
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
@@ -50,6 +50,8 @@ namespace Ryujinx.HLE.Gpu.Engines
         StencilBackFuncFunc  = 0x569,
         ShaderAddress        = 0x582,
         VertexBeginGl        = 0x586,
+        PrimRestartEnable    = 0x591,
+        PrimRestartIndex     = 0x592,
         IndexArrayAddress    = 0x5f2,
         IndexArrayEndAddr    = 0x5f4,
         IndexArrayFormat     = 0x5f6,
diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs
index 7474aa33f..469cd6cd0 100644
--- a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs
+++ b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs
@@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Gpu.Memory
     {
         Index,
         Vertex,
-        Texture
+        Texture,
+        Count
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs
index 36f6406a1..0c81dd150 100644
--- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs
+++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs
@@ -274,11 +274,9 @@ namespace Ryujinx.HLE.Gpu.Memory
             PageTable[L0][L1] = TgtAddr;
         }
 
-        public bool IsRegionModified(long Position, long Size, NvGpuBufferType BufferType)
+        public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType)
         {
-            long PA = GetPhysicalAddress(Position);
-
-            return Cache.IsRegionModified(Memory, BufferType, Position, PA, Size);
+            return Cache.IsRegionModified(Memory, BufferType, PA, Size);
         }
 
         public byte ReadByte(long Position)
diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
index c7108f00c..ac9bd850e 100644
--- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
+++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
@@ -11,43 +11,53 @@ namespace Ryujinx.HLE.Gpu.Memory
 
         private class CachedPage
         {
-            private List<(long Start, long End)> Regions;
+            private struct Range
+            {
+                public long Start; //Start position of the range.
+                public long End;   //End position of the range. NOTE(review): AddRange compares with <=, confirm whether End is inclusive.
+
+                public Range(long Start, long End)
+                {
+                    this.Start = Start; //Values are stored as given, no ordering check is made.
+                    this.End   = End;
+                }
+            }
+
+            private List<Range>[] Regions;
 
             public LinkedListNode<long> Node { get; set; }
 
-            public int Count => Regions.Count;
-
             public int Timestamp { get; private set; }
 
-            public long PABase { get; private set; }
-
-            public NvGpuBufferType BufferType { get; private set; }
-
-            public CachedPage(long PABase, NvGpuBufferType BufferType)
+            public CachedPage()
             {
-                this.PABase     = PABase;
-                this.BufferType = BufferType;
+                Regions = new List<Range>[(int)NvGpuBufferType.Count];
 
-                Regions = new List<(long, long)>();
+                for (int Index = 0; Index < Regions.Length; Index++)
+                {
+                    Regions[Index] = new List<Range>();
+                }
             }
 
-            public bool AddRange(long Start, long End)
+            public bool AddRange(long Start, long End, NvGpuBufferType BufferType)
             {
-                for (int Index = 0; Index < Regions.Count; Index++)
-                {
-                    (long RgStart, long RgEnd) = Regions[Index];
+                List<Range> BtRegions = Regions[(int)BufferType];
 
-                    if (Start >= RgStart && End <= RgEnd)
+                for (int Index = 0; Index < BtRegions.Count; Index++)
+                {
+                    Range Rg = BtRegions[Index];
+
+                    if (Start >= Rg.Start && End <= Rg.End)
                     {
                         return false;
                     }
 
-                    if (Start <= RgEnd && RgStart <= End)
+                    if (Start <= Rg.End && Rg.Start <= End)
                     {
-                        long MinStart = Math.Min(RgStart, Start);
-                        long MaxEnd   = Math.Max(RgEnd,   End);
+                        long MinStart = Math.Min(Rg.Start, Start);
+                        long MaxEnd   = Math.Max(Rg.End,   End);
 
-                        Regions[Index] = (MinStart, MaxEnd);
+                        BtRegions[Index] = new Range(MinStart, MaxEnd);
 
                         Timestamp = Environment.TickCount;
 
@@ -55,12 +65,24 @@ namespace Ryujinx.HLE.Gpu.Memory
                     }
                 }
 
-                Regions.Add((Start, End));
+                BtRegions.Add(new Range(Start, End));
 
                 Timestamp = Environment.TickCount;
 
                 return true;
             }
+
+            public int GetTotalCount()
+            {
+                int Total = 0;
+                //Adds up the range count of each per buffer type list.
+                foreach (List<Range> BtRegions in Regions)
+                {
+                    Total += BtRegions.Count;
+                }
+
+                return Total;
+            }
         }
 
         private Dictionary<long, CachedPage> Cache;
@@ -76,71 +98,61 @@ namespace Ryujinx.HLE.Gpu.Memory
             SortedCache = new LinkedList<long>();
         }
 
-        public bool IsRegionModified(
-            AMemory         Memory,
-            NvGpuBufferType BufferType,
-            long            VA,
-            long            PA,
-            long            Size)
+        public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size)
         {
+            bool[] Modified = Memory.IsRegionModified(PA, Size);
+
+            if (Modified == null)
+            {
+                return true;
+            }
+
             ClearCachedPagesIfNeeded();
 
             long PageSize = Memory.GetHostPageSize();
 
             long Mask = PageSize - 1;
 
-            long VAEnd = VA + Size;
             long PAEnd = PA + Size;
 
             bool RegMod = false;
 
-            while (VA < VAEnd)
-            {
-                long Key    = VA & ~Mask;
-                long PABase = PA & ~Mask;
+            int Index = 0;
+
+            while (PA < PAEnd)
+            {
+                long Key = PA & ~Mask;
 
-                long VAPgEnd = Math.Min((VA + PageSize) & ~Mask, VAEnd);
                 long PAPgEnd = Math.Min((PA + PageSize) & ~Mask, PAEnd);
 
                 bool IsCached = Cache.TryGetValue(Key, out CachedPage Cp);
 
-                bool PgReset = false;
-
-                if (!IsCached)
+                if (IsCached)
                 {
-                    Cp = new CachedPage(PABase, BufferType);
+                    CpCount -= Cp.GetTotalCount();
 
-                    Cache.Add(Key, Cp);
+                    SortedCache.Remove(Cp.Node);
                 }
                 else
                 {
-                    CpCount -= Cp.Count;
+                    Cp = new CachedPage();
 
-                    SortedCache.Remove(Cp.Node);
-
-                    if (Cp.PABase     != PABase ||
-                        Cp.BufferType != BufferType)
-                    {
-                        PgReset = true;
-                    }
+                    Cache.Add(Key, Cp);
                 }
 
-                PgReset |= Memory.IsRegionModified(PA, PAPgEnd - PA) && IsCached;
-
-                if (PgReset)
+                if (Modified[Index++] && IsCached)
                 {
-                    Cp = new CachedPage(PABase, BufferType);
+                    Cp = new CachedPage();
 
                     Cache[Key] = Cp;
                 }
 
                 Cp.Node = SortedCache.AddLast(Key);
 
-                RegMod |= Cp.AddRange(VA, VAPgEnd);
+                RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType);
 
-                CpCount += Cp.Count;
+                CpCount += Cp.GetTotalCount();
 
-                VA = VAPgEnd;
                 PA = PAPgEnd;
             }
 
@@ -169,7 +181,7 @@ namespace Ryujinx.HLE.Gpu.Memory
 
                 Cache.Remove(Key);
 
-                CpCount -= Cp.Count;
+                CpCount -= Cp.GetTotalCount();
 
                 TimeDelta = RingDelta(Cp.Timestamp, Timestamp);
             }
diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs
new file mode 100644
index 000000000..fa201d8cd
--- /dev/null
+++ b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.HLE.OsHle.Services.Aud
+{
+    static class AudErr //Integer constants naming error conditions of the Aud services namespace.
+    {
+        public const int DeviceNotFound        = 1;
+        public const int UnsupportedSampleRate = 3; //NOTE(review): value 2 is not defined here, presumably on purpose; confirm.
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs
index 18aedb32c..54ffa6d90 100644
--- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs
+++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs
@@ -6,6 +6,8 @@ using Ryujinx.HLE.OsHle.Ipc;
 using System.Collections.Generic;
 using System.Text;
 
+using static Ryujinx.HLE.OsHle.ErrorCode;
+
 namespace Ryujinx.HLE.OsHle.Services.Aud
 {
     class IAudioOutManager : IpcService
@@ -28,36 +30,44 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
         }
 
         public long ListAudioOuts(ServiceCtx Context)
-        {   
-            ListAudioOutsMethod(Context, Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size);
-
-            return 0;
+        {
+            // Forward the first receive buffer of the request to the shared implementation.
+            var RecvBuff = Context.Request.ReceiveBuff[0];
+
+            return ListAudioOutsImpl(Context, RecvBuff.Position, RecvBuff.Size);
         }
 
         public long OpenAudioOut(ServiceCtx Context)
         {
-            OpenAudioOutMethod(Context, Context.Request.SendBuff[0].Position, Context.Request.SendBuff[0].Size,
-                Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size);
-
-            return 0;
+            // Forward the first send and receive buffers of the request to the shared implementation.
+            var SendBuff = Context.Request.SendBuff[0];
+            var RecvBuff = Context.Request.ReceiveBuff[0];
+
+            return OpenAudioOutImpl(Context, SendBuff.Position, SendBuff.Size,
+                RecvBuff.Position, RecvBuff.Size);
         }
-        
+
         public long ListAudioOutsAuto(ServiceCtx Context)
-        { 
-            ListAudioOutsMethod(Context, Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size);
+        {
+            var RecvBuff = Context.Request.GetBufferType0x22();
 
-            return 0;
+            return ListAudioOutsImpl(Context, RecvBuff.Position, RecvBuff.Size);
         }
-		
+
         public long OpenAudioOutAuto(ServiceCtx Context)
         {
-            OpenAudioOutMethod(Context, Context.Request.GetBufferType0x21().Position, Context.Request.GetBufferType0x21().Size,
-                Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size);
+            var SendBuff = Context.Request.GetBufferType0x21();
+            var RecvBuff = Context.Request.GetBufferType0x22();
 
-            return 0;
+            // Same call as OpenAudioOut, except that both buffers are taken
+            // from the GetBufferType0x21/GetBufferType0x22 request helpers.
+            return OpenAudioOutImpl(
+                Context,
+                SendBuff.Position, SendBuff.Size,
+                RecvBuff.Position, RecvBuff.Size);
         }
-        
-        public void ListAudioOutsMethod(ServiceCtx Context, long Position, long Size)
+
+        private long ListAudioOutsImpl(ServiceCtx Context, long Position, long Size)
         {
             int NameCount = 0;
 
@@ -75,23 +85,29 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
             }
 
             Context.ResponseData.Write(NameCount);
+
+            return 0;
         }
-        
-        public void OpenAudioOutMethod(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize)
+
+        private long OpenAudioOutImpl(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize) // Shared by OpenAudioOut and OpenAudioOutAuto; returns 0 or a MakeError code.
         {
-            IAalOutput AudioOut = Context.Ns.AudioOut;
-                
             string DeviceName = AMemoryHelper.ReadAsciiString(
                 Context.Memory,
                 SendPosition,
-                SendSize
-            );
-            
+                SendSize);
+
             if (DeviceName == string.Empty)
             {
                 DeviceName = DefaultAudioOutput;
             }
 
+            if (DeviceName != DefaultAudioOutput) // Any name other than the default output is rejected.
+            {
+                Context.Ns.Log.PrintWarning(LogClass.ServiceAudio, "Invalid device name!"); // Same log class as the other message of this method.
+
+                return MakeError(ErrorModule.Audio, AudErr.DeviceNotFound);
+            }
+
             byte[] DeviceNameBuffer = Encoding.ASCII.GetBytes(DeviceName + "\0");
 
             if ((ulong)DeviceNameBuffer.Length <= (ulong)ReceiveSize)
@@ -101,19 +117,21 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
             else
             {
                 Context.Ns.Log.PrintError(LogClass.ServiceAudio, $"Output buffer size {ReceiveSize} too small!");
-            }       
+            }
 
             int SampleRate = Context.RequestData.ReadInt32();
             int Channels   = Context.RequestData.ReadInt32();
 
-            Channels = (ushort)(Channels >> 16);
-
-            if (SampleRate == 0)
+            if (SampleRate != 48000) // 48000 is the only sample rate accepted here.
             {
-                SampleRate = 48000;
+                Context.Ns.Log.PrintWarning(LogClass.ServiceAudio, "Invalid sample rate!"); // Same log class as the other message of this method.
+
+                return MakeError(ErrorModule.Audio, AudErr.UnsupportedSampleRate);
             }
 
-            if (Channels < 1 || Channels > 2)
+            Channels = (ushort)Channels; // Keep only the low 16 bits of the requested channel count.
+
+            if (Channels == 0)
             {
                 Channels = 2;
             }
@@ -125,7 +143,9 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
                 ReleaseEvent.WaitEvent.Set();
             };
 
-            int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format);
+            IAalOutput AudioOut = Context.Ns.AudioOut;
+
+            int Track = AudioOut.OpenTrack(SampleRate, 2, Callback, out AudioFormat Format); // NOTE(review): track is always opened with 2 channels while Channels is what the response reports - confirm intended.
 
             MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track));
 
@@ -133,6 +153,8 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
             Context.ResponseData.Write(Channels);
             Context.ResponseData.Write((int)Format);
             Context.ResponseData.Write((int)PlaybackState.Stopped);
+
+            return 0; // Success.
         }
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
index 98be2fc5b..2a0f5ed91 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@@ -163,26 +163,18 @@ namespace Ryujinx.Tests.Cpu
             Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B));
         }
 
-        [Test, Description("FRECPE D0, D1")]
-        public void Frecpe_S([Random(100)] double A)
+        [TestCase(0x00000000u, 0x7F800000u)]
+        [TestCase(0x80000000u, 0xFF800000u)]
+        [TestCase(0x00FFF000u, 0x7E000000u)]
+        [TestCase(0x41200000u, 0x3DCC8000u)]
+        [TestCase(0xC1200000u, 0xBDCC8000u)]
+        [TestCase(0x001FFFFFu, 0x7F800000u)]
+        [TestCase(0x007FF000u, 0x7E800000u)]
+        public void Frecpe_S(uint A, uint Result)
         {
-            AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A));
-
-            Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A));
-        }
-
-        [Test, Description("FRECPE V2.4S, V0.4S")]
-        public void Frecpe_V([Random(100)] float A)
-        {
-            AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A));
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A));
-                Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A));
-                Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A));
-                Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A));
-            });
+            // FRECPE S0, S1: A goes into V1, the estimate is read back from V0 (values presumably raw float bit patterns - confirm).
+            AThreadState ThreadState = SingleOpcode(0x5EA1D820, V1: MakeVectorE0(A));
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
         }
 
         [Test, Description("FRECPS D0, D1, D2")]
@@ -202,12 +194,13 @@ namespace Ryujinx.Tests.Cpu
                 V2: Sse.SetAllVector128(A),
                 V0: Sse.SetAllVector128(B));
 
+            float Result = (float)(2 - ((double)A * (double)B));
             Assert.Multiple(() =>
             {
-                Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B)));
-                Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B)));
-                Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B)));
-                Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B)));
+                Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result));
+                Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result));
+                Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result));
+                Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result));
             });
         }