From af1516a1466de474c7f8fb5f564219b9323e1c26 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 8 Jul 2018 16:41:46 +0100 Subject: [PATCH 1/5] ASoftFloat: Fix InvSqrtEstimate for negative values (#233) --- ChocolArm64/Instruction/ASoftFloat.cs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 7bee69bae..1bd716658 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -50,14 +50,8 @@ namespace ChocolArm64.Instruction long x_exp = (long)((x_bits >> 52) & 0x7FF); ulong scaled = x_bits & ((1ul << 52) - 1); - if (x_exp == 0x7ff) + if (x_exp == 0x7FF && scaled != 0) { - if (scaled == 0) - { - // Infinity -> Zero - return BitConverter.Int64BitsToDouble((long)x_sign); - } - // NaN return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); } @@ -79,6 +73,18 @@ namespace ChocolArm64.Instruction scaled <<= 1; } + if (x_sign != 0) + { + // Negative -> NaN + return BitConverter.Int64BitsToDouble((long)0x7ff8000000000000); + } + + if (x_exp == 0x7ff && scaled == 0) + { + // Infinity -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + if (((ulong)x_exp & 1) == 1) { scaled >>= 45; From dc04b5465fe57231e88411c4771f2d63c9b342c1 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 12:42:10 -0300 Subject: [PATCH 2/5] Improvements to IAudioOutManager (#232) * Improvements to IAudioOutManager * Make implementation private --- Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs | 8 ++ .../OsHle/Services/Aud/IAudioOutManager.cs | 86 ++++++++++++------- 2 files changed, 62 insertions(+), 32 deletions(-) create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs new file mode 100644 index 000000000..fa201d8cd --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs @@ -0,0 +1,8 @@ +namespace 
Ryujinx.HLE.OsHle.Services.Aud +{ + static class AudErr + { + public const int DeviceNotFound = 1; + public const int UnsupportedSampleRate = 3; + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs index 18aedb32c..54ffa6d90 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs @@ -6,6 +6,8 @@ using Ryujinx.HLE.OsHle.Ipc; using System.Collections.Generic; using System.Text; +using static Ryujinx.HLE.OsHle.ErrorCode; + namespace Ryujinx.HLE.OsHle.Services.Aud { class IAudioOutManager : IpcService @@ -28,36 +30,44 @@ namespace Ryujinx.HLE.OsHle.Services.Aud } public long ListAudioOuts(ServiceCtx Context) - { - ListAudioOutsMethod(Context, Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size); - - return 0; + { + return ListAudioOutsImpl( + Context, + Context.Request.ReceiveBuff[0].Position, + Context.Request.ReceiveBuff[0].Size); } public long OpenAudioOut(ServiceCtx Context) { - OpenAudioOutMethod(Context, Context.Request.SendBuff[0].Position, Context.Request.SendBuff[0].Size, - Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size); - - return 0; + return OpenAudioOutImpl( + Context, + Context.Request.SendBuff[0].Position, + Context.Request.SendBuff[0].Size, + Context.Request.ReceiveBuff[0].Position, + Context.Request.ReceiveBuff[0].Size); } - + public long ListAudioOutsAuto(ServiceCtx Context) - { - ListAudioOutsMethod(Context, Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size); + { + (long RecvPosition, long RecvSize) = Context.Request.GetBufferType0x22(); - return 0; + return ListAudioOutsImpl(Context, RecvPosition, RecvSize); } - + public long OpenAudioOutAuto(ServiceCtx Context) { - OpenAudioOutMethod(Context, Context.Request.GetBufferType0x21().Position, Context.Request.GetBufferType0x21().Size, - 
Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size); + (long SendPosition, long SendSize) = Context.Request.GetBufferType0x21(); + (long RecvPosition, long RecvSize) = Context.Request.GetBufferType0x22(); - return 0; + return OpenAudioOutImpl( + Context, + SendPosition, + SendSize, + RecvPosition, + RecvSize); } - - public void ListAudioOutsMethod(ServiceCtx Context, long Position, long Size) + + private long ListAudioOutsImpl(ServiceCtx Context, long Position, long Size) { int NameCount = 0; @@ -75,23 +85,29 @@ namespace Ryujinx.HLE.OsHle.Services.Aud } Context.ResponseData.Write(NameCount); + + return 0; } - - public void OpenAudioOutMethod(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize) + + private long OpenAudioOutImpl(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize) { - IAalOutput AudioOut = Context.Ns.AudioOut; - string DeviceName = AMemoryHelper.ReadAsciiString( Context.Memory, SendPosition, - SendSize - ); - + SendSize); + if (DeviceName == string.Empty) { DeviceName = DefaultAudioOutput; } + if (DeviceName != DefaultAudioOutput) + { + Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid device name!"); + + return MakeError(ErrorModule.Audio, AudErr.DeviceNotFound); + } + byte[] DeviceNameBuffer = Encoding.ASCII.GetBytes(DeviceName + "\0"); if ((ulong)DeviceNameBuffer.Length <= (ulong)ReceiveSize) @@ -101,19 +117,21 @@ namespace Ryujinx.HLE.OsHle.Services.Aud else { Context.Ns.Log.PrintError(LogClass.ServiceAudio, $"Output buffer size {ReceiveSize} too small!"); - } + } int SampleRate = Context.RequestData.ReadInt32(); int Channels = Context.RequestData.ReadInt32(); - Channels = (ushort)(Channels >> 16); - - if (SampleRate == 0) + if (SampleRate != 48000) { - SampleRate = 48000; + Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid sample rate!"); + + return MakeError(ErrorModule.Audio, AudErr.UnsupportedSampleRate); } - if 
(Channels < 1 || Channels > 2) + Channels = (ushort)Channels; + + if (Channels == 0) { Channels = 2; } @@ -125,7 +143,9 @@ namespace Ryujinx.HLE.OsHle.Services.Aud ReleaseEvent.WaitEvent.Set(); }; - int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format); + IAalOutput AudioOut = Context.Ns.AudioOut; + + int Track = AudioOut.OpenTrack(SampleRate, 2, Callback, out AudioFormat Format); MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track)); @@ -133,6 +153,8 @@ namespace Ryujinx.HLE.OsHle.Services.Aud Context.ResponseData.Write(Channels); Context.ResponseData.Write((int)Format); Context.ResponseData.Write((int)PlaybackState.Stopped); + + return 0; } } } From 6479c3e48479259bca79bee6f1016e8108cc33a8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 8 Jul 2018 13:14:35 -0300 Subject: [PATCH 3/5] Implement GPU primitive restart (#221) --- Ryujinx.Graphics/Gal/IGalRasterizer.cs | 6 +++++ Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 15 ++++++++++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 24 ++++++++++++++++++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs | 2 ++ 4 files changed, 47 insertions(+) diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index 586eae6ba..2598efb61 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -36,6 +36,12 @@ namespace Ryujinx.Graphics.Gal void SetClearStencil(int Stencil); + void EnablePrimitiveRestart(); + + void DisablePrimitiveRestart(); + + void SetPrimitiveRestartIndex(uint Index); + void CreateVbo(long Key, byte[] Buffer); void CreateIbo(long Key, byte[] Buffer); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index b98857117..a4ec7f87c 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -184,6 +184,21 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.ClearStencil(Stencil); } + public void 
EnablePrimitiveRestart() + { + GL.Enable(EnableCap.PrimitiveRestart); + } + + public void DisablePrimitiveRestart() + { + GL.Disable(EnableCap.PrimitiveRestart); + } + + public void SetPrimitiveRestartIndex(uint Index) + { + GL.PrimitiveRestartIndex(Index); + } + public void CreateVbo(long Key, byte[] Buffer) { int Handle = GL.GenBuffer(); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index d46f5089a..10c99494b 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -85,6 +85,7 @@ namespace Ryujinx.HLE.Gpu.Engines SetDepth(); SetStencil(); SetAlphaBlending(); + SetPrimitiveRestart(); UploadTextures(Vmm, Keys); UploadUniforms(Vmm); @@ -389,6 +390,29 @@ namespace Ryujinx.HLE.Gpu.Engines } } + private void SetPrimitiveRestart() + { + bool Enable = (ReadRegister(NvGpuEngine3dReg.PrimRestartEnable) & 1) != 0; + + if (Enable) + { + Gpu.Renderer.Rasterizer.EnablePrimitiveRestart(); + } + else + { + Gpu.Renderer.Rasterizer.DisablePrimitiveRestart(); + } + + if (!Enable) + { + return; + } + + uint Index = (uint)ReadRegister(NvGpuEngine3dReg.PrimRestartIndex); + + Gpu.Renderer.Rasterizer.SetPrimitiveRestartIndex(Index); + } + private void UploadTextures(NvGpuVmm Vmm, long[] Keys) { long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs index 9eb2966d9..3de2885ef 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs @@ -50,6 +50,8 @@ namespace Ryujinx.HLE.Gpu.Engines StencilBackFuncFunc = 0x569, ShaderAddress = 0x582, VertexBeginGl = 0x586, + PrimRestartEnable = 0x591, + PrimRestartIndex = 0x592, IndexArrayAddress = 0x5f2, IndexArrayEndAddr = 0x5f4, IndexArrayFormat = 0x5f6, From 0f8f40486d1b3215c845325744bd545149223805 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 8 Jul 2018 20:54:47 +0100 Subject: 
[PATCH 4/5] ChocolArm64: More accurate implementation of Frecpe & Frecps (#228) * ChocolArm64: More accurate implementation of Frecpe * ChocolArm64: Handle infinities and zeros in Frecps --- .../Instruction/AInstEmitSimdArithmetic.cs | 100 ++------------- .../Instruction/AInstEmitSimdHelper.cs | 20 +++ ChocolArm64/Instruction/ASoftFloat.cs | 120 ++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 39 +++--- 4 files changed, 170 insertions(+), 109 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index b96b71be4..39331f965 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction public static void Frecpe_S(AILEmitterCtx Context) { - EmitFrecpe(Context, 0, Scalar: true); + EmitScalarUnaryOpF(Context, () => + { + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecpe_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorUnaryOpF(Context, () => { - EmitFrecpe(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(1); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(1); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - - Context.Emit(OpCodes.Div); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public 
static void Frecps_S(AILEmitterCtx Context) { - EmitFrecps(Context, 0, Scalar: true); + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frecps_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorBinaryOpF(Context, () => { - EmitFrecps(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(2); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(2); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frinta_S(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 0f6ea42ce..d895ec9c7 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } + public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name) + { + IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + MethodInfo MthdInfo; + + if (SizeF == 0) + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (SizeF == 1) */ + { 
+ MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) }); + } + + Context.EmitCall(MthdInfo); + } + public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) { AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp; diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 1bd716658..e63c82bee 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction static ASoftFloat() { InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); + RecipEstimateTable = BuildRecipEstimateTable(); } + private static readonly byte[] RecipEstimateTable; private static readonly byte[] InvSqrtEstimateTable; private static byte[] BuildInvSqrtEstimateTable() @@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction return Table; } + private static byte[] BuildRecipEstimateTable() + { + byte[] Table = new byte[256]; + for (ulong index = 0; index < 256; index++) + { + ulong a = index | 0x100; + + a = (a << 1) + 1; + ulong b = 0x80000 / a; + b = (b + 1) >> 1; + + Table[index] = (byte)(b & 0xFF); + } + return Table; + } + public static float InvSqrtEstimate(float x) { return (float)InvSqrtEstimate((double)x); @@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction ulong result = x_sign | (result_exp << 52) | fraction; return BitConverter.Int64BitsToDouble((long)result); } + + public static float RecipEstimate(float x) + { + return (float)RecipEstimate((double)x); + } + + public static double RecipEstimate(double x) + { + ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong x_sign = x_bits & 0x8000000000000000; + ulong x_exp = (x_bits >> 52) & 0x7FF; + ulong scaled = x_bits & ((1ul << 52) - 1); + + if (x_exp >= 2045) + { + if (x_exp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return 
BitConverter.Int64BitsToDouble((long)x_sign); + } + + if (x_exp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + x_exp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong result_exp = (2045 - x_exp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (result_exp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (result_exp == 0x7FF) + { + result_exp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = x_sign | (result_exp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + public static float RecipStep(float op1, float op2) + { + return (float)RecipStep((double)op1, (double)op2); + } + + public static double RecipStep(double op1, double op2) + { + op1 = -op1; + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + ulong op1_sign = op1_bits & 0x8000000000000000; + ulong op2_sign = op2_bits & 0x8000000000000000; + ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; + ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; + + bool inf1 = op1_other == 0x7ff0000000000000; + bool inf2 = op2_other == 0x7ff0000000000000; + bool zero1 = op1_other == 0; + bool zero2 = op2_other == 0; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + return 2.0; + } + else if (inf1 || inf2) + { + // Infinity + return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign))); + } + + return 2.0 + op1 * op2; + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 98be2fc5b..2a0f5ed91 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -163,26 
+163,18 @@ namespace Ryujinx.Tests.Cpu Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B)); } - [Test, Description("FRECPE D0, D1")] - public void Frecpe_S([Random(100)] double A) + [TestCase(0x00000000u, 0x7F800000u)] + [TestCase(0x80000000u, 0xFF800000u)] + [TestCase(0x00FFF000u, 0x7E000000u)] + [TestCase(0x41200000u, 0x3DCC8000u)] + [TestCase(0xC1200000u, 0xBDCC8000u)] + [TestCase(0x001FFFFFu, 0x7F800000u)] + [TestCase(0x007FF000u, 0x7E800000u)] + public void Frecpe_S(uint A, uint Result) { - AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A)); - - Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A)); - } - - [Test, Description("FRECPE V2.4S, V0.4S")] - public void Frecpe_V([Random(100)] float A) - { - AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A)); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); - }); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(0x5EA1D820, V1: V1); + Assert.AreEqual(Result, GetVectorE0(ThreadState.V0)); } [Test, Description("FRECPS D0, D1, D2")] @@ -202,12 +194,13 @@ namespace Ryujinx.Tests.Cpu V2: Sse.SetAllVector128(A), V0: Sse.SetAllVector128(B)); + float Result = (float)(2 - ((double)A * (double)B)); Assert.Multiple(() => { - Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result)); + 
Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result)); }); } From 095db47e132a475e25d128e691ebdae101611cc9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 16:55:15 -0300 Subject: [PATCH 5/5] Query multiple pages at once with GetWriteWatch (#222) * Query multiple pages at once with GetWriteWatch * Allow multiple buffer types to share the same page, always use the physical address as cache key * Remove a variable that is no longer needed --- ChocolArm64/Memory/AMemory.cs | 58 ++++++----- ChocolArm64/Memory/AMemoryWin32.cs | 29 +++++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 32 +++--- Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs | 3 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs | 6 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs | 120 ++++++++++++---------- 6 files changed, 145 insertions(+), 103 deletions(-) diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index c02bf172f..da5cf0074 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -33,19 +33,25 @@ namespace ChocolArm64.Memory private byte* RamPtr; + private int HostPageSize; + public AMemory() { Manager = new AMemoryMgr(); Monitors = new Dictionary(); + IntPtr Size = (IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - Ram = AMemoryWin32.Allocate((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = AMemoryWin32.Allocate(Size); + + HostPageSize = AMemoryWin32.GetPageSize(Ram, Size); } else { - Ram = Marshal.AllocHGlobal((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = Marshal.AllocHGlobal(Size); } RamPtr = (byte*)Ram; @@ -149,49 +155,53 @@ namespace ChocolArm64.Memory } } - public long GetHostPageSize() + public int GetHostPageSize() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - return
AMemoryMgr.PageSize; - } - - IntPtr MemAddress = new IntPtr(RamPtr); - IntPtr MemSize = new IntPtr(AMemoryMgr.RamSize); - - long PageSize = AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: false); - - if (PageSize < 1) - { - throw new InvalidOperationException(); - } - - return PageSize; + return HostPageSize; } - public bool IsRegionModified(long Position, long Size) + public bool[] IsRegionModified(long Position, long Size) { if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return true; + return null; } long EndPos = Position + Size; if ((ulong)EndPos < (ulong)Position) { - return false; + return null; } if ((ulong)EndPos > AMemoryMgr.RamSize) { - return false; + return null; } IntPtr MemAddress = new IntPtr(RamPtr + Position); IntPtr MemSize = new IntPtr(Size); - return AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: true) != 0; + int HostPageMask = HostPageSize - 1; + + Position &= ~HostPageMask; + + Size = EndPos - Position; + + IntPtr[] Addresses = new IntPtr[(Size + HostPageMask) / HostPageSize]; + + AMemoryWin32.IsRegionModified(MemAddress, MemSize, Addresses, out int Count); + + bool[] Modified = new bool[Addresses.Length]; + + for (int Index = 0; Index < Count; Index++) + { + long VA = Addresses[Index].ToInt64() - Ram.ToInt64(); + + Modified[(VA - Position) / HostPageSize] = true; + } + + return Modified; } public sbyte ReadSByte(long Position) diff --git a/ChocolArm64/Memory/AMemoryWin32.cs b/ChocolArm64/Memory/AMemoryWin32.cs index d097dc871..387ca32c2 100644 --- a/ChocolArm64/Memory/AMemoryWin32.cs +++ b/ChocolArm64/Memory/AMemoryWin32.cs @@ -49,7 +49,7 @@ namespace ChocolArm64.Memory VirtualFree(Address, IntPtr.Zero, MEM_RELEASE); } - public unsafe static long IsRegionModified(IntPtr Address, IntPtr Size, bool Reset) + public unsafe static int GetPageSize(IntPtr Address, IntPtr Size) { IntPtr[] Addresses = new IntPtr[1]; @@ -57,17 +57,36 @@ namespace ChocolArm64.Memory long Granularity; - int Flags = Reset ? 
WRITE_WATCH_FLAG_RESET : 0; - GetWriteWatch( - Flags, + 0, Address, Size, Addresses, &Count, &Granularity); - return Count != 0 ? Granularity : 0; + return (int)Granularity; + } + + public unsafe static void IsRegionModified( + IntPtr Address, + IntPtr Size, + IntPtr[] Addresses, + out int AddrCount) + { + long Count = Addresses.Length; + + long Granularity; + + GetWriteWatch( + WRITE_WATCH_FLAG_RESET, + Address, + Size, + Addresses, + &Count, + &Granularity); + + AddrCount = (int)Count; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 10c99494b..b9f9cc497 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -464,19 +464,17 @@ namespace Ryujinx.HLE.Gpu.Engines GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition); - long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; + long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; - long Key = TextureAddress; + Key = Vmm.GetPhysicalAddress(Key); - TextureAddress = Vmm.GetPhysicalAddress(TextureAddress); - - if (IsFrameBufferPosition(TextureAddress)) + if (IsFrameBufferPosition(Key)) { //This texture is a frame buffer texture, //we shouldn't read anything from memory and bind //the frame buffer texture instead, since we're not //really writing anything to memory. 
- Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex); + Gpu.Renderer.FrameBuffer.BindTexture(Key, TexIndex); } else { @@ -544,6 +542,8 @@ namespace Ryujinx.HLE.Gpu.Engines { long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress); + long IboKey = Vmm.GetPhysicalAddress(IndexPosition); + int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat); int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst); int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount); @@ -561,16 +561,16 @@ namespace Ryujinx.HLE.Gpu.Engines { int IbSize = IndexCount * IndexEntrySize; - bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize); + bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize); - if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index)) + if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index)) { byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize); - Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data); + Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -619,20 +619,22 @@ namespace Ryujinx.HLE.Gpu.Engines continue; } + long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + int Stride = Control & 0xfff; long VbSize = (VertexEndPos - VertexPosition) + 1; - bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize); + bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize); - if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex)) + if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex)) { byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize); - Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data); + 
Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); @@ -641,7 +643,7 @@ namespace Ryujinx.HLE.Gpu.Engines { int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase); - Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, VertexBase, PrimType); + Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType); } else { diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs index 7474aa33f..469cd6cd0 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs @@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Gpu.Memory { Index, Vertex, - Texture + Texture, + Count } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs index 36f6406a1..0c81dd150 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs @@ -274,11 +274,9 @@ namespace Ryujinx.HLE.Gpu.Memory PageTable[L0][L1] = TgtAddr; } - public bool IsRegionModified(long Position, long Size, NvGpuBufferType BufferType) + public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType) { - long PA = GetPhysicalAddress(Position); - - return Cache.IsRegionModified(Memory, BufferType, Position, PA, Size); + return Cache.IsRegionModified(Memory, BufferType, PA, Size); } public byte ReadByte(long Position) diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs index c7108f00c..ac9bd850e 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs @@ -11,43 +11,53 @@ namespace Ryujinx.HLE.Gpu.Memory private class CachedPage { - private List<(long Start, long End)> Regions; + private 
struct Range + { + public long Start; + public long End; + + public Range(long Start, long End) + { + this.Start = Start; + this.End = End; + } + } + + private List[] Regions; public LinkedListNode Node { get; set; } - public int Count => Regions.Count; - public int Timestamp { get; private set; } - public long PABase { get; private set; } - - public NvGpuBufferType BufferType { get; private set; } - - public CachedPage(long PABase, NvGpuBufferType BufferType) + public CachedPage() { - this.PABase = PABase; - this.BufferType = BufferType; + Regions = new List[(int)NvGpuBufferType.Count]; - Regions = new List<(long, long)>(); + for (int Index = 0; Index < Regions.Length; Index++) + { + Regions[Index] = new List(); + } } - public bool AddRange(long Start, long End) + public bool AddRange(long Start, long End, NvGpuBufferType BufferType) { - for (int Index = 0; Index < Regions.Count; Index++) - { - (long RgStart, long RgEnd) = Regions[Index]; + List BtRegions = Regions[(int)BufferType]; - if (Start >= RgStart && End <= RgEnd) + for (int Index = 0; Index < BtRegions.Count; Index++) + { + Range Rg = BtRegions[Index]; + + if (Start >= Rg.Start && End <= Rg.End) { return false; } - if (Start <= RgEnd && RgStart <= End) + if (Start <= Rg.End && Rg.Start <= End) { - long MinStart = Math.Min(RgStart, Start); - long MaxEnd = Math.Max(RgEnd, End); + long MinStart = Math.Min(Rg.Start, Start); + long MaxEnd = Math.Max(Rg.End, End); - Regions[Index] = (MinStart, MaxEnd); + BtRegions[Index] = new Range(MinStart, MaxEnd); Timestamp = Environment.TickCount; @@ -55,12 +65,24 @@ namespace Ryujinx.HLE.Gpu.Memory } } - Regions.Add((Start, End)); + BtRegions.Add(new Range(Start, End)); Timestamp = Environment.TickCount; return true; } + + public int GetTotalCount() + { + int Count = 0; + + for (int Index = 0; Index < Regions.Length; Index++) + { + Count += Regions[Index].Count; + } + + return Count; + } } private Dictionary Cache; @@ -76,71 +98,61 @@ namespace Ryujinx.HLE.Gpu.Memory 
SortedCache = new LinkedList(); } - public bool IsRegionModified( - AMemory Memory, - NvGpuBufferType BufferType, - long VA, - long PA, - long Size) + public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size) { + bool[] Modified = Memory.IsRegionModified(PA, Size); + + if (Modified == null) + { + return true; + } + ClearCachedPagesIfNeeded(); long PageSize = Memory.GetHostPageSize(); long Mask = PageSize - 1; - long VAEnd = VA + Size; long PAEnd = PA + Size; bool RegMod = false; - while (VA < VAEnd) - { - long Key = VA & ~Mask; - long PABase = PA & ~Mask; + int Index = 0; + + while (PA < PAEnd) + { + long Key = PA & ~Mask; - long VAPgEnd = Math.Min((VA + PageSize) & ~Mask, VAEnd); long PAPgEnd = Math.Min((PA + PageSize) & ~Mask, PAEnd); bool IsCached = Cache.TryGetValue(Key, out CachedPage Cp); - bool PgReset = false; - - if (!IsCached) + if (IsCached) { - Cp = new CachedPage(PABase, BufferType); + CpCount -= Cp.GetTotalCount(); - Cache.Add(Key, Cp); + SortedCache.Remove(Cp.Node); } else { - CpCount -= Cp.Count; + Cp = new CachedPage(); - SortedCache.Remove(Cp.Node); - - if (Cp.PABase != PABase || - Cp.BufferType != BufferType) - { - PgReset = true; - } + Cache.Add(Key, Cp); } - PgReset |= Memory.IsRegionModified(PA, PAPgEnd - PA) && IsCached; - - if (PgReset) + if (Modified[Index++] && IsCached) { - Cp = new CachedPage(PABase, BufferType); + Cp = new CachedPage(); Cache[Key] = Cp; } Cp.Node = SortedCache.AddLast(Key); - RegMod |= Cp.AddRange(VA, VAPgEnd); + RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType); - CpCount += Cp.Count; + CpCount += Cp.GetTotalCount(); - VA = VAPgEnd; PA = PAPgEnd; } @@ -169,7 +181,7 @@ namespace Ryujinx.HLE.Gpu.Memory Cache.Remove(Key); - CpCount -= Cp.Count; + CpCount -= Cp.GetTotalCount(); TimeDelta = RingDelta(Cp.Timestamp, Timestamp); }