From 2f37583ab3b49aa5064a72c8d3b4e8245ebb6b5b Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:08:39 -0300 Subject: [PATCH 1/4] Some small shader related fixes (#258) * Some small shader related fixes * Address PR feedback --- Ryujinx.Graphics/Gal/IGalShader.cs | 2 ++ Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 12 +++++++ Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 31 +++++++++++++------ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 10 +++--- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 28 ++++++++--------- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 2 +- 6 files changed, 57 insertions(+), 28 deletions(-) diff --git a/Ryujinx.Graphics/Gal/IGalShader.cs b/Ryujinx.Graphics/Gal/IGalShader.cs index 06f3fac97..9adaceaf5 100644 --- a/Ryujinx.Graphics/Gal/IGalShader.cs +++ b/Ryujinx.Graphics/Gal/IGalShader.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Gal void Bind(long Key); + void Unbind(GalShaderType Type); + void BindProgram(); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index 3c5c874ea..c55a758b4 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -203,6 +203,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } + public void Unbind(GalShaderType Type) + { + switch (Type) + { + case GalShaderType.Vertex: Current.Vertex = null; break; + case GalShaderType.TessControl: Current.TessControl = null; break; + case GalShaderType.TessEvaluation: Current.TessEvaluation = null; break; + case GalShaderType.Geometry: Current.Geometry = null; break; + case GalShaderType.Fragment: Current.Fragment = null; break; + } + } + public void BindProgram() { if (Current.Vertex == null || diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index f3075a504..575fb72f9 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -216,7 +216,7 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclOutAttributes() { - if (Decl.ShaderType == GalShaderType.Vertex) + if (Decl.ShaderType != GalShaderType.Fragment) { SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") out vec4 " + GlslDecl.PositionOutAttrName + ";"); } @@ -337,7 +337,10 @@ namespace Ryujinx.Graphics.Gal.Shader if (Decl.ShaderType == GalShaderType.Vertex) { SB.AppendLine(IdentationStr + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";"); + } + if (Decl.ShaderType != GalShaderType.Fragment) + { SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + " = gl_Position;"); SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + ".w = 1;"); } @@ -598,9 +601,6 @@ namespace Ryujinx.Graphics.Gal.Shader { switch (Op.Inst) { - case ShaderIrInst.Frcp: - return true; - case ShaderIrInst.Ipa: case ShaderIrInst.Texq: case ShaderIrInst.Texs: @@ -608,8 +608,7 @@ namespace Ryujinx.Graphics.Gal.Shader return false; } - return Op.OperandB != null || - Op.OperandC != null; + return true; } private string GetName(ShaderIrOperCbuf Cbuf) @@ -711,13 +710,13 @@ namespace Ryujinx.Graphics.Gal.Shader } else { - return Imm.Value.ToString(CultureInfo.InvariantCulture); + return GetIntConst(Imm.Value); } } private string GetValue(ShaderIrOperImmf Immf) { - return Immf.Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Immf.Value); } private string GetName(ShaderIrOperPred Pred) @@ -1047,7 +1046,7 @@ namespace Ryujinx.Graphics.Gal.Shader if (!float.IsNaN(Value) && !float.IsInfinity(Value)) { - return Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Value); } } break; @@ -1064,6 +1063,20 @@ namespace Ryujinx.Graphics.Gal.Shader return Expr; } + private static string GetIntConst(int Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + + private static string GetFloatConst(float Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + private static OperType GetDstNodeType(ShaderIrNode Node) { //Special case instructions with the result type different diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 2bacd71b3..5c474ab0b 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -172,6 +172,8 @@ namespace Ryujinx.HLE.Gpu.Engines for (; Index < 6; Index++) { + GalShaderType Type = GetTypeFromProgram(Index); + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10); int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10); @@ -180,16 +182,16 @@ namespace Ryujinx.HLE.Gpu.Engines if (!Enable) { + Gpu.Renderer.Shader.Unbind(Type); + continue; } long Key = BasePosition + (uint)Offset; - GalShaderType ShaderType = GetTypeFromProgram(Index); + Keys[(int)Type] = Key; - Keys[(int)ShaderType] = Key; - - Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType); + Gpu.Renderer.Shader.Create(Vmm, Key, Type); Gpu.Renderer.Shader.Bind(Key); } diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 6b9a30635..c0749d6a2 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -62,67 +62,67 @@ namespace Ryujinx.HLE.Gpu.Texture { return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 16); } - + case GalTextureFormat.Astc2D5x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 5, 16); } - + case GalTextureFormat.Astc2D6x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 6, 16); } - + case GalTextureFormat.Astc2D8x8: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 8, 16); } - + case GalTextureFormat.Astc2D10x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 10, 16); } - + case GalTextureFormat.Astc2D12x12: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 12, 16); } - + case GalTextureFormat.Astc2D5x4: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 4, 16); } - + case GalTextureFormat.Astc2D6x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 5, 16); } - + case GalTextureFormat.Astc2D8x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 6, 16); } - + case GalTextureFormat.Astc2D10x8: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 8, 16); } - + case GalTextureFormat.Astc2D12x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 10, 16); } - + case GalTextureFormat.Astc2D8x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 5, 16); } - + case GalTextureFormat.Astc2D10x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 5, 16); } - + case GalTextureFormat.Astc2D10x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 6, 16); @@ -139,7 +139,7 @@ namespace Ryujinx.HLE.Gpu.Texture return W * H * Bpb; } - + public static (AMemory Memory, long Position) GetMemoryAndPosition( IAMemory Memory, long Position) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 8bd4dbcba..6c08cd6c4 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -30,7 +30,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.Astc2D5x5: return Read16BptCompressedTexture(Memory, Texture, 5, 5); case GalTextureFormat.Astc2D6x6: return Read16BptCompressedTexture(Memory, Texture, 6, 6); case GalTextureFormat.Astc2D8x8: return Read16BptCompressedTexture(Memory, Texture, 8, 8); - case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); + case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); case GalTextureFormat.Astc2D12x12: return Read16BptCompressedTexture(Memory, Texture, 12, 12); case GalTextureFormat.Astc2D5x4: return Read16BptCompressedTexture(Memory, Texture, 5, 4); case GalTextureFormat.Astc2D6x5: return Read16BptCompressedTexture(Memory, Texture, 6, 5); From 514218ab98acc1f0ace2e2cc0b8c1091ffccc6ce Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:13:02 -0300 Subject: [PATCH 2/4] Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions, nits (#225) * Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions * Address PR feedback * Address PR feedback * Remove another useless temp var * nit: Alignment * Replace Context.CurrOp.GetBitsCount() with Op.GetBitsCount() * Fix encodings and move flag bit test out of the loop --- ChocolArm64/AOpCodeTable.cs | 20 ++- .../Instruction/AInstEmitSimdArithmetic.cs | 118 ++++----------- ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 6 +- ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 4 +- .../Instruction/AInstEmitSimdHelper.cs | 135 ++++++++++++++++-- .../Instruction/AInstEmitSimdLogical.cs | 4 +- .../Instruction/AInstEmitSimdMemory.cs | 5 +- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 18 +-- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 108 ++++++++++---- 9 files changed, 265 insertions(+), 153 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index fb4763ef8..0e979aa44 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -371,16 +371,22 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Smin_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V, typeof(AOpCodeSimdShImm)); SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S, typeof(AOpCodeSimd)); SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V, typeof(AOpCodeSimd)); SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd)); + SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V, typeof(AOpCodeSimdShImm)); - SetA64("010111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs)); @@ -419,9 +425,11 @@ namespace ChocolArm64 SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, typeof(AOpCodeSimd)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm)); - SetA64("011111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 39331f965..2fc8f178d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -65,11 +65,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -97,9 +98,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -190,84 +192,6 @@ namespace ChocolArm64.Instruction } } - private static void EmitSaturatingExtNarrow(AILEmitterCtx Context, bool SignedSrc, bool SignedDst, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Elems = (!Scalar ? 8 >> Op.Size : 1); - int ESize = 8 << Op.Size; - - int Part = (!Scalar & (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0); - - int TMaxValue = (SignedDst ? (1 << (ESize - 1)) - 1 : (int)((1L << ESize) - 1L)); - int TMinValue = (SignedDst ? -((1 << (ESize - 1))) : 0); - - Context.EmitLdc_I8(0L); - Context.EmitSttmp(); - - for (int Index = 0; Index < Elems; Index++) - { - AILLabel LblLe = new AILLabel(); - AILLabel LblGeEnd = new AILLabel(); - - EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMaxValue); - Context.Emit(OpCodes.Conv_U8); - - Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMaxValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.Emit(OpCodes.Br_S, LblGeEnd); - - Context.MarkLabel(LblLe); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMinValue); - Context.Emit(OpCodes.Conv_I8); - - Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMinValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.MarkLabel(LblGeEnd); - - if (Scalar) - { - EmitVectorZeroLower(Context, Op.Rd); - } - - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); - } - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - Context.EmitLdtmp(); - Context.Emit(OpCodes.Conv_I4); - Context.Emit(OpCodes.Or); - Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - } - public static void Fabd_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => @@ -338,7 +262,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> SizeF + 2; int Half = Elems >> 1; @@ -870,7 +794,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { @@ -1102,6 +1026,15 @@ namespace ChocolArm64.Instruction }); } + public static void Smlsl_V(AILEmitterCtx Context) + { + EmitVectorWidenRnRmTernaryOpSx(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Smull_V(AILEmitterCtx Context) { EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul)); @@ -1109,22 +1042,22 @@ namespace ChocolArm64.Instruction public static void Sqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: true); + EmitScalarSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: false); + EmitVectorSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtun_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sqxtun_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sub_S(AILEmitterCtx Context) @@ -1198,11 +1131,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -1272,12 +1206,12 @@ namespace ChocolArm64.Instruction public static void Uqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpZxZx(Context, () => { }); } public static void Uqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index 68a7ab880..773d98944 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -363,7 +363,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -407,7 +407,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -454,7 +454,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index da584743c..7b355494d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -337,7 +337,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { @@ -426,7 +426,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index d895ec9c7..1f7a2dad1 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Reflection.Emit; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -417,7 +418,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) { @@ -467,7 +468,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) { @@ -527,9 +528,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Opers.HasFlag(OperFlags.Rd)) { @@ -582,9 +584,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -622,9 +625,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Binary) { @@ -739,11 +743,11 @@ namespace ChocolArm64.Instruction EmitVectorPairwiseOp(Context, Emit, false); } - private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) + public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; @@ -769,6 +773,117 @@ namespace ChocolArm64.Instruction } } + public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, true, true); + } + + public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, true); + } + + public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, true); + } + + public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, true, false); + } + + public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, false); + } + + public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, false); + } + + public static void EmitSaturatingNarrowOp( + AILEmitterCtx Context, + Action Emit, + bool SignedSrc, + bool SignedDst, + bool Scalar) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Elems = !Scalar ? 8 >> Op.Size : 1; + int ESize = 8 << Op.Size; + + int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; + + long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (1L << ESize) - 1L; + long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0; + + Context.EmitLdc_I8(0L); + Context.EmitSttmp(); + + for (int Index = 0; Index < Elems; Index++) + { + AILLabel LblLe = new AILLabel(); + AILLabel LblGeEnd = new AILLabel(); + + EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); + + Emit(); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMaxValue); + + Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMaxValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.Emit(OpCodes.Br_S, LblGeEnd); + + Context.MarkLabel(LblLe); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMinValue); + + Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMinValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.MarkLabel(LblGeEnd); + + if (Scalar) + { + EmitVectorZeroLower(Context, Op.Rd); + } + + EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + } + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + Context.EmitLdtmp(); + Context.Emit(OpCodes.Conv_I4); + Context.Emit(OpCodes.Or); + Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + } + public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size) { EmitVectorZeroAll(Context, Reg); diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs index 8475a8a47..9f5af96cb 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs @@ -55,7 +55,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; for (int Index = 0; Index < Elems; Index++) @@ -195,7 +195,7 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs index d98ec012e..368b014fb 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs @@ -105,13 +105,14 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; for (int SElem = 0; SElem < Op.SElems; SElem++) { int Rt = (Op.Rt + SElem) & 0x1f; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitMemAddress(); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index d67946a97..739f01c62 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -14,9 +14,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { Context.EmitLdintzr(Op.Rn); @@ -42,9 +43,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size); @@ -64,7 +66,7 @@ namespace ChocolArm64.Instruction Context.EmitLdvec(Op.Rd); Context.EmitStvectmp(); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Position = Op.Imm4; @@ -329,7 +331,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; @@ -355,7 +357,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; @@ -382,7 +384,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 24d35abe4..6f6b56068 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -27,9 +27,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Shll_V(AILEmitterCtx Context) @@ -45,22 +43,21 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift); + EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op)); } public static void Sli_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - int Shift = Op.Imm - (8 << Op.Size); + int Shift = GetImmShl(Op); - ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; + ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -84,6 +81,39 @@ namespace ChocolArm64.Instruction } } + public static void Sqrshrn_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + Action Emit = () => + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + + Context.EmitLdc_I4(Shift); + + Context.Emit(OpCodes.Shr); + }; + + EmitVectorSaturatingNarrowOpSxSx(Context, Emit); + } + + public static void Srshr_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst); + } + public static void Sshl_V(AILEmitterCtx Context) { EmitVectorShl(Context, Signed: true); @@ -93,9 +123,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Sshr_S(AILEmitterCtx Context) @@ -115,24 +143,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift); + EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op)); } public static void Ssra_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - Action Emit = () => { Context.Emit(OpCodes.Shr); Context.Emit(OpCodes.Add); }; - EmitVectorShImmTernarySx(Context, Emit, Shift); + EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op)); } public static void Ushl_V(AILEmitterCtx Context) @@ -144,9 +168,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Ushr_S(AILEmitterCtx Context) @@ -251,28 +273,51 @@ namespace ChocolArm64.Instruction } } + [Flags] + private enum ShImmFlags + { + None = 0, + + Signed = 1 << 0, + Ternary = 1 << 1, + Rounded = 1 << 2, + + SignedTernary = Signed | Ternary, + SignedRounded = Signed | Rounded + } + private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed); } private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, true, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary); } private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, false); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None); } - private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, bool Ternary, bool Signed) + private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc) + { + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc); + } + + private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + bool Signed = (Flags & ShImmFlags.Signed) != 0; + bool Ternary = (Flags & ShImmFlags.Ternary) != 0; + bool Rounded = (Flags & ShImmFlags.Rounded) != 0; + + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -281,6 +326,13 @@ namespace ChocolArm64.Instruction EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); + if (Rounded) + { + Context.EmitLdc_I8(Rc); + + Context.Emit(OpCodes.Add); + } + Context.EmitLdc_I4(Imm); Emit(); From fc12fca96237c9aadc78436dc7c77480fa83fa09 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:53:44 -0300 Subject: [PATCH 3/4] Allow using ulong max value as yield (#263) --- Ryujinx.HLE/OsHle/Kernel/SvcThread.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs index b0a7490a3..8702203e4 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs @@ -87,7 +87,7 @@ namespace Ryujinx.HLE.OsHle.Kernel KThread CurrThread = Process.GetThread(ThreadState.Tpidr); - if (TimeoutNs == 0) + if (TimeoutNs == 0 || TimeoutNs == ulong.MaxValue) { Process.Scheduler.Yield(CurrThread); } From be31f5b46d16f0f8730d9a9ec71f938eee97524a Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sat, 14 Jul 2018 20:07:44 +0200 Subject: [PATCH 4/4] Improve CountLeadingZeros() algorithm, nits. (#219) * Update AInstEmitSimdArithmetic.cs * Update ASoftFallback.cs * Update ASoftFallback.cs * Update ASoftFallback.cs * Update AInstEmitSimdArithmetic.cs --- .../Instruction/AInstEmitSimdArithmetic.cs | 4 +- ChocolArm64/Instruction/ASoftFallback.cs | 46 ++++++++++++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 2fc8f178d..a39ffc093 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -101,11 +101,13 @@ namespace ChocolArm64.Instruction int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; + int ESize = 8 << Op.Size; + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); - Context.EmitLdc_I4(8 << Op.Size); + Context.EmitLdc_I4(ESize); Emit(); diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 5c0a9c8e3..ae3994b02 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -12,22 +12,42 @@ namespace ChocolArm64.Instruction public static ulong CountLeadingSigns(ulong Value, int Size) { - return CountLeadingZeros((Value >> 1) ^ Value, Size - 1); - } + Value ^= Value >> 1; - public static ulong CountLeadingZeros(ulong Value, int Size) - { - int HighBit = Size - 1; + int HighBit = Size - 2; for (int Bit = HighBit; Bit >= 0; Bit--) { - if (((Value >> Bit) & 1) != 0) + if (((Value >> Bit) & 0b1) != 0) { return (ulong)(HighBit - Bit); } } - return (ulong)Size; + return (ulong)(Size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong Value, int Size) + { + if (Value == 0) + { + return (ulong)Size; + } + + int NibbleIdx = Size; + int PreCount, Count = 0; + + do + { + NibbleIdx -= 4; + PreCount = ClzNibbleTbl[(Value >> NibbleIdx) & 0b1111]; + Count += PreCount; + } + while (PreCount == 4); + + return (ulong)Count; } public static uint CountSetBits8(uint Value) @@ -61,8 +81,8 @@ namespace ChocolArm64.Instruction private static uint Crc32w(uint Crc, uint Poly, uint Val) { - Crc = Crc32(Crc, Poly, (byte)(Val >> 0)); - Crc = Crc32(Crc, Poly, (byte)(Val >> 8)); + Crc = Crc32(Crc, Poly, (byte)(Val >> 0 )); + Crc = Crc32(Crc, Poly, (byte)(Val >> 8 )); Crc = Crc32(Crc, Poly, (byte)(Val >> 16)); Crc = Crc32(Crc, Poly, (byte)(Val >> 24)); @@ -71,8 +91,8 @@ namespace ChocolArm64.Instruction private static uint Crc32x(uint Crc, uint Poly, ulong Val) { - Crc = Crc32(Crc, Poly, (byte)(Val >> 0)); - Crc = Crc32(Crc, Poly, (byte)(Val >> 8)); + Crc = Crc32(Crc, Poly, (byte)(Val >> 0 )); + Crc = Crc32(Crc, Poly, (byte)(Val >> 8 )); Crc = Crc32(Crc, Poly, (byte)(Val >> 16)); Crc = Crc32(Crc, Poly, (byte)(Val >> 24)); Crc = Crc32(Crc, Poly, (byte)(Val >> 32)); @@ -168,9 +188,10 @@ namespace ChocolArm64.Instruction public static long SMulHi128(long LHS, long RHS) { - long Result = (long)UMulHi128((ulong)(LHS), (ulong)(RHS)); + long Result = (long)UMulHi128((ulong)LHS, (ulong)RHS); if (LHS < 0) Result -= RHS; if (RHS < 0) Result -= LHS; + return Result; } @@ -187,6 +208,7 @@ namespace ChocolArm64.Instruction ulong Z1 = T & 0xFFFFFFFF; ulong Z0 = T >> 32; Z1 += LLow * RHigh; + return LHigh * RHigh + Z0 + (Z1 >> 32); } }