CPU (A64): Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. (#5502)

* Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests.

* Ptc.InternalVersion = 5502
This commit is contained in:
Domenico V 2023-07-31 01:57:37 +02:00 committed by GitHub
parent f95b7c5877
commit 2be8b6ea45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 1 deletions

View file

@ -330,6 +330,7 @@ namespace ARMeilleure.Decoders
SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create); SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create); SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create); SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S, InstEmit.Fmaxp_S, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create); SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create); SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create); SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
@ -339,6 +340,7 @@ namespace ARMeilleure.Decoders
SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create); SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create); SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create); SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S, InstEmit.Fminp_S, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create); SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create); SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);

View file

@ -883,6 +883,31 @@ namespace ARMeilleure.Instructions
} }
} }
/// <summary>
/// Emits the scalar FMAXP instruction, choosing one of three code paths
/// based on the active <see cref="Optimizations"/> flags.
/// </summary>
/// <param name="context">Emitter context the generated operations are appended to.</param>
public static void Fmaxp_S(ArmEmitterContext context)
{
    // Path 1: AdvSIMD is enabled, so forward directly to the FMAXP scalar intrinsic.
    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS);

        return;
    }

    // Path 2 (fast): SSE4.1 pairwise max, wrapped by the NaN-processing helper.
    if (Optimizations.FastFP && Optimizations.UseSse41)
    {
        EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
            EmitSse41ProcessNaNsOpF(
                context,
                (a, b) => EmitSse2VectorMaxMinOpF(context, a, b, isMax: true),
                scalar: true,
                lhs,
                rhs));

        return;
    }

    // Path 3 (slow): fall back to the soft-float FPMax routine for the pair.
    EmitScalarPairwiseOpF(context, (lhs, rhs) =>
        EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), lhs, rhs));
}
public static void Fmaxp_V(ArmEmitterContext context) public static void Fmaxp_V(ArmEmitterContext context)
{ {
if (Optimizations.UseAdvSimd) if (Optimizations.UseAdvSimd)
@ -1081,6 +1106,31 @@ namespace ARMeilleure.Instructions
} }
} }
/// <summary>
/// Emits the scalar FMINP instruction, choosing one of three code paths
/// based on the active <see cref="Optimizations"/> flags.
/// </summary>
/// <param name="context">Emitter context the generated operations are appended to.</param>
public static void Fminp_S(ArmEmitterContext context)
{
    // Path 1: AdvSIMD is enabled, so forward directly to the FMINP scalar intrinsic.
    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS);

        return;
    }

    // Path 2 (fast): SSE4.1 pairwise min, wrapped by the NaN-processing helper.
    if (Optimizations.FastFP && Optimizations.UseSse41)
    {
        EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
            EmitSse41ProcessNaNsOpF(
                context,
                (a, b) => EmitSse2VectorMaxMinOpF(context, a, b, isMax: false),
                scalar: true,
                lhs,
                rhs));

        return;
    }

    // Path 3 (slow): fall back to the soft-float FPMin routine for the pair.
    EmitScalarPairwiseOpF(context, (lhs, rhs) =>
        EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), lhs, rhs));
}
public static void Fminp_V(ArmEmitterContext context) public static void Fminp_V(ArmEmitterContext context)
{ {
if (Optimizations.UseAdvSimd) if (Optimizations.UseAdvSimd)

View file

@ -228,6 +228,7 @@ namespace ARMeilleure.Instructions
Fmaxnmp_S, Fmaxnmp_S,
Fmaxnmp_V, Fmaxnmp_V,
Fmaxnmv_V, Fmaxnmv_V,
Fmaxp_S,
Fmaxp_V, Fmaxp_V,
Fmaxv_V, Fmaxv_V,
Fmin_S, Fmin_S,
@ -237,6 +238,7 @@ namespace ARMeilleure.Instructions
Fminnmp_S, Fminnmp_S,
Fminnmp_V, Fminnmp_V,
Fminnmv_V, Fminnmv_V,
Fminp_S,
Fminp_V, Fminp_V,
Fminv_V, Fminv_V,
Fmla_Se, Fmla_Se,

View file

@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project. private const uint InternalVersion = 5502; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";

View file

@ -764,7 +764,9 @@ namespace Ryujinx.Tests.Cpu
{ {
0x7E30D820u, // FADDP S0, V1.2S 0x7E30D820u, // FADDP S0, V1.2S
0x7E30C820u, // FMAXNMP S0, V1.2S 0x7E30C820u, // FMAXNMP S0, V1.2S
0x7E30F820u, // FMAXP S0, V1.2S
0x7EB0C820u, // FMINNMP S0, V1.2S 0x7EB0C820u, // FMINNMP S0, V1.2S
0x7EB0F820u, // FMINP S0, V1.2S
}; };
} }
@ -774,7 +776,9 @@ namespace Ryujinx.Tests.Cpu
{ {
0x7E70D820u, // FADDP D0, V1.2D 0x7E70D820u, // FADDP D0, V1.2D
0x7E70C820u, // FMAXNMP D0, V1.2D 0x7E70C820u, // FMAXNMP D0, V1.2D
0x7E70F820u, // FMAXP D0, V1.2D
0x7EF0C820u, // FMINNMP D0, V1.2D 0x7EF0C820u, // FMINNMP D0, V1.2D
0x7EF0F820u, // FMINP D0, V1.2D
}; };
} }