From 8e70b2d3fbec84b39babb7ecfb852c5488cd01f0 Mon Sep 17 00:00:00 2001
From: Merry <git@mary.rs>
Date: Sat, 5 Feb 2022 19:26:38 +0000
Subject: [PATCH] ARMeilleure: A32: Implement SHSUB8

---
 ARMeilleure/Decoders/OpCodeTable.cs       |  1 +
 ARMeilleure/Instructions/InstEmitAlu32.cs | 66 ++++++++++++++---------
 ARMeilleure/Instructions/InstName.cs      |  1 +
 Ryujinx.Tests/Cpu/CpuTestAlu32.cs         | 21 +++++++-
 4 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 5500d8368..53328a735 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -734,6 +734,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx,    InstEmit32.Sbfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv,    InstEmit32.Sdiv,    OpCode32AluMla.Create);
             SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8,  InstEmit32.Shadd8,  OpCode32AluReg.Create);
+            SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8,  InstEmit32.Shsub8,  OpCode32AluReg.Create);
             SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__,  InstEmit32.Smla__,  OpCode32AluMla.Create);
             SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal,   InstEmit32.Smlal,   OpCode32AluUmull.Create);
             SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create);
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
index 112748e14..66b8a8a7e 100644
--- a/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -392,6 +392,11 @@ namespace ARMeilleure.Instructions
             EmitHadd8(context, false);
         }
 
+        public static void Shsub8(ArmEmitterContext context)
+        {
+            EmitHsub8(context, false); // unsigned: false -> signed variant, keeps the sign-extension step.
+        }
+
         public static void Ssat(ArmEmitterContext context)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@@ -484,31 +489,7 @@ namespace ARMeilleure.Instructions
 
         public static void Uhsub8(ArmEmitterContext context)
         {
-            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
-
-            Operand m = GetIntA32(context, op.Rm);
-            Operand n = GetIntA32(context, op.Rn);
-
-            Operand left, right, res;
-
-            // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
-            // Note that x^y always contains the LSB of the result.
-            // Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
-
-            left = context.BitwiseExclusiveOr(m, n);
-            right = context.BitwiseAnd(left, m);
-            left = context.ShiftRightUI(left, Const(1));
-
-            // We must now perform a partitioned subtraction.
-            // We can do this because minuend contains 7 bit fields.
-            // We use the extra bit in minuend as a bit to borrow from; we set this bit.
-            // We invert this bit at the end as this tells us if that bit was borrowed from.
-
-            res = context.BitwiseOr(left, Const(0x80808080));
-            res = context.Subtract(res, right);
-            res = context.BitwiseExclusiveOr(res, Const(0x80808080));
-
-            SetIntA32(context, op.Rd, res);
+            EmitHsub8(context, true); // unsigned: true -> shared emitter skips the sign-extension step.
         }
 
         public static void Usat(ArmEmitterContext context)
@@ -710,6 +691,41 @@ namespace ARMeilleure.Instructions
             SetIntA32(context, op.Rd, res);
         }
 
+        private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
+        {
+            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+            Operand m = GetIntA32(context, op.Rm);
+            Operand n = GetIntA32(context, op.Rn);
+            Operand left, right, carry, res;
+
+            // Per-byte halving subtract of n - m. With x = n and y = m, this relies on the equality
+            // x-y == (x^y) - (((x^y)&y) << 1); note that x^y always contains the LSB of the result.
+            // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
+
+            carry = context.BitwiseExclusiveOr(m, n);
+            left = context.ShiftRightUI(carry, Const(1));
+            right = context.BitwiseAnd(carry, m);
+
+            // We must now perform a partitioned subtraction.
+            // We can do this because the minuend contains 7-bit fields: bit 7 of every byte is overwritten below.
+            // We use that extra bit in the minuend as a bit to borrow from; we set this bit.
+            // We invert this bit at the end as this tells us if that bit was borrowed from.
+
+            res = context.BitwiseOr(left, Const(0x80808080));
+            res = context.Subtract(res, right);
+            res = context.BitwiseExclusiveOr(res, Const(0x80808080));
+
+            if (!unsigned)
+            {
+                // Signed variant: XOR bit 7 of each byte of x^y back in, sign extending the result into this bit.
+                carry = context.BitwiseAnd(carry, Const(0x80808080));
+                res = context.BitwiseExclusiveOr(res, carry);
+            }
+
+            SetIntA32(context, op.Rd, res);
+        }
+
         private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 4503be818..3e0164958 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -80,6 +80,7 @@ namespace ARMeilleure.Instructions
         Sbcs,
         Sbfm,
         Sdiv,
+        Shsub8,
         Smaddl,
         Smsubl,
         Smulh,
diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
index 9a664c4d4..170bf98bd 100644
--- a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
@@ -96,6 +96,25 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise]
+        public void Shsub8([Values(0u, 0xdu)] uint rd,
+                           [Values(1u)] uint rm,
+                           [Values(2u)] uint rn,
+                           [Random(RndCnt)] uint w0,
+                           [Random(RndCnt)] uint w1,
+                           [Random(RndCnt)] uint w2)
+        {
+            uint opcode = 0xE6300FF0u; // SHSUB8 R0, R0, R0
+
+            opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); // Rm -> bits 3:0, Rd -> bits 15:12, Rn -> bits 19:16.
+
+            uint sp = TestContext.CurrentContext.Random.NextUInt(); // Random initial SP; rd = 0xd presumably targets SP (TODO confirm).
+
+            SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise]
         public void Ssat_Usat([ValueSource("_Ssat_Usat_")] uint opcode,
                               [Values(0u, 0xdu)] uint rd,
@@ -158,7 +177,7 @@ namespace Ryujinx.Tests.Cpu
                            [Random(RndCnt)] uint w1,
                            [Random(RndCnt)] uint w2)
         {
-            uint opcode = 0xE6700FF0u; //UHSUB8 R0, R0, R0
+            uint opcode = 0xE6700FF0u; // UHSUB8 R0, R0, R0
 
             opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);