From f0a59f345c633b757ebd2a22fca23d7dab0f9f99 Mon Sep 17 00:00:00 2001
From: gdk <gab.dark.100@gmail.com>
Date: Thu, 14 Nov 2019 14:20:30 -0300
Subject: [PATCH] Add partial support for the BRX shader instruction

---
 Ryujinx.Graphics.Shader/Decoders/Block.cs     |  22 +--
 Ryujinx.Graphics.Shader/Decoders/Decoder.cs   | 140 ++++++++++++------
 .../{FmulScale.cs => FPMultiplyScale.cs}      |   2 +-
 .../Decoders/IOpCodeFArith.cs                 |   2 +-
 .../Decoders/OpCodeBranchIndir.cs             |  23 +++
 .../Decoders/OpCodeBranchPop.cs               |  15 ++
 .../Decoders/OpCodeFArith.cs                  |   4 +-
 .../Decoders/OpCodeFArithImm32.cs             |   2 +-
 .../Decoders/{OpCodeSsy.cs => OpCodePush.cs}  |   8 +-
 .../Decoders/OpCodeSync.cs                    |  15 --
 .../Decoders/OpCodeTable.cs                   |   9 +-
 .../Instructions/InstEmitFArith.cs            |  14 +-
 .../Instructions/InstEmitFlow.cs              |  60 ++++++--
 Ryujinx.Graphics.Shader/ShaderProgram.cs      |   5 +-
 .../Translation/Translator.cs                 |  47 +++---
 15 files changed, 242 insertions(+), 126 deletions(-)
 rename Ryujinx.Graphics.Shader/Decoders/{FmulScale.cs => FPMultiplyScale.cs} (89%)
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeBranchIndir.cs
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeBranchPop.cs
 rename Ryujinx.Graphics.Shader/Decoders/{OpCodeSsy.cs => OpCodePush.cs} (58%)
 delete mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeSync.cs

diff --git a/Ryujinx.Graphics.Shader/Decoders/Block.cs b/Ryujinx.Graphics.Shader/Decoders/Block.cs
index b5e610d71..238b0bd6e 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Block.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Block.cs
@@ -11,15 +11,17 @@ namespace Ryujinx.Graphics.Shader.Decoders
         public Block Next   { get; set; }
         public Block Branch { get; set; }
 
-        public List<OpCode>    OpCodes    { get; }
-        public List<OpCodeSsy> SsyOpCodes { get; }
+        public OpCodeBranchIndir BrIndir { get; set; }
+
+        public List<OpCode>    OpCodes     { get; }
+        public List<OpCodePush> PushOpCodes { get; }
 
         public Block(ulong address)
         {
             Address = address;
 
-            OpCodes    = new List<OpCode>();
-            SsyOpCodes = new List<OpCodeSsy>();
+            OpCodes     = new List<OpCode>();
+            PushOpCodes = new List<OpCodePush>();
         }
 
         public void Split(Block rightBlock)
@@ -45,7 +47,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
 
-            rightBlock.UpdateSsyOpCodes();
+            rightBlock.UpdatePushOps();
 
             EndAddress = rightBlock.Address;
 
@@ -54,7 +56,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             OpCodes.RemoveRange(splitIndex, splitCount);
 
-            UpdateSsyOpCodes();
+            UpdatePushOps();
         }
 
         private static int BinarySearch(List<OpCode> opCodes, ulong address)
@@ -99,18 +101,18 @@ namespace Ryujinx.Graphics.Shader.Decoders
             return null;
         }
 
-        public void UpdateSsyOpCodes()
+        public void UpdatePushOps()
         {
-            SsyOpCodes.Clear();
+            PushOpCodes.Clear();
 
             for (int index = 0; index < OpCodes.Count; index++)
             {
-                if (!(OpCodes[index] is OpCodeSsy op))
+                if (!(OpCodes[index] is OpCodePush op))
                 {
                     continue;
                 }
 
-                SsyOpCodes.Add(op);
+                PushOpCodes.Add(op);
             }
         }
     }
diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
index 4078440b1..6841c98d3 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -43,9 +43,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 return block;
             }
 
-            ulong startAddress = headerSize;
-
-            GetBlock(startAddress);
+            GetBlock(0);
 
             while (workQueue.TryDequeue(out Block currBlock))
             {
@@ -67,7 +65,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 }
 
                 // If we have a block after the current one, set the limit address.
-                ulong limitAddress = (ulong)code.Length;
+                ulong limitAddress = (ulong)code.Length - headerSize;
 
                 if (nBlkIndex != blocks.Count)
                 {
@@ -85,13 +83,15 @@ namespace Ryujinx.Graphics.Shader.Decoders
                     }
                 }
 
-                FillBlock(code, currBlock, limitAddress, startAddress);
+                FillBlock(code, currBlock, limitAddress, headerSize);
 
                 if (currBlock.OpCodes.Count != 0)
                 {
-                    foreach (OpCodeSsy ssyOp in currBlock.SsyOpCodes)
+                    // We should have blocks for all possible branch targets,
+                    // including those from SSY/PBK instructions.
+                    foreach (OpCodePush pushOp in currBlock.PushOpCodes)
                     {
-                        GetBlock(ssyOp.GetAbsoluteAddress());
+                        GetBlock(pushOp.GetAbsoluteAddress());
                     }
 
                     // Set child blocks. "Branch" is the block the branch instruction
@@ -100,9 +100,25 @@ namespace Ryujinx.Graphics.Shader.Decoders
                     // or end of program, Next is null.
                     OpCode lastOp = currBlock.GetLastOp();
 
-                    if (lastOp is OpCodeBranch op)
+                    if (lastOp is OpCodeBranch opBr)
                     {
-                        currBlock.Branch = GetBlock(op.GetAbsoluteAddress());
+                        currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
+                    }
+                    else if (lastOp is OpCodeBranchIndir opBrIndir)
+                    {
+                        // An indirect branch could go anywhere; we don't know the target.
+                        // Those instructions are usually generated by the switch to jump
+                        // table compiler optimization, and in those cases the possible
+                        // targets seem to always be right after the BRX itself. We can assume
+                        // that the possible targets are all the blocks in-between the
+                        // instruction right after the BRX, and the common target that
+                        // all the "cases" should eventually jump to, acting as the
+                        // switch break.
+                        Block firstTarget = GetBlock(currBlock.EndAddress);
+
+                        firstTarget.BrIndir = opBrIndir;
+
+                        opBrIndir.PossibleTargets.Add(firstTarget);
                     }
 
                     if (!IsUnconditionalBranch(lastOp))
@@ -122,13 +138,28 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 {
                     blocks.Add(currBlock);
                 }
+
+                // Do we have a block after the current one?
+                if (!IsExit(currBlock.GetLastOp()) && currBlock.BrIndir != null)
+                {
+                    bool targetVisited = visited.ContainsKey(currBlock.EndAddress);
+
+                    Block possibleTarget = GetBlock(currBlock.EndAddress);
+
+                    currBlock.BrIndir.PossibleTargets.Add(possibleTarget);
+
+                    if (!targetVisited)
+                    {
+                        possibleTarget.BrIndir = currBlock.BrIndir;
+                    }
+                }
             }
 
-            foreach (Block ssyBlock in blocks.Where(x => x.SsyOpCodes.Count != 0))
+            foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
             {
-                for (int ssyIndex = 0; ssyIndex < ssyBlock.SsyOpCodes.Count; ssyIndex++)
+                for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
                 {
-                    PropagateSsy(visited, ssyBlock, ssyIndex);
+                    PropagatePushOp(visited, block, pushOpIndex);
                 }
             }
 
@@ -180,21 +211,21 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             do
             {
-                if (address >= limitAddress)
+                if (address + 7 >= limitAddress)
                 {
                     break;
                 }
 
                 // Ignore scheduling instructions, which are written every 32 bytes.
-                if (((address - startAddress) & 0x1f) == 0)
+                if ((address & 0x1f) == 0)
                 {
                     address += 8;
 
                     continue;
                 }
 
-                uint word0 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)address));
-                uint word1 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)address + 4));
+                uint word0 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)(startAddress + address)));
+                uint word1 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)(startAddress + address + 4)));
 
                 ulong opAddress = address;
 
@@ -221,7 +252,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             block.EndAddress = address;
 
-            block.UpdateSsyOpCodes();
+            block.UpdatePushOps();
         }
 
         private static bool IsUnconditionalBranch(OpCode opCode)
@@ -242,10 +273,16 @@ namespace Ryujinx.Graphics.Shader.Decoders
         private static bool IsBranch(OpCode opCode)
         {
             return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) ||
-                    opCode is OpCodeSync ||
+                    opCode is OpCodeBranchIndir ||
+                    opCode is OpCodeBranchPop ||
                     opCode is OpCodeExit;
         }
 
+        private static bool IsExit(OpCode opCode)
+        {
+            return opCode is OpCodeExit;
+        }
+
         private static OpCode MakeOpCode(Type type, InstEmitter emitter, ulong address, long opCode)
         {
             if (type == null)
@@ -282,8 +319,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
             private enum RestoreType
             {
                 None,
-                PopSsy,
-                PushSync
+                PopPushOp,
+                PushBranchOp
             }
 
             private RestoreType _restoreType;
@@ -299,45 +336,45 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 _restoreValue = 0;
             }
 
-            public PathBlockState(int oldSsyStackSize)
+            public PathBlockState(int oldStackSize)
             {
                 Block         = null;
-                _restoreType  = RestoreType.PopSsy;
-                _restoreValue = (ulong)oldSsyStackSize;
+                _restoreType  = RestoreType.PopPushOp;
+                _restoreValue = (ulong)oldStackSize;
             }
 
             public PathBlockState(ulong syncAddress)
             {
                 Block         = null;
-                _restoreType  = RestoreType.PushSync;
+                _restoreType  = RestoreType.PushBranchOp;
                 _restoreValue = syncAddress;
             }
 
-            public void RestoreStackState(Stack<ulong> ssyStack)
+            public void RestoreStackState(Stack<ulong> branchStack)
             {
-                if (_restoreType == RestoreType.PushSync)
+                if (_restoreType == RestoreType.PushBranchOp)
                 {
-                    ssyStack.Push(_restoreValue);
+                    branchStack.Push(_restoreValue);
                 }
-                else if (_restoreType == RestoreType.PopSsy)
+                else if (_restoreType == RestoreType.PopPushOp)
                 {
-                    while (ssyStack.Count > (uint)_restoreValue)
+                    while (branchStack.Count > (uint)_restoreValue)
                     {
-                        ssyStack.Pop();
+                        branchStack.Pop();
                     }
                 }
             }
         }
 
-        private static void PropagateSsy(Dictionary<ulong, Block> blocks, Block ssyBlock, int ssyIndex)
+        private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
         {
-            OpCodeSsy ssyOp = ssyBlock.SsyOpCodes[ssyIndex];
+            OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];
 
             Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
 
             HashSet<Block> visited = new HashSet<Block>();
 
-            Stack<ulong> ssyStack = new Stack<ulong>();
+            Stack<ulong> branchStack = new Stack<ulong>();
 
             void Push(PathBlockState pbs)
             {
@@ -347,32 +384,32 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 }
             }
 
-            Push(new PathBlockState(ssyBlock));
+            Push(new PathBlockState(currBlock));
 
             while (workQueue.TryPop(out PathBlockState pbs))
             {
                 if (pbs.ReturningFromVisit)
                 {
-                    pbs.RestoreStackState(ssyStack);
+                    pbs.RestoreStackState(branchStack);
 
                     continue;
                 }
 
                 Block current = pbs.Block;
 
-                int ssyOpCodesCount = current.SsyOpCodes.Count;
+                int pushOpsCount = current.PushOpCodes.Count;
 
-                if (ssyOpCodesCount != 0)
+                if (pushOpsCount != 0)
                 {
-                    Push(new PathBlockState(ssyStack.Count));
+                    Push(new PathBlockState(branchStack.Count));
 
-                    for (int index = ssyIndex; index < ssyOpCodesCount; index++)
+                    for (int index = pushOpIndex; index < pushOpsCount; index++)
                     {
-                        ssyStack.Push(current.SsyOpCodes[index].GetAbsoluteAddress());
+                        branchStack.Push(current.PushOpCodes[index].GetAbsoluteAddress());
                     }
                 }
 
-                ssyIndex = 0;
+                pushOpIndex = 0;
 
                 if (current.Next != null)
                 {
@@ -383,17 +420,24 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 {
                     Push(new PathBlockState(current.Branch));
                 }
-                else if (current.GetLastOp() is OpCodeSync op)
+                else if (current.GetLastOp() is OpCodeBranchIndir brIndir)
                 {
-                    ulong syncAddress = ssyStack.Pop();
-
-                    if (ssyStack.Count == 0)
+                    foreach (Block possibleTarget in brIndir.PossibleTargets)
                     {
-                        ssyStack.Push(syncAddress);
+                        Push(new PathBlockState(possibleTarget));
+                    }
+                }
+                else if (current.GetLastOp() is OpCodeBranchPop op)
+                {
+                    ulong syncAddress = branchStack.Pop();
 
-                        op.Targets.Add(ssyOp, op.Targets.Count);
+                    if (branchStack.Count == 0)
+                    {
+                        branchStack.Push(syncAddress);
 
-                        ssyOp.Syncs.TryAdd(op, Local());
+                        op.Targets.Add(pushOp, op.Targets.Count);
+
+                        pushOp.PopOps.TryAdd(op, Local());
                     }
                     else
                     {
diff --git a/Ryujinx.Graphics.Shader/Decoders/FmulScale.cs b/Ryujinx.Graphics.Shader/Decoders/FPMultiplyScale.cs
similarity index 89%
rename from Ryujinx.Graphics.Shader/Decoders/FmulScale.cs
rename to Ryujinx.Graphics.Shader/Decoders/FPMultiplyScale.cs
index c35c6e489..398c0e66f 100644
--- a/Ryujinx.Graphics.Shader/Decoders/FmulScale.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/FPMultiplyScale.cs
@@ -1,6 +1,6 @@
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    enum FmulScale
+    enum FPMultiplyScale
     {
         None      = 0,
         Divide2   = 1,
diff --git a/Ryujinx.Graphics.Shader/Decoders/IOpCodeFArith.cs b/Ryujinx.Graphics.Shader/Decoders/IOpCodeFArith.cs
index d68ccf593..3d06eae0d 100644
--- a/Ryujinx.Graphics.Shader/Decoders/IOpCodeFArith.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/IOpCodeFArith.cs
@@ -4,7 +4,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
     {
         RoundingMode RoundingMode { get; }
 
-        FmulScale Scale { get; }
+        FPMultiplyScale Scale { get; }
 
         bool FlushToZero { get; }
         bool AbsoluteA   { get; }
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchIndir.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchIndir.cs
new file mode 100644
index 000000000..3e694e61c
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchIndir.cs
@@ -0,0 +1,23 @@
+using Ryujinx.Graphics.Shader.Instructions;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    class OpCodeBranchIndir : OpCode
+    {
+        public HashSet<Block> PossibleTargets { get; }
+
+        public Register Ra { get; }
+
+        public int Offset { get; }
+
+        public OpCodeBranchIndir(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+        {
+            PossibleTargets = new HashSet<Block>();
+
+            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
+
+            Offset = ((int)(opCode >> 20) << 8) >> 8;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchPop.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchPop.cs
new file mode 100644
index 000000000..7ea66fe45
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranchPop.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Graphics.Shader.Instructions;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    class OpCodeBranchPop : OpCode
+    {
+        public Dictionary<OpCodePush, int> Targets { get; }
+
+        public OpCodeBranchPop(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+        {
+            Targets = new Dictionary<OpCodePush, int>();
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeFArith.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeFArith.cs
index c88f7f0ee..cfbf65c3d 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeFArith.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeFArith.cs
@@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
     {
         public RoundingMode RoundingMode { get; }
 
-        public FmulScale Scale { get; }
+        public FPMultiplyScale Scale { get; }
 
         public bool FlushToZero { get; }
         public bool AbsoluteA   { get; }
@@ -15,7 +15,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
         {
             RoundingMode = (RoundingMode)opCode.Extract(39, 2);
 
-            Scale = (FmulScale)opCode.Extract(41, 3);
+            Scale = (FPMultiplyScale)opCode.Extract(41, 3);
 
             FlushToZero = opCode.Extract(44);
             AbsoluteA   = opCode.Extract(46);
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeFArithImm32.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeFArithImm32.cs
index ec9da6f30..aecc5143c 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeFArithImm32.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeFArithImm32.cs
@@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
     {
         public RoundingMode RoundingMode => RoundingMode.ToNearest;
 
-        public FmulScale Scale => FmulScale.None;
+        public FPMultiplyScale Scale => FPMultiplyScale.None;
 
         public bool FlushToZero { get; }
         public bool AbsoluteA   { get; }
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodePush.cs
similarity index 58%
rename from Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs
rename to Ryujinx.Graphics.Shader/Decoders/OpCodePush.cs
index d3831e22d..a7657bcf8 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodePush.cs
@@ -4,13 +4,13 @@ using System.Collections.Generic;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeSsy : OpCodeBranch
+    class OpCodePush : OpCodeBranch
     {
-        public Dictionary<OpCodeSync, Operand> Syncs { get; }
+        public Dictionary<OpCodeBranchPop, Operand> PopOps { get; }
 
-        public OpCodeSsy(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+        public OpCodePush(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
         {
-            Syncs = new Dictionary<OpCodeSync, Operand>();
+            PopOps = new Dictionary<OpCodeBranchPop, Operand>();
 
             Predicate = new Register(RegisterConsts.PredicateTrueIndex, RegisterType.Predicate);
 
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSync.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeSync.cs
deleted file mode 100644
index 081d08a0d..000000000
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeSync.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-using Ryujinx.Graphics.Shader.Instructions;
-using System.Collections.Generic;
-
-namespace Ryujinx.Graphics.Shader.Decoders
-{
-    class OpCodeSync : OpCode
-    {
-        public Dictionary<OpCodeSsy, int> Targets { get; }
-
-        public OpCodeSync(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
-        {
-            Targets = new Dictionary<OpCodeSsy, int>();
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
index 58bd2b88e..bc30940d8 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
@@ -41,7 +41,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Set("0101001111110x", InstEmit.Bfi,     typeof(OpCodeAluRegCbuf));
             Set("0101101111110x", InstEmit.Bfi,     typeof(OpCodeAluReg));
             Set("111000100100xx", InstEmit.Bra,     typeof(OpCodeBranch));
-            Set("111000110100xx", InstEmit.Brk,     typeof(OpCodeSync));
+            Set("111000110100xx", InstEmit.Brk,     typeof(OpCodeBranchPop));
+            Set("111000100101xx", InstEmit.Brx,     typeof(OpCodeBranchIndir));
             Set("0101000010100x", InstEmit.Csetp,   typeof(OpCodePsetp));
             Set("111000110000xx", InstEmit.Exit,    typeof(OpCodeExit));
             Set("0100110010101x", InstEmit.F2F,     typeof(OpCodeFArithCbuf));
@@ -137,7 +138,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Set("0101110010011x", InstEmit.Mov,     typeof(OpCodeAluReg));
             Set("0101000010000x", InstEmit.Mufu,    typeof(OpCodeFArith));
             Set("1111101111100x", InstEmit.Out,     typeof(OpCode));
-            Set("111000101010xx", InstEmit.Pbk,     typeof(OpCodeSsy));
+            Set("111000101010xx", InstEmit.Pbk,     typeof(OpCodePush));
             Set("0100110000001x", InstEmit.Popc,    typeof(OpCodeAluCbuf));
             Set("0011100x00001x", InstEmit.Popc,    typeof(OpCodeAluImm));
             Set("0101110000001x", InstEmit.Popc,    typeof(OpCodeAluReg));
@@ -157,12 +158,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Set("0100110000101x", InstEmit.Shr,     typeof(OpCodeAluCbuf));
             Set("0011100x00101x", InstEmit.Shr,     typeof(OpCodeAluImm));
             Set("0101110000101x", InstEmit.Shr,     typeof(OpCodeAluReg));
-            Set("111000101001xx", InstEmit.Ssy,     typeof(OpCodeSsy));
+            Set("111000101001xx", InstEmit.Ssy,     typeof(OpCodePush));
             Set("1110111101010x", InstEmit.St,      typeof(OpCodeMemory));
             Set("1110111011011x", InstEmit.Stg,     typeof(OpCodeMemory));
             Set("1110111101011x", InstEmit.Sts,     typeof(OpCodeMemory));
             Set("11101011001xxx", InstEmit.Sust,    typeof(OpCodeImage));
-            Set("1111000011111x", InstEmit.Sync,    typeof(OpCodeSync));
+            Set("1111000011111x", InstEmit.Sync,    typeof(OpCodeBranchPop));
             Set("110000xxxx111x", InstEmit.Tex,     typeof(OpCodeTex));
             Set("1101111010111x", InstEmit.TexB,    typeof(OpCodeTexB));
             Set("1101x00xxxxxxx", InstEmit.Texs,    typeof(OpCodeTexs));
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
index 4f7072eb0..79d92c2d7 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
@@ -97,14 +97,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             switch (op.Scale)
             {
-                case FmulScale.None: break;
+                case FPMultiplyScale.None: break;
 
-                case FmulScale.Divide2:   srcA = context.FPDivide  (srcA, ConstF(2)); break;
-                case FmulScale.Divide4:   srcA = context.FPDivide  (srcA, ConstF(4)); break;
-                case FmulScale.Divide8:   srcA = context.FPDivide  (srcA, ConstF(8)); break;
-                case FmulScale.Multiply2: srcA = context.FPMultiply(srcA, ConstF(2)); break;
-                case FmulScale.Multiply4: srcA = context.FPMultiply(srcA, ConstF(4)); break;
-                case FmulScale.Multiply8: srcA = context.FPMultiply(srcA, ConstF(8)); break;
+                case FPMultiplyScale.Divide2:   srcA = context.FPDivide  (srcA, ConstF(2)); break;
+                case FPMultiplyScale.Divide4:   srcA = context.FPDivide  (srcA, ConstF(4)); break;
+                case FPMultiplyScale.Divide8:   srcA = context.FPDivide  (srcA, ConstF(8)); break;
+                case FPMultiplyScale.Multiply2: srcA = context.FPMultiply(srcA, ConstF(2)); break;
+                case FPMultiplyScale.Multiply4: srcA = context.FPMultiply(srcA, ConstF(4)); break;
+                case FPMultiplyScale.Multiply8: srcA = context.FPMultiply(srcA, ConstF(8)); break;
 
                 default: break; //TODO: Warning.
             }
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
index e17c9d6c6..4a9f5f7fc 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
@@ -20,6 +20,36 @@ namespace Ryujinx.Graphics.Shader.Instructions
             EmitBrkOrSync(context);
         }
 
+        public static void Brx(EmitterContext context)
+        {
+            OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp;
+
+            int offset = (int)op.Address + 8 + op.Offset;
+
+            Operand address = context.IAdd(Register(op.Ra), Const(offset));
+
+            // Sorting the target addresses in descending order improves the code,
+            // since it will always check the most distant targets first, then the
+            // near ones. This can be easily transformed into if/else statements.
+            IOrderedEnumerable<Block> sortedTargets = op.PossibleTargets.OrderByDescending(x => x.Address);
+
+            Block lastTarget = sortedTargets.LastOrDefault();
+
+            foreach (Block possibleTarget in sortedTargets)
+            {
+                Operand label = context.GetLabel(possibleTarget.Address);
+
+                if (possibleTarget != lastTarget)
+                {
+                    context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)possibleTarget.Address)));
+                }
+                else
+                {
+                    context.Branch(label);
+                }
+            }
+        }
+
         public static void Exit(EmitterContext context)
         {
             OpCodeExit op = (OpCodeExit)context.CurrOp;
@@ -54,45 +84,45 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
         private static void EmitPbkOrSsy(EmitterContext context)
         {
-            OpCodeSsy op = (OpCodeSsy)context.CurrOp;
+            OpCodePush op = (OpCodePush)context.CurrOp;
 
-            foreach (KeyValuePair<OpCodeSync, Operand> kv in op.Syncs)
+            foreach (KeyValuePair<OpCodeBranchPop, Operand> kv in op.PopOps)
             {
-                OpCodeSync opSync = kv.Key;
+                OpCodeBranchPop opSync = kv.Key;
 
                 Operand local = kv.Value;
 
-                int ssyIndex = opSync.Targets[op];
+                int pushOpIndex = opSync.Targets[op];
 
-                context.Copy(local, Const(ssyIndex));
+                context.Copy(local, Const(pushOpIndex));
             }
         }
 
         private static void EmitBrkOrSync(EmitterContext context)
         {
-            OpCodeSync op = (OpCodeSync)context.CurrOp;
+            OpCodeBranchPop op = (OpCodeBranchPop)context.CurrOp;
 
             if (op.Targets.Count == 1)
             {
-                // If we have only one target, then the SSY is basically
+                // If we have only one target, then the SSY/PBK is basically
                 // a branch, we can produce better codegen for this case.
-                OpCodeSsy opSsy = op.Targets.Keys.First();
+                OpCodePush pushOp = op.Targets.Keys.First();
 
-                EmitBranch(context, opSsy.GetAbsoluteAddress());
+                EmitBranch(context, pushOp.GetAbsoluteAddress());
             }
             else
             {
-                foreach (KeyValuePair<OpCodeSsy, int> kv in op.Targets)
+                foreach (KeyValuePair<OpCodePush, int> kv in op.Targets)
                 {
-                    OpCodeSsy opSsy = kv.Key;
+                    OpCodePush pushOp = kv.Key;
 
-                    Operand label = context.GetLabel(opSsy.GetAbsoluteAddress());
+                    Operand label = context.GetLabel(pushOp.GetAbsoluteAddress());
 
-                    Operand local = opSsy.Syncs[op];
+                    Operand local = pushOp.PopOps[op];
 
-                    int ssyIndex = kv.Value;
+                    int pushOpIndex = kv.Value;
 
-                    context.BranchIfTrue(label, context.ICompareEqual(local, Const(ssyIndex)));
+                    context.BranchIfTrue(label, context.ICompareEqual(local, Const(pushOpIndex)));
                 }
             }
         }
diff --git a/Ryujinx.Graphics.Shader/ShaderProgram.cs b/Ryujinx.Graphics.Shader/ShaderProgram.cs
index 5d04f2cf4..4d0c6e5bd 100644
--- a/Ryujinx.Graphics.Shader/ShaderProgram.cs
+++ b/Ryujinx.Graphics.Shader/ShaderProgram.cs
@@ -10,11 +10,14 @@ namespace Ryujinx.Graphics.Shader
 
         public string Code { get; private set; }
 
-        internal ShaderProgram(ShaderProgramInfo info, ShaderStage stage, string code)
+        public int Size { get; }
+
+        internal ShaderProgram(ShaderProgramInfo info, ShaderStage stage, string code, int size)
         {
             Info  = info;
             Stage = stage;
             Code  = code;
+            Size  = size;
         }
 
         public void Prepend(string line)
diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs
index b7a5bffab..55617b24c 100644
--- a/Ryujinx.Graphics.Shader/Translation/Translator.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -46,7 +46,12 @@ namespace Ryujinx.Graphics.Shader.Translation
             bool compute   = (translationConfig.Flags & TranslationFlags.Compute)   != 0;
             bool debugMode = (translationConfig.Flags & TranslationFlags.DebugMode) != 0;
 
-            Operation[] ops = DecodeShader(code, compute, debugMode, out ShaderHeader header);
+            Operation[] ops = DecodeShader(
+                code,
+                compute,
+                debugMode,
+                out ShaderHeader header,
+                out int size);
 
             ShaderStage stage;
 
@@ -76,15 +81,15 @@ namespace Ryujinx.Graphics.Shader.Translation
                 maxOutputVertexCount,
                 outputTopology);
 
-            return Translate(ops, config);
+            return Translate(ops, config, size);
         }
 
         public static ShaderProgram Translate(Span<byte> vpACode, Span<byte> vpBCode, TranslationConfig translationConfig)
         {
             bool debugMode = (translationConfig.Flags & TranslationFlags.DebugMode) != 0;
 
-            Operation[] vpAOps = DecodeShader(vpACode, compute: false, debugMode, out _);
-            Operation[] vpBOps = DecodeShader(vpBCode, compute: false, debugMode, out ShaderHeader header);
+            Operation[] vpAOps = DecodeShader(vpACode, compute: false, debugMode, out _, out _);
+            Operation[] vpBOps = DecodeShader(vpBCode, compute: false, debugMode, out ShaderHeader header, out int sizeB);
 
             ShaderConfig config = new ShaderConfig(
                 header.Stage,
@@ -93,10 +98,10 @@ namespace Ryujinx.Graphics.Shader.Translation
                 header.MaxOutputVertexCount,
                 header.OutputTopology);
 
-            return Translate(Combine(vpAOps, vpBOps), config);
+            return Translate(Combine(vpAOps, vpBOps), config, sizeB);
         }
 
-        private static ShaderProgram Translate(Operation[] ops, ShaderConfig config)
+        private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size)
         {
             BasicBlock[] irBlocks = ControlFlowGraph.MakeCfg(ops);
 
@@ -122,17 +127,20 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             string glslCode = program.Code;
 
-            return new ShaderProgram(spInfo, config.Stage, glslCode);
+            return new ShaderProgram(spInfo, config.Stage, glslCode, size);
         }
 
-        private static Operation[] DecodeShader(Span<byte> code, bool compute, bool debugMode, out ShaderHeader header)
+        private static Operation[] DecodeShader(
+            Span<byte>       code,
+            bool             compute,
+            bool             debugMode,
+            out ShaderHeader header,
+            out int          size)
         {
             Block[] cfg;
 
             EmitterContext context;
 
-            ulong headerSize;
-
             if (compute)
             {
                 header = null;
@@ -140,8 +148,6 @@ namespace Ryujinx.Graphics.Shader.Translation
                 cfg = Decoder.Decode(code, 0);
 
                 context = new EmitterContext(ShaderStage.Compute, header);
-
-                headerSize = 0;
             }
             else
             {
@@ -150,14 +156,19 @@ namespace Ryujinx.Graphics.Shader.Translation
                 cfg = Decoder.Decode(code, HeaderSize);
 
                 context = new EmitterContext(header.Stage, header);
-
-                headerSize = HeaderSize;
             }
 
+            ulong maxEndAddress = 0;
+
             for (int blkIndex = 0; blkIndex < cfg.Length; blkIndex++)
             {
                 Block block = cfg[blkIndex];
 
+                if (maxEndAddress < block.EndAddress)
+                {
+                    maxEndAddress = block.EndAddress;
+                }
+
                 context.CurrBlock = block;
 
                 context.MarkLabel(context.GetLabel(block.Address));
@@ -179,7 +190,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                             instName = "???";
                         }
 
-                        string dbgComment = $"0x{(op.Address - headerSize):X6}: 0x{op.RawOpCode:X16} {instName}";
+                        string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
 
                         context.Add(new CommentNode(dbgComment));
                     }
@@ -193,13 +204,13 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                     bool skipPredicateCheck = op.Emitter == InstEmit.Bra;
 
-                    if (op is OpCodeSync opSync)
+                    if (op is OpCodeBranchPop opBranchPop)
                     {
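-                        // If the instruction is a SYNC instruction with only one
-                        // possible target address, then the instruction is basically
-                        // just a simple branch, we can generate code similar to branch
-                        // instructions, with the condition check on the branch itself.
+                        // If the instruction is a branch-pop instruction (such as SYNC)
+                        // with only one possible target address, it is effectively just
+                        // a simple branch, so we can generate code similar to branch
+                        // instructions, with the condition check on the branch itself.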
-                        skipPredicateCheck |= opSync.Targets.Count < 2;
+                        skipPredicateCheck |= opBranchPop.Targets.Count < 2;
                     }
 
                     if (!(op.Predicate.IsPT || skipPredicateCheck))
@@ -243,6 +254,8 @@ namespace Ryujinx.Graphics.Shader.Translation
                 }
             }
 
+            size = (int)maxEndAddress + (compute ? 0 : HeaderSize);
+
             return context.GetOperations();
         }