Implement a Macro JIT (#1445)

* Implement a Macro JIT

* Nit: space
This commit is contained in:
gdkchan 2020-08-02 22:36:57 -03:00 committed by GitHub
parent c11855565e
commit 60db4c3530
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 774 additions and 159 deletions

View file

@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
class GPFifoClass : IDeviceState class GPFifoClass : IDeviceState
{ {
private readonly GpuContext _context; private readonly GpuContext _context;
private readonly GPFifoProcessor _parent;
private readonly DeviceState<GPFifoClassState> _state; private readonly DeviceState<GPFifoClassState> _state;
private const int MacrosCount = 0x80; private const int MacrosCount = 0x80;
@ -24,18 +25,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
private readonly Macro[] _macros; private readonly Macro[] _macros;
private readonly int[] _macroCode; private readonly int[] _macroCode;
/// <summary>
/// MME Shadow RAM Control.
/// </summary>
public ShadowRamControl ShadowCtrl { get; private set; }
/// <summary> /// <summary>
/// Creates a new instance of the GPU General Purpose FIFO class. /// Creates a new instance of the GPU General Purpose FIFO class.
/// </summary> /// </summary>
/// <param name="context">GPU context</param> /// <param name="context">GPU context</param>
public GPFifoClass(GpuContext context) /// <param name="parent">Parent GPU General Purpose FIFO processor</param>
public GPFifoClass(GpuContext context, GPFifoProcessor parent)
{ {
_context = context; _context = context;
_parent = parent;
_state = new DeviceState<GPFifoClassState>(new Dictionary<string, RwCallback> _state = new DeviceState<GPFifoClassState>(new Dictionary<string, RwCallback>
{ {
{ nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) }, { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) },
@ -155,7 +153,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
} }
/// <summary> /// <summary>
/// Send macro code/data to the MME /// Sends macro code/data to the MME.
/// </summary> /// </summary>
/// <param name="argument">Method call argument</param> /// <param name="argument">Method call argument</param>
public void LoadMmeInstructionRam(int argument) public void LoadMmeInstructionRam(int argument)
@ -164,7 +162,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
} }
/// <summary> /// <summary>
/// Bind a macro index to a position for the MME /// Binds a macro index to a position for the MME
/// </summary> /// </summary>
/// <param name="argument">Method call argument</param> /// <param name="argument">Method call argument</param>
public void LoadMmeStartAddressRam(int argument) public void LoadMmeStartAddressRam(int argument)
@ -173,12 +171,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
} }
/// <summary> /// <summary>
/// Change the shadow RAM setting /// Changes the shadow RAM control.
/// </summary> /// </summary>
/// <param name="argument">Method call argument</param> /// <param name="argument">Method call argument</param>
public void SetMmeShadowRamControl(int argument) public void SetMmeShadowRamControl(int argument)
{ {
ShadowCtrl = (ShadowRamControl)argument; _parent.SetShadowRamControl((ShadowRamControl)argument);
} }
/// <summary> /// <summary>
@ -208,7 +206,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <param name="state">Current GPU state</param> /// <param name="state">Current GPU state</param>
public void CallMme(int index, GpuState state) public void CallMme(int index, GpuState state)
{ {
_macros[index].Execute(_macroCode, ShadowCtrl, state); _macros[index].Execute(_macroCode, state);
} }
} }
} }

View file

@ -39,8 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
{ {
_context = context; _context = context;
_fifoClass = new GPFifoClass(context); _fifoClass = new GPFifoClass(context, this);
_subChannels = new GpuState[8]; _subChannels = new GpuState[8];
for (int index = 0; index < _subChannels.Length; index++) for (int index = 0; index < _subChannels.Length; index++)
@ -152,7 +151,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
} }
else if (meth.Method < 0xe00) else if (meth.Method < 0xe00)
{ {
_subChannels[meth.SubChannel].CallMethod(meth, _fifoClass.ShadowCtrl); _subChannels[meth.SubChannel].CallMethod(meth);
} }
else else
{ {
@ -175,5 +174,17 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
} }
} }
} }
/// <summary>
/// Propagates a new shadow RAM control value to the state of every sub-channel.
/// </summary>
/// <param name="control">Shadow RAM control value assigned to each sub-channel</param>
public void SetShadowRamControl(ShadowRamControl control)
{
    foreach (var subChannel in _subChannels)
    {
        subChannel.ShadowRamControl = control;
    }
}
} }
} }

View file

@ -0,0 +1,15 @@
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// GPU Macro Arithmetic and Logic unit operation.
/// </summary>
/// <remarks>
/// Decoded from the low 3 bits of a Macro instruction. The remaining encoding, 7, is not
/// an ALU operation: the execution engines decode it as a branch.
/// </remarks>
enum AluOperation
{
/// <summary>
/// Binary register-to-register operation. The specific <see cref="AluRegOperation"/>
/// is taken from the 5 bits starting at bit 17 of the instruction.
/// </summary>
AluReg = 0,
/// <summary>
/// Adds the instruction immediate to the first register operand.
/// </summary>
AddImmediate = 1,
/// <summary>
/// Extracts a bitfield from the second register operand and inserts it
/// into the first register operand, replacing the bits previously there.
/// </summary>
BitfieldReplace = 2,
/// <summary>
/// Extracts a bitfield from the second register operand, starting at the bit given by
/// the first register operand, then shifts it left by an amount encoded on the instruction.
/// </summary>
BitfieldExtractLslImm = 3,
/// <summary>
/// Extracts a bitfield from the second register operand, starting at a bit encoded on
/// the instruction, then shifts it left by the value of the first register operand.
/// </summary>
BitfieldExtractLslReg = 4,
/// <summary>
/// Reads a value from the GPU state, using the first register operand
/// plus the instruction immediate as the location to read.
/// </summary>
ReadImmediate = 5
}
}

View file

@ -0,0 +1,18 @@
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// GPU Macro Arithmetic and Logic unit binary register-to-register operation.
/// </summary>
/// <remarks>
/// Decoded from the 5 bits starting at bit 17 of an <see cref="AluOperation.AluReg"/> instruction.
/// In the descriptions below, "a" is the first register operand and "b" is the second one.
/// </remarks>
enum AluRegOperation
{
/// <summary>
/// a + b. Updates the carry flag.
/// </summary>
Add = 0,
/// <summary>
/// a + b + carry. Updates the carry flag.
/// </summary>
AddWithCarry = 1,
/// <summary>
/// a - b. Updates the carry flag.
/// </summary>
Subtract = 2,
/// <summary>
/// a - b, minus one more when the carry flag is clear. Updates the carry flag.
/// </summary>
SubtractWithBorrow = 3,
/// <summary>
/// a ^ b.
/// </summary>
BitwiseExclusiveOr = 8,
/// <summary>
/// a | b.
/// </summary>
BitwiseOr = 9,
/// <summary>
/// a &amp; b.
/// </summary>
BitwiseAnd = 10,
/// <summary>
/// a &amp; ~b.
/// </summary>
BitwiseAndNot = 11,
/// <summary>
/// ~(a &amp; b).
/// </summary>
BitwiseNotAnd = 12
}
}

View file

@ -0,0 +1,17 @@
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// GPU Macro assignment operation.
/// </summary>
/// <remarks>
/// Decoded from the 3 bits starting at bit 4 of a non-branch Macro instruction.
/// It selects what is done with the ALU result of that instruction.
/// </remarks>
enum AssignmentOperation
{
/// <summary>
/// Ignores the result, and stores a fetched parameter on the destination register.
/// </summary>
IgnoreAndFetch = 0,
/// <summary>
/// Stores the result on the destination register.
/// </summary>
Move = 1,
/// <summary>
/// Stores the result on the destination register, and uses it as method address.
/// </summary>
MoveAndSetMaddr = 2,
/// <summary>
/// Stores a fetched parameter on the destination register, and sends the result.
/// </summary>
FetchAndSend = 3,
/// <summary>
/// Stores the result on the destination register, and sends it.
/// </summary>
MoveAndSend = 4,
/// <summary>
/// Stores a fetched parameter on the destination register, and uses the result as method address.
/// </summary>
FetchAndSetMaddr = 5,
/// <summary>
/// Stores the result on the destination register and uses it as method address,
/// then fetches a parameter and sends it.
/// </summary>
MoveAndSetMaddrThenFetchAndSend = 6,
/// <summary>
/// Stores the result on the destination register and uses it as method address,
/// then sends bits 17:12 of the result.
/// </summary>
MoveAndSetMaddrThenSendHigh = 7
}
}

View file

@ -0,0 +1,25 @@
using Ryujinx.Graphics.Gpu.State;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Contract implemented by every GPU Macro execution engine.
/// </summary>
interface IMacroEE
{
    /// <summary>
    /// Queue with the arguments that were pushed for the Macro.
    /// </summary>
    Queue<int> Fifo { get; }

    /// <summary>
    /// Runs the GPU Macro code that is passed in.
    /// </summary>
    /// <param name="code">Macro code to run, starting at the first instruction of the Macro</param>
    /// <param name="state">GPU state at the time of the call</param>
    /// <param name="arg0">First argument of the GPU Macro</param>
    void Execute(ReadOnlySpan<int> code, GpuState state, int arg0);
}
}

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Gpu.State; using Ryujinx.Graphics.Gpu.State;
using System;
namespace Ryujinx.Graphics.Gpu.Engine.MME namespace Ryujinx.Graphics.Gpu.Engine.MME
{ {
@ -15,7 +16,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
private bool _executionPending; private bool _executionPending;
private int _argument; private int _argument;
private readonly MacroInterpreter _interpreter; private readonly IMacroEE _executionEngine;
/// <summary> /// <summary>
/// Creates a new instance of the GPU cached macro program. /// Creates a new instance of the GPU cached macro program.
@ -28,7 +29,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
_executionPending = false; _executionPending = false;
_argument = 0; _argument = 0;
_interpreter = new MacroInterpreter(); if (GraphicsConfig.EnableMacroJit)
{
_executionEngine = new MacroJit();
}
else
{
_executionEngine = new MacroInterpreter();
}
} }
/// <summary> /// <summary>
@ -45,15 +53,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary> /// <summary>
/// Starts executing the macro program code. /// Starts executing the macro program code.
/// </summary> /// </summary>
/// <param name="mme">Program code</param> /// <param name="code">Program code</param>
/// <param name="state">Current GPU state</param> /// <param name="state">Current GPU state</param>
public void Execute(int[] mme, ShadowRamControl shadowCtrl, GpuState state) public void Execute(ReadOnlySpan<int> code, GpuState state)
{ {
if (_executionPending) if (_executionPending)
{ {
_executionPending = false; _executionPending = false;
_interpreter?.Execute(mme, Position, _argument, shadowCtrl, state); _executionEngine?.Execute(code.Slice(Position), state, _argument);
} }
} }
@ -63,7 +71,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <param name="argument">Argument to be pushed</param> /// <param name="argument">Argument to be pushed</param>
public void PushArgument(int argument) public void PushArgument(int argument)
{ {
_interpreter?.Fifo.Enqueue(argument); _executionEngine?.Fifo.Enqueue(argument);
} }
} }
} }

View file

@ -3,48 +3,16 @@ using Ryujinx.Graphics.Gpu.State;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu namespace Ryujinx.Graphics.Gpu.Engine.MME
{ {
/// <summary> /// <summary>
/// Macro code interpreter. /// Macro code interpreter.
/// </summary> /// </summary>
class MacroInterpreter class MacroInterpreter : IMacroEE
{ {
private enum AssignmentOperation /// <summary>
{ /// Arguments FIFO.
IgnoreAndFetch = 0, /// </summary>
Move = 1,
MoveAndSetMaddr = 2,
FetchAndSend = 3,
MoveAndSend = 4,
FetchAndSetMaddr = 5,
MoveAndSetMaddrThenFetchAndSend = 6,
MoveAndSetMaddrThenSendHigh = 7
}
private enum AluOperation
{
AluReg = 0,
AddImmediate = 1,
BitfieldReplace = 2,
BitfieldExtractLslImm = 3,
BitfieldExtractLslReg = 4,
ReadImmediate = 5
}
private enum AluRegOperation
{
Add = 0,
AddWithCarry = 1,
Subtract = 2,
SubtractWithBorrow = 3,
BitwiseExclusiveOr = 8,
BitwiseOr = 9,
BitwiseAnd = 10,
BitwiseAndNot = 11,
BitwiseNotAnd = 12
}
public Queue<int> Fifo { get; } public Queue<int> Fifo { get; }
private int[] _gprs; private int[] _gprs;
@ -55,15 +23,12 @@ namespace Ryujinx.Graphics.Gpu
private bool _carry; private bool _carry;
private int _opCode; private int _opCode;
private int _pipeOp; private int _pipeOp;
private bool _ignoreExitFlag; private bool _ignoreExitFlag;
private int _pc; private int _pc;
private ShadowRamControl _shadowCtrl;
/// <summary> /// <summary>
/// Creates a new instance of the macro code interpreter. /// Creates a new instance of the macro code interpreter.
/// </summary> /// </summary>
@ -77,28 +42,24 @@ namespace Ryujinx.Graphics.Gpu
/// <summary> /// <summary>
/// Executes a macro program until it exits. /// Executes a macro program until it exits.
/// </summary> /// </summary>
/// <param name="mme">Code of the program to execute</param> /// <param name="code">Code of the program to execute</param>
/// <param name="position">Start position to execute</param>
/// <param name="param">Optional argument passed to the program, 0 if not used</param>
/// <param name="shadowCtrl">Shadow RAM control register value</param>
/// <param name="state">Current GPU state</param> /// <param name="state">Current GPU state</param>
public void Execute(int[] mme, int position, int param, ShadowRamControl shadowCtrl, GpuState state) /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
public void Execute(ReadOnlySpan<int> code, GpuState state, int arg0)
{ {
Reset(); Reset();
_gprs[1] = param; _gprs[1] = arg0;
_pc = position; _pc = 0;
_shadowCtrl = shadowCtrl; FetchOpCode(code);
FetchOpCode(mme); while (Step(code, state)) ;
while (Step(mme, state));
// Due to the delay slot, we still need to execute // Due to the delay slot, we still need to execute
// one more instruction before we actually exit. // one more instruction before we actually exit.
Step(mme, state); Step(code, state);
} }
/// <summary> /// <summary>
@ -121,14 +82,14 @@ namespace Ryujinx.Graphics.Gpu
/// <summary> /// <summary>
/// Executes a single instruction of the program. /// Executes a single instruction of the program.
/// </summary> /// </summary>
/// <param name="mme">Program code to execute</param> /// <param name="code">Program code to execute</param>
/// <param name="state">Current GPU state</param> /// <param name="state">Current GPU state</param>
/// <returns>True to continue execution, false if the program exited</returns> /// <returns>True to continue execution, false if the program exited</returns>
private bool Step(int[] mme, GpuState state) private bool Step(ReadOnlySpan<int> code, GpuState state)
{ {
int baseAddr = _pc - 1; int baseAddr = _pc - 1;
FetchOpCode(mme); FetchOpCode(code);
if ((_opCode & 7) < 7) if ((_opCode & 7) < 7)
{ {
@ -141,83 +102,44 @@ namespace Ryujinx.Graphics.Gpu
{ {
// Fetch parameter and ignore result. // Fetch parameter and ignore result.
case AssignmentOperation.IgnoreAndFetch: case AssignmentOperation.IgnoreAndFetch:
{
SetDstGpr(FetchParam()); SetDstGpr(FetchParam());
break; break;
}
// Move result. // Move result.
case AssignmentOperation.Move: case AssignmentOperation.Move:
{
SetDstGpr(result); SetDstGpr(result);
break; break;
}
// Move result and use as Method Address. // Move result and use as Method Address.
case AssignmentOperation.MoveAndSetMaddr: case AssignmentOperation.MoveAndSetMaddr:
{
SetDstGpr(result); SetDstGpr(result);
SetMethAddr(result); SetMethAddr(result);
break; break;
}
// Fetch parameter and send result. // Fetch parameter and send result.
case AssignmentOperation.FetchAndSend: case AssignmentOperation.FetchAndSend:
{
SetDstGpr(FetchParam()); SetDstGpr(FetchParam());
Send(state, result); Send(state, result);
break; break;
}
// Move and send result. // Move and send result.
case AssignmentOperation.MoveAndSend: case AssignmentOperation.MoveAndSend:
{
SetDstGpr(result); SetDstGpr(result);
Send(state, result); Send(state, result);
break; break;
}
// Fetch parameter and use result as Method Address. // Fetch parameter and use result as Method Address.
case AssignmentOperation.FetchAndSetMaddr: case AssignmentOperation.FetchAndSetMaddr:
{
SetDstGpr(FetchParam()); SetDstGpr(FetchParam());
SetMethAddr(result); SetMethAddr(result);
break; break;
}
// Move result and use as Method Address, then fetch and send parameter. // Move result and use as Method Address, then fetch and send parameter.
case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend: case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
{
SetDstGpr(result); SetDstGpr(result);
SetMethAddr(result); SetMethAddr(result);
Send(state, FetchParam()); Send(state, FetchParam());
break; break;
}
// Move result and use as Method Address, then send bits 17:12 of result. // Move result and use as Method Address, then send bits 17:12 of result.
case AssignmentOperation.MoveAndSetMaddrThenSendHigh: case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
{
SetDstGpr(result); SetDstGpr(result);
SetMethAddr(result); SetMethAddr(result);
Send(state, (result >> 12) & 0x3f); Send(state, (result >> 12) & 0x3f);
break; break;
}
} }
} }
else else
@ -237,7 +159,7 @@ namespace Ryujinx.Graphics.Gpu
if (noDelays) if (noDelays)
{ {
FetchOpCode(mme); FetchOpCode(code);
} }
else else
{ {
@ -259,11 +181,11 @@ namespace Ryujinx.Graphics.Gpu
/// <summary> /// <summary>
/// Fetches a single operation code from the program code. /// Fetches a single operation code from the program code.
/// </summary> /// </summary>
/// <param name="mme">Program code</param> /// <param name="code">Program code</param>
private void FetchOpCode(int[] mme) private void FetchOpCode(ReadOnlySpan<int> code)
{ {
_opCode = _pipeOp; _opCode = _pipeOp;
_pipeOp = mme[_pc++]; _pipeOp = code[_pc++];
} }
/// <summary> /// <summary>
@ -278,23 +200,16 @@ namespace Ryujinx.Graphics.Gpu
switch (op) switch (op)
{ {
case AluOperation.AluReg: case AluOperation.AluReg:
{ return GetAluResult((AluRegOperation)((_opCode >> 17) & 0x1f), GetGprA(), GetGprB());
AluRegOperation aluOp = (AluRegOperation)((_opCode >> 17) & 0x1f);
return GetAluResult(aluOp, GetGprA(), GetGprB());
}
case AluOperation.AddImmediate: case AluOperation.AddImmediate:
{
return GetGprA() + GetImm(); return GetGprA() + GetImm();
}
case AluOperation.BitfieldReplace: case AluOperation.BitfieldReplace:
case AluOperation.BitfieldExtractLslImm: case AluOperation.BitfieldExtractLslImm:
case AluOperation.BitfieldExtractLslReg: case AluOperation.BitfieldExtractLslReg:
{
int bfSrcBit = (_opCode >> 17) & 0x1f; int bfSrcBit = (_opCode >> 17) & 0x1f;
int bfSize = (_opCode >> 22) & 0x1f; int bfSize = (_opCode >> 22) & 0x1f;
int bfDstBit = (_opCode >> 27) & 0x1f; int bfDstBit = (_opCode >> 27) & 0x1f;
int bfMask = (1 << bfSize) - 1; int bfMask = (1 << bfSize) - 1;
@ -305,7 +220,6 @@ namespace Ryujinx.Graphics.Gpu
switch (op) switch (op)
{ {
case AluOperation.BitfieldReplace: case AluOperation.BitfieldReplace:
{
src = (int)((uint)src >> bfSrcBit) & bfMask; src = (int)((uint)src >> bfSrcBit) & bfMask;
dst &= ~(bfMask << bfDstBit); dst &= ~(bfMask << bfDstBit);
@ -313,33 +227,25 @@ namespace Ryujinx.Graphics.Gpu
dst |= src << bfDstBit; dst |= src << bfDstBit;
return dst; return dst;
}
case AluOperation.BitfieldExtractLslImm: case AluOperation.BitfieldExtractLslImm:
{
src = (int)((uint)src >> dst) & bfMask; src = (int)((uint)src >> dst) & bfMask;
return src << bfDstBit; return src << bfDstBit;
}
case AluOperation.BitfieldExtractLslReg: case AluOperation.BitfieldExtractLslReg:
{
src = (int)((uint)src >> bfSrcBit) & bfMask; src = (int)((uint)src >> bfSrcBit) & bfMask;
return src << dst; return src << dst;
}
} }
break; break;
}
case AluOperation.ReadImmediate: case AluOperation.ReadImmediate:
{
return Read(state, GetGprA() + GetImm()); return Read(state, GetGprA() + GetImm());
}
} }
throw new ArgumentException(nameof(_opCode)); throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}.");
} }
/// <summary> /// <summary>
@ -351,52 +257,46 @@ namespace Ryujinx.Graphics.Gpu
/// <returns>Operation result</returns> /// <returns>Operation result</returns>
private int GetAluResult(AluRegOperation aluOp, int a, int b) private int GetAluResult(AluRegOperation aluOp, int a, int b)
{ {
ulong result;
switch (aluOp) switch (aluOp)
{ {
case AluRegOperation.Add: case AluRegOperation.Add:
{ result = (ulong)a + (ulong)b;
ulong result = (ulong)a + (ulong)b;
_carry = result > 0xffffffff; _carry = result > 0xffffffff;
return (int)result; return (int)result;
}
case AluRegOperation.AddWithCarry: case AluRegOperation.AddWithCarry:
{ result = (ulong)a + (ulong)b + (_carry ? 1UL : 0UL);
ulong result = (ulong)a + (ulong)b + (_carry ? 1UL : 0UL);
_carry = result > 0xffffffff; _carry = result > 0xffffffff;
return (int)result; return (int)result;
}
case AluRegOperation.Subtract: case AluRegOperation.Subtract:
{ result = (ulong)a - (ulong)b;
ulong result = (ulong)a - (ulong)b;
_carry = result < 0x100000000; _carry = result < 0x100000000;
return (int)result; return (int)result;
}
case AluRegOperation.SubtractWithBorrow: case AluRegOperation.SubtractWithBorrow:
{ result = (ulong)a - (ulong)b - (_carry ? 0UL : 1UL);
ulong result = (ulong)a - (ulong)b - (_carry ? 0UL : 1UL);
_carry = result < 0x100000000; _carry = result < 0x100000000;
return (int)result; return (int)result;
}
case AluRegOperation.BitwiseExclusiveOr: return a ^ b; case AluRegOperation.BitwiseExclusiveOr: return a ^ b;
case AluRegOperation.BitwiseOr: return a | b; case AluRegOperation.BitwiseOr: return a | b;
case AluRegOperation.BitwiseAnd: return a & b; case AluRegOperation.BitwiseAnd: return a & b;
case AluRegOperation.BitwiseAndNot: return a & ~b; case AluRegOperation.BitwiseAndNot: return a & ~b;
case AluRegOperation.BitwiseNotAnd: return ~(a & b); case AluRegOperation.BitwiseNotAnd: return ~(a & b);
} }
throw new ArgumentOutOfRangeException(nameof(aluOp)); throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}.");
} }
/// <summary> /// <summary>
@ -415,7 +315,7 @@ namespace Ryujinx.Graphics.Gpu
/// <param name="value">Packed address and increment value</param> /// <param name="value">Packed address and increment value</param>
private void SetMethAddr(int value) private void SetMethAddr(int value)
{ {
_methAddr = (value >> 0) & 0xfff; _methAddr = (value >> 0) & 0xfff;
_methIncr = (value >> 12) & 0x3f; _methIncr = (value >> 12) & 0x3f;
} }
@ -492,7 +392,7 @@ namespace Ryujinx.Graphics.Gpu
{ {
MethodParams meth = new MethodParams(_methAddr, value); MethodParams meth = new MethodParams(_methAddr, value);
state.CallMethod(meth, _shadowCtrl); state.CallMethod(meth);
_methAddr += _methIncr; _methAddr += _methIncr;
} }

View file

@ -0,0 +1,39 @@
using Ryujinx.Graphics.Gpu.State;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Represents an execution engine that uses a Just-in-Time compiler for fast execution.
/// </summary>
class MacroJit : IMacroEE
{
    private readonly MacroJitContext _context = new MacroJitContext();

    // Host code for the Macro; null until the first call to Execute.
    private MacroJitCompiler.MacroExecute _execute;

    /// <summary>
    /// Arguments FIFO, owned by the JIT context.
    /// </summary>
    public Queue<int> Fifo
    {
        get { return _context.Fifo; }
    }

    /// <summary>
    /// Executes a macro program until it exits.
    /// </summary>
    /// <remarks>
    /// The code is compiled on the first call only. Later calls run the cached
    /// host code and do not look at <paramref name="code"/> again.
    /// </remarks>
    /// <param name="code">Code of the program to execute</param>
    /// <param name="state">Current GPU state</param>
    /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
    public void Execute(ReadOnlySpan<int> code, GpuState state, int arg0)
    {
        _execute ??= new MacroJitCompiler().Compile(code);

        _execute(_context, state, arg0);
    }
}
}

View file

@ -0,0 +1,516 @@
using Ryujinx.Graphics.Gpu.State;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Represents a Macro Just-in-Time compiler.
/// </summary>
class MacroJitCompiler
{
// Dynamic method that receives the translated code, and the generator used to write its IL.
private readonly DynamicMethod _meth;
private readonly ILGenerator _ilGen;
// IL locals backing Macro registers 1 to 7. Index 0 is never assigned,
// since register 0 is handled as a constant zero on loads and discarded on stores.
private readonly LocalBuilder[] _gprs;
// IL local holding the carry flag written by the add and subtract operations.
private readonly LocalBuilder _carry;
// IL locals holding the current method address, and the increment added to it after each send.
private readonly LocalBuilder _methAddr;
private readonly LocalBuilder _methIncr;
/// <summary>
/// Creates a new instance of the Macro Just-in-Time compiler.
/// </summary>
/// <remarks>
/// This creates the dynamic method, declares the IL locals that back the Macro state,
/// and emits the prologue that copies the first Macro argument into register 1.
/// </remarks>
public MacroJitCompiler()
{
    Type[] parameterTypes = { typeof(MacroJitContext), typeof(GpuState), typeof(int) };

    _meth = new DynamicMethod("Macro", typeof(void), parameterTypes);
    _ilGen = _meth.GetILGenerator();

    // Register 0 gets no local, its slot on the array stays null.
    _gprs = new LocalBuilder[8];

    for (int reg = 1; reg < _gprs.Length; reg++)
    {
        _gprs[reg] = _ilGen.DeclareLocal(typeof(int));
    }

    _carry = _ilGen.DeclareLocal(typeof(int));
    _methAddr = _ilGen.DeclareLocal(typeof(int));
    _methIncr = _ilGen.DeclareLocal(typeof(int));

    // Prologue: register 1 = arg0.
    _ilGen.Emit(OpCodes.Ldarg_2);
    _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
}
/// <summary>
/// Signature of the host code produced by the compiler for a GPU Macro.
/// </summary>
/// <param name="context">JIT context, used by the compiled code to fetch Macro arguments</param>
/// <param name="state">Current GPU state</param>
/// <param name="arg0">First argument of the GPU Macro, placed on register 1 before it runs</param>
public delegate void MacroExecute(MacroJitContext context, GpuState state, int arg0);
/// <summary>
/// Translates a new piece of GPU Macro code into host executable code.
/// </summary>
/// <remarks>
/// The IL is appended after the prologue that the constructor already emitted on the same
/// generator, so each compiler instance is meant to translate a single Macro.
/// </remarks>
/// <param name="code">Code to be translated</param>
/// <returns>Delegate of the host compiled code</returns>
public MacroExecute Compile(ReadOnlySpan<int> code)
{
    bool IsBranch(int word) => (word & 7) == 7;
    bool IsExit(int word) => (word & 0x80) != 0;

    var branchLabels = new Dictionary<int, Label>();

    int furthestTarget = 0;
    int index;

    // First pass: create one label per branch target, and find where the Macro ends.
    // An exit only ends the scan when no branch seen so far jumps past it.
    for (index = 0; index < code.Length; index++)
    {
        int word = code[index];

        if (IsBranch(word))
        {
            int target = index + (word >> 14);

            if (!branchLabels.ContainsKey(target))
            {
                branchLabels.Add(target, _ilGen.DefineLabel());
            }

            furthestTarget = Math.Max(furthestTarget, target);
        }

        if (IsExit(word) && index >= furthestTarget)
        {
            break;
        }
    }

    // Second pass: code generation.
    bool ranOffTheEnd = true;

    for (index = 0; index < code.Length; index++)
    {
        if (branchLabels.TryGetValue(index, out Label here))
        {
            _ilGen.MarkLabel(here);
        }

        Emit(code, index, branchLabels);

        if (!IsExit(code[index]))
        {
            continue;
        }

        // The instruction following an exit still executes before the Macro returns.
        Emit(code, index + 1, branchLabels);
        _ilGen.Emit(OpCodes.Ret);

        if (index >= furthestTarget)
        {
            ranOffTheEnd = false;

            break;
        }
    }

    // Without a final exit, the method still needs a return at the end.
    if (ranOffTheEnd)
    {
        _ilGen.Emit(OpCodes.Ret);
    }

    return (MacroExecute)_meth.CreateDelegate(typeof(MacroExecute));
}
/// <summary>
/// Emits IL equivalent to the Macro instruction at a given offset.
/// </summary>
/// <param name="code">GPU Macro code</param>
/// <param name="offset">Offset, in words, where the instruction is located</param>
/// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
private void Emit(ReadOnlySpan<int> code, int offset, Dictionary<int, Label> labels)
{
    int opCode = code[offset];

    if ((opCode & 7) == 7)
    {
        // Branch, conditional on the first register operand being zero or not.
        bool branchIfNotZero = (opCode & 0x10) != 0;

        EmitLoadGprA(opCode);

        Label notTaken = _ilGen.DefineLabel();

        // The host branch skips the taken path, so its condition is the inverse one.
        _ilGen.Emit(branchIfNotZero ? OpCodes.Brfalse : OpCodes.Brtrue, notTaken);

        bool hasDelaySlot = (opCode & 0x20) == 0;

        if (hasDelaySlot)
        {
            // On the taken path, the next instruction runs before the jump.
            Emit(code, offset + 1, labels);
        }

        _ilGen.Emit(OpCodes.Br, labels[offset + (opCode >> 14)]);

        _ilGen.MarkLabel(notTaken);

        return;
    }

    // Operation produces a value, left on the top of the evaluation stack.
    AssignmentOperation assignment = (AssignmentOperation)((opCode >> 4) & 7);

    EmitAluOp(opCode);

    switch (assignment)
    {
        case AssignmentOperation.IgnoreAndFetch:
            // Fetch parameter and ignore result.
            _ilGen.Emit(OpCodes.Pop);
            EmitFetchParam();
            EmitStoreDstGpr(opCode);
            break;

        case AssignmentOperation.Move:
            // Move result.
            EmitStoreDstGpr(opCode);
            break;

        case AssignmentOperation.MoveAndSetMaddr:
            // Move result and use as Method Address.
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(opCode);
            EmitStoreMethAddr();
            break;

        case AssignmentOperation.FetchAndSend:
            // Fetch parameter and send result.
            EmitFetchParam();
            EmitStoreDstGpr(opCode);
            EmitSend();
            break;

        case AssignmentOperation.MoveAndSend:
            // Move and send result.
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(opCode);
            EmitSend();
            break;

        case AssignmentOperation.FetchAndSetMaddr:
            // Fetch parameter and use result as Method Address.
            EmitFetchParam();
            EmitStoreDstGpr(opCode);
            EmitStoreMethAddr();
            break;

        case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
            // Move result and use as Method Address, then fetch and send parameter.
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(opCode);
            EmitStoreMethAddr();
            EmitFetchParam();
            EmitSend();
            break;

        case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
            // Move result and use as Method Address, then send bits 17:12 of result.
            // Three copies of the result are needed: register, method address and sent value.
            _ilGen.Emit(OpCodes.Dup);
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(opCode);
            EmitStoreMethAddr();
            _ilGen.Emit(OpCodes.Ldc_I4, 12);
            _ilGen.Emit(OpCodes.Shr_Un);
            _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
            _ilGen.Emit(OpCodes.And);
            EmitSend();
            break;
    }
}
/// <summary>
/// Emits IL for an Arithmetic and Logic Unit instruction.
/// The result of the operation is left on the top of the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction to be translated</param>
/// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
private void EmitAluOp(int opCode)
{
    AluOperation op = (AluOperation)(opCode & 7);

    // Bitfield parameters, only used by the bitfield operations below.
    int srcBit = (opCode >> 17) & 0x1f;
    int size = (opCode >> 22) & 0x1f;
    int dstBit = (opCode >> 27) & 0x1f;

    int mask = (1 << size) - 1;

    switch (op)
    {
        case AluOperation.AluReg:
            EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode);
            break;

        case AluOperation.AddImmediate:
            EmitLoadGprA(opCode);
            EmitLoadImm(opCode);
            _ilGen.Emit(OpCodes.Add);
            break;

        case AluOperation.BitfieldReplace:
            // ((B >> srcBit) & mask) << dstBit
            EmitLoadGprB(opCode);
            _ilGen.Emit(OpCodes.Ldc_I4, srcBit);
            _ilGen.Emit(OpCodes.Shr_Un);
            _ilGen.Emit(OpCodes.Ldc_I4, mask);
            _ilGen.Emit(OpCodes.And);
            _ilGen.Emit(OpCodes.Ldc_I4, dstBit);
            _ilGen.Emit(OpCodes.Shl);

            // A with the destination field cleared, then both halves merged.
            EmitLoadGprA(opCode);
            _ilGen.Emit(OpCodes.Ldc_I4, ~(mask << dstBit));
            _ilGen.Emit(OpCodes.And);
            _ilGen.Emit(OpCodes.Or);
            break;

        case AluOperation.BitfieldExtractLslImm:
            // ((B >> A) & mask) << dstBit
            EmitLoadGprB(opCode);
            EmitLoadGprA(opCode);
            _ilGen.Emit(OpCodes.Shr_Un);
            _ilGen.Emit(OpCodes.Ldc_I4, mask);
            _ilGen.Emit(OpCodes.And);
            _ilGen.Emit(OpCodes.Ldc_I4, dstBit);
            _ilGen.Emit(OpCodes.Shl);
            break;

        case AluOperation.BitfieldExtractLslReg:
            // ((B >> srcBit) & mask) << A
            EmitLoadGprB(opCode);
            _ilGen.Emit(OpCodes.Ldc_I4, srcBit);
            _ilGen.Emit(OpCodes.Shr_Un);
            _ilGen.Emit(OpCodes.Ldc_I4, mask);
            _ilGen.Emit(OpCodes.And);
            EmitLoadGprA(opCode);
            _ilGen.Emit(OpCodes.Shl);
            break;

        case AluOperation.ReadImmediate:
            // Read(state, A + immediate)
            _ilGen.Emit(OpCodes.Ldarg_1);
            EmitLoadGprA(opCode);
            EmitLoadImm(opCode);
            _ilGen.Emit(OpCodes.Add);
            _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read)));
            break;

        default:
            throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}.");
    }
}
/// <summary>
/// Emits IL for a binary Arithmetic and Logic Unit instruction.
/// The result of the operation is left on the top of the IL evaluation stack.
/// </summary>
/// <remarks>
/// Add and subtract operations are computed with 64 bits, so that the carry flag can be
/// derived from the bits above the low 32 bits, before the result is truncated back to 32 bits.
/// </remarks>
/// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
/// <param name="opCode">Raw instruction</param>
/// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
private void EmitAluOp(AluRegOperation aluOp, int opCode)
{
    // Pushes both register operands, each one converted to a unsigned 64-bit value.
    void LoadOperands64()
    {
        EmitLoadGprA(opCode);
        _ilGen.Emit(OpCodes.Conv_U8);
        EmitLoadGprB(opCode);
        _ilGen.Emit(OpCodes.Conv_U8);
    }

    // Pushes both register operands as is.
    void LoadOperands32()
    {
        EmitLoadGprA(opCode);
        EmitLoadGprB(opCode);
    }

    // Compares the 64-bit result on the stack against a bound to produce the new carry flag,
    // then truncates the result to 32 bits.
    void StoreCarryAndTruncate(OpCode comparison, long bound)
    {
        _ilGen.Emit(OpCodes.Dup);
        _ilGen.Emit(OpCodes.Ldc_I8, bound);
        _ilGen.Emit(comparison);
        _ilGen.Emit(OpCodes.Stloc, _carry);
        _ilGen.Emit(OpCodes.Conv_U4);
    }

    switch (aluOp)
    {
        case AluRegOperation.Add:
            LoadOperands64();
            _ilGen.Emit(OpCodes.Add);
            StoreCarryAndTruncate(OpCodes.Cgt_Un, 0xffffffffL);
            break;

        case AluRegOperation.AddWithCarry:
            // A + (B + carry)
            LoadOperands64();
            _ilGen.Emit(OpCodes.Ldloc_S, _carry);
            _ilGen.Emit(OpCodes.Conv_U8);
            _ilGen.Emit(OpCodes.Add);
            _ilGen.Emit(OpCodes.Add);
            StoreCarryAndTruncate(OpCodes.Cgt_Un, 0xffffffffL);
            break;

        case AluRegOperation.Subtract:
            LoadOperands64();
            _ilGen.Emit(OpCodes.Sub);
            StoreCarryAndTruncate(OpCodes.Clt_Un, 0x100000000L);
            break;

        case AluRegOperation.SubtractWithBorrow:
            // A - (B + borrow), where borrow = 1 - carry. This matches the interpreter,
            // which subtracts one more only when the carry flag is clear.
            // The carry local only ever holds 0 or 1, so the borrow is also 0 or 1.
            LoadOperands64();
            _ilGen.Emit(OpCodes.Ldc_I4_1);
            _ilGen.Emit(OpCodes.Ldloc_S, _carry);
            _ilGen.Emit(OpCodes.Sub);
            _ilGen.Emit(OpCodes.Conv_U8);
            _ilGen.Emit(OpCodes.Add);
            _ilGen.Emit(OpCodes.Sub);
            StoreCarryAndTruncate(OpCodes.Clt_Un, 0x100000000L);
            break;

        case AluRegOperation.BitwiseExclusiveOr:
            LoadOperands32();
            _ilGen.Emit(OpCodes.Xor);
            break;

        case AluRegOperation.BitwiseOr:
            LoadOperands32();
            _ilGen.Emit(OpCodes.Or);
            break;

        case AluRegOperation.BitwiseAnd:
            LoadOperands32();
            _ilGen.Emit(OpCodes.And);
            break;

        case AluRegOperation.BitwiseAndNot:
            // A & ~B
            LoadOperands32();
            _ilGen.Emit(OpCodes.Not);
            _ilGen.Emit(OpCodes.And);
            break;

        case AluRegOperation.BitwiseNotAnd:
            // ~(A & B)
            LoadOperands32();
            _ilGen.Emit(OpCodes.And);
            _ilGen.Emit(OpCodes.Not);
            break;

        default:
            throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
    }
}
/// <summary>
/// Loads the immediate value of an instruction on the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the immediate should be extracted</param>
private void EmitLoadImm(int opCode)
{
    // The immediate is signed, so the arithmetic shift is used on purpose to sign-extend it.
    int immediate = opCode >> 14;

    _ilGen.Emit(OpCodes.Ldc_I4, immediate);
}
/// <summary>
/// Loads the value of the General Purpose register used as first operand on the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the register number should be extracted</param>
private void EmitLoadGprA(int opCode)
{
    // The register number is the 3-bit field starting at bit 11.
    int registerA = (opCode >> 11) & 7;

    EmitLoadGpr(registerA);
}
/// <summary>
/// Loads the value of the General Purpose register used as second operand on the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the register number should be extracted</param>
private void EmitLoadGprB(int opCode)
{
    // The register number is the 3-bit field starting at bit 14.
    int registerB = (opCode >> 14) & 7;

    EmitLoadGpr(registerB);
}
/// <summary>
/// Loads the value of a General Purpose register on the IL evaluation stack.
/// </summary>
/// <remarks>
/// Register number 0 has a hardcoded value of 0.
/// </remarks>
/// <param name="index">Register number</param>
private void EmitLoadGpr(int index)
{
    if (index != 0)
    {
        _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]);

        return;
    }

    // Register 0 has no backing local, a constant zero is loaded instead.
    _ilGen.Emit(OpCodes.Ldc_I4_0);
}
/// <summary>
/// Emits a call to <see cref="MacroJitContext.FetchParam"/>, which fetches an argument from the arguments FIFO.
/// The fetched argument is left on the IL evaluation stack.
/// </summary>
private void EmitFetchParam()
{
    // Argument 0 is pushed as the instance the FetchParam call operates on.
    _ilGen.Emit(OpCodes.Ldarg_0);

    var fetchParam = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam));

    _ilGen.Emit(OpCodes.Call, fetchParam);
}
/// <summary>
/// Pops the value on the top of the IL evaluation stack and stores it into the destination General Purpose register.
/// </summary>
/// <remarks>
/// Register number 0 does not exist: reads are hardcoded to 0, and writes are simply discarded.
/// </remarks>
/// <param name="opCode">Instruction the destination register number is extracted from</param>
private void EmitStoreDstGpr(int opCode)
{
    // The destination register number is the 3-bit field starting at bit 8.
    int dstReg = (opCode >> 8) & 7;

    if (dstReg != 0)
    {
        _ilGen.Emit(OpCodes.Stloc_S, _gprs[dstReg]);

        return;
    }

    // Writes to register 0 are dropped, but the value still has to leave the stack.
    _ilGen.Emit(OpCodes.Pop);
}
/// <summary>
/// Pops the value on the top of the IL evaluation stack and stores it as the method address.
/// That address is the destination of subsequent send calls.
/// The 6 bits starting at bit 12 of the same value are stored as the increment,
/// which is added to the method address after each sent value.
/// </summary>
private void EmitStoreMethAddr()
{
    const int MethAddrMask  = 0xfff;
    const int MethIncrShift = 12;
    const int MethIncrMask  = 0x3f;

    // Keep a second copy of the value, one copy is consumed per stored field.
    _ilGen.Emit(OpCodes.Dup);

    // Method address = value & 0xfff.
    _ilGen.Emit(OpCodes.Ldc_I4, MethAddrMask);
    _ilGen.Emit(OpCodes.And);
    _ilGen.Emit(OpCodes.Stloc_S, _methAddr);

    // Method increment = (value >> 12) & 0x3f, using an unsigned shift.
    _ilGen.Emit(OpCodes.Ldc_I4, MethIncrShift);
    _ilGen.Emit(OpCodes.Shr_Un);
    _ilGen.Emit(OpCodes.Ldc_I4, MethIncrMask);
    _ilGen.Emit(OpCodes.And);
    _ilGen.Emit(OpCodes.Stloc_S, _methIncr);
}
/// <summary>
/// Emits a call to <see cref="MacroJitContext.Send"/> that sends the value on the top of the
/// IL evaluation stack to the GPU at the current method address,
/// then advances the method address by the current increment.
/// </summary>
private void EmitSend()
{
    // The value is already on the stack; push the remaining Send arguments
    // (argument 1 of the generated method, then the current method address).
    _ilGen.Emit(OpCodes.Ldarg_1);
    _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);

    var send = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send));

    _ilGen.Emit(OpCodes.Call, send);

    // methAddr += methIncr
    _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
    _ilGen.Emit(OpCodes.Ldloc_S, _methIncr);
    _ilGen.Emit(OpCodes.Add);
    _ilGen.Emit(OpCodes.Stloc_S, _methAddr);
}
}
}

View file

@ -0,0 +1,57 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gpu.State;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Execution context of the Macro Just-in-Time compiler.
/// </summary>
class MacroJitContext
{
    /// <summary>
    /// FIFO holding the macro arguments.
    /// </summary>
    public Queue<int> Fifo { get; } = new Queue<int>();

    /// <summary>
    /// Fetches an argument from the arguments FIFO.
    /// </summary>
    /// <returns>The fetched argument, or 0 if the FIFO is empty (a warning is logged in that case)</returns>
    public int FetchParam()
    {
        if (Fifo.TryDequeue(out int value))
        {
            return value;
        }

        // Nothing left on the FIFO: warn and fall back to zero.
        Logger.PrintWarning(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");

        return 0;
    }

    /// <summary>
    /// Reads data from a GPU register.
    /// </summary>
    /// <param name="state">Current GPU state</param>
    /// <param name="reg">Register offset to read</param>
    /// <returns>GPU register value</returns>
    public static int Read(GpuState state, int reg) => state.Read(reg);

    /// <summary>
    /// Performs a GPU method call.
    /// </summary>
    /// <param name="value">Call argument</param>
    /// <param name="state">Current GPU state</param>
    /// <param name="methAddr">Address, in words, of the method</param>
    public static void Send(int value, GpuState state, int methAddr)
    {
        state.CallMethod(new MethodParams(methAddr, value));
    }
}
}

View file

@ -27,5 +27,10 @@ namespace Ryujinx.Graphics.Gpu
/// This can avoid lower resolution on some games when GPU performance is poor. /// This can avoid lower resolution on some games when GPU performance is poor.
/// </summary> /// </summary>
public static bool FastGpuTime = true; public static bool FastGpuTime = true;
/// <summary>
/// Enables or disables the Just-in-Time compiler for GPU Macro code.
/// </summary>
public static bool EnableMacroJit = true;
} }
} }

View file

@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.State
private readonly Register[] _registers; private readonly Register[] _registers;
/// <summary>
/// Gets or sets the shadow ram control used for this sub-channel.
/// </summary>
public ShadowRamControl ShadowRamControl { get; set; }
/// <summary> /// <summary>
/// Creates a new instance of the GPU state. /// Creates a new instance of the GPU state.
/// </summary> /// </summary>
@ -72,14 +77,15 @@ namespace Ryujinx.Graphics.Gpu.State
/// Calls a GPU method, using this state. /// Calls a GPU method, using this state.
/// </summary> /// </summary>
/// <param name="meth">The GPU method to be called</param> /// <param name="meth">The GPU method to be called</param>
/// <param name="shadowCtrl">Shadow RAM control register value</param> public void CallMethod(MethodParams meth)
public void CallMethod(MethodParams meth, ShadowRamControl shadowCtrl)
{ {
int value = meth.Argument; int value = meth.Argument;
// Methods < 0x80 shouldn't be affected by shadow RAM at all. // Methods < 0x80 shouldn't be affected by shadow RAM at all.
if (meth.Method >= 0x80) if (meth.Method >= 0x80)
{ {
ShadowRamControl shadowCtrl = ShadowRamControl;
// TODO: Figure out what TrackWithFilter does, compared to Track. // TODO: Figure out what TrackWithFilter does, compared to Track.
if (shadowCtrl == ShadowRamControl.Track || if (shadowCtrl == ShadowRamControl.Track ||
shadowCtrl == ShadowRamControl.TrackWithFilter) shadowCtrl == ShadowRamControl.TrackWithFilter)