about summary refs log tree commit diff
path: root/src/Ryujinx.Graphics.Gpu/Engine/MME
diff options
context:
space:
mode:
author TSR Berry <20988865+TSRBerry@users.noreply.github.com> 2023-04-08 01:22:00 +0200
committer Mary <thog@protonmail.com> 2023-04-27 23:51:14 +0200
commit cee712105850ac3385cd0091a923438167433f9f (patch)
tree 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine/MME
parent cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine/MME')
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs15
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs18
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs17
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs52
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs101
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs341
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs16
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs113
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs400
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs39
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs517
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs55
12 files changed, 1684 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
new file mode 100644
index 00000000..eeef9c67
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
@@ -0,0 +1,15 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro Arithmetic and Logic unit operation.
+ /// </summary>
+ enum AluOperation
+ {
+ AluReg = 0, // Register-to-register operation; the concrete operation is an AluRegOperation.
+ AddImmediate = 1, // Register A plus the sign-extended immediate.
+ BitfieldReplace = 2, // Replaces a bitfield of register A with a field extracted from register B.
+ BitfieldExtractLslImm = 3, // Extracts a bitfield of register B (start bit taken from register A), shifted left by an immediate.
+ BitfieldExtractLslReg = 4, // Extracts a bitfield of register B (immediate start bit), shifted left by register A.
+ ReadImmediate = 5 // Reads the GPU register at index (register A + immediate).
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
new file mode 100644
index 00000000..f3e05d38
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro Arithmetic and Logic unit binary register-to-register operation.
+ /// </summary>
+ enum AluRegOperation
+ {
+ Add = 0, // a + b, updates the carry flag.
+ AddWithCarry = 1, // a + b + carry, updates the carry flag.
+ Subtract = 2, // a - b, updates the carry flag.
+ SubtractWithBorrow = 3, // a - b, minus one more when the carry flag is clear; updates the carry flag.
+ BitwiseExclusiveOr = 8, // a ^ b (values 4 to 7 are not defined by this enum).
+ BitwiseOr = 9, // a | b
+ BitwiseAnd = 10, // a & b
+ BitwiseAndNot = 11, // a & ~b
+ BitwiseNotAnd = 12 // ~(a & b)
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
new file mode 100644
index 00000000..dc336026
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro assignment operation.
+ /// </summary>
+ enum AssignmentOperation
+ {
+ IgnoreAndFetch = 0, // Ignores the result and stores a fetched parameter in the destination register.
+ Move = 1, // Stores the result in the destination register.
+ MoveAndSetMaddr = 2, // Stores the result and also uses it as the method address.
+ FetchAndSend = 3, // Stores a fetched parameter, then sends the result.
+ MoveAndSend = 4, // Stores the result, then sends it.
+ FetchAndSetMaddr = 5, // Stores a fetched parameter and uses the result as the method address.
+ MoveAndSetMaddrThenFetchAndSend = 6, // Stores the result, uses it as the method address, then sends a fetched parameter.
+ MoveAndSetMaddrThenSendHigh = 7 // Stores the result, uses it as the method address, then sends bits 17:12 of the result.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
new file mode 100644
index 00000000..117961db
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
@@ -0,0 +1,52 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Word of the macro arguments FIFO, kept together with the GPU virtual address it is located at.
+ /// </summary>
+ readonly struct FifoWord
+ {
+ /// <summary>
+ /// GPU virtual address where the word is located in memory.
+ /// </summary>
+ public ulong GpuVa { get; }
+
+ /// <summary>
+ /// Word value.
+ /// </summary>
+ public int Word { get; }
+
+ /// <summary>
+ /// Creates a new FIFO word.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the word is located in memory</param>
+ /// <param name="word">Word value</param>
+ public FifoWord(ulong gpuVa, int word)
+ {
+ GpuVa = gpuVa;
+ Word = word;
+ }
+ }
+
+ /// <summary>
+ /// Macro Execution Engine interface, implemented by the HLE, JIT and interpreter engines.
+ /// </summary>
+ interface IMacroEE
+ {
+ /// <summary>
+ /// Arguments FIFO, holding the arguments that are fetched while the macro runs.
+ /// </summary>
+ Queue<FifoWord> Fifo { get; }
+
+ /// <summary>
+ /// Executes the GPU Macro code being passed.
+ /// </summary>
+ /// <param name="code">Code to be executed</param>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument to be passed to the GPU Macro</param>
+ void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
new file mode 100644
index 00000000..12a3ac02
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
@@ -0,0 +1,101 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU macro program.
+ /// </summary>
+ struct Macro
+ {
+ /// <summary>
+ /// Word offset of the code on the code memory.
+ /// </summary>
+ public int Position { get; }
+
+ private IMacroEE _executionEngine; // Created lazily by the first StartExecution call.
+ private bool _executionPending; // Set by StartExecution, consumed by Execute.
+ private int _argument; // First call argument, forwarded to the engine as arg0.
+ private MacroHLEFunctionName _hleFunction; // HLE function matched for this macro, None when there is no match.
+
+ /// <summary>
+ /// Creates a new instance of the GPU cached macro program.
+ /// </summary>
+ /// <param name="position">Macro code start position</param>
+ public Macro(int position)
+ {
+ Position = position;
+
+ _executionEngine = null;
+ _executionPending = false;
+ _argument = 0;
+ _hleFunction = MacroHLEFunctionName.None;
+ }
+
+ /// <summary>
+ /// Sets the first argument for the macro call and flags the execution as pending. The first call also selects the execution engine: HLE when enabled and the code matches a supported macro, otherwise the JIT when enabled, otherwise the interpreter.
+ /// </summary>
+ /// <param name="context">GPU context where the macro code is being executed</param>
+ /// <param name="processor">GPU GP FIFO command processor</param>
+ /// <param name="code">Code to be executed</param>
+ /// <param name="argument">First argument</param>
+ public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument)
+ {
+ _argument = argument;
+
+ _executionPending = true;
+
+ if (_executionEngine == null) // The engine is chosen once and reused by later calls.
+ {
+ if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction))
+ {
+ _executionEngine = new MacroHLE(processor, _hleFunction);
+ }
+ else if (GraphicsConfig.EnableMacroJit)
+ {
+ _executionEngine = new MacroJit();
+ }
+ else
+ {
+ _executionEngine = new MacroInterpreter();
+ }
+ }
+
+ // We don't consume the parameter buffer value, so we don't need to flush it.
+ // Doing so improves performance if the value was written by a GPU shader.
+ if (_hleFunction == MacroHLEFunctionName.DrawElementsIndirect)
+ {
+ context.GPFifo.SetFlushSkips(1);
+ }
+ else if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+ {
+ context.GPFifo.SetFlushSkips(2);
+ }
+ }
+
+ /// <summary>
+ /// Runs the macro program code once if StartExecution flagged an execution as pending; does nothing otherwise.
+ /// </summary>
+ /// <param name="code">Program code</param>
+ /// <param name="state">Current GPU state</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state)
+ {
+ if (_executionPending)
+ {
+ _executionPending = false; // Cleared before running, so each StartExecution triggers a single run.
+ _executionEngine?.Execute(code.Slice(Position), state, _argument);
+ }
+ }
+
+ /// <summary>
+ /// Pushes an argument to the macro call argument FIFO. The argument is dropped if no execution engine was created yet.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+ /// <param name="argument">Argument to be pushed</param>
+ public void PushArgument(ulong gpuVa, int argument)
+ {
+ _executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument));
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
new file mode 100644
index 00000000..8630bbc4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
@@ -0,0 +1,341 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro High-level emulation.
+ /// </summary>
+ class MacroHLE : IMacroEE
+ {
+ private const int ColorLayerCountOffset = 0x818; // Layer count of the first color target; passed to IDeviceState.Read without the x4 scaling of the Read helper.
+ private const int ColorStructSize = 0x40; // Distance between the layer counts of consecutive color targets.
+ private const int ZetaLayerCountOffset = 0x1230; // Layer count of the depth-stencil target; passed to IDeviceState.Read without scaling.
+
+ private const int IndirectDataEntrySize = 0x10; // Not referenced by the code of this class.
+ private const int IndirectIndexedDataEntrySize = 0x14; // Five 32-bit words: count, instance count, first index, first vertex, first instance.
+
+ private readonly GPFifoProcessor _processor;
+ private readonly MacroHLEFunctionName _functionName;
+
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue<FifoWord> Fifo { get; }
+
+ /// <summary>
+ /// Creates a new instance of the HLE macro handler.
+ /// </summary>
+ /// <param name="processor">GPU GP FIFO command processor</param>
+ /// <param name="functionName">Name of the HLE macro function to be called</param>
+ public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
+ {
+ _processor = processor;
+ _functionName = functionName;
+
+ Fifo = new Queue<FifoWord>();
+ }
+
+ /// <summary>
+ /// Runs the high-level implementation selected at construction time; the macro code itself is not executed.
+ /// </summary>
+ /// <param name="code">Code of the program to execute (not read by this implementation)</param>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+ {
+ switch (_functionName)
+ {
+ case MacroHLEFunctionName.ClearColor:
+ ClearColor(state, arg0);
+ break;
+ case MacroHLEFunctionName.ClearDepthStencil:
+ ClearDepthStencil(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawArraysInstanced:
+ DrawArraysInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsInstanced:
+ DrawElementsInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsIndirect:
+ DrawElementsIndirect(state, arg0);
+ break;
+ case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+ MultiDrawElementsIndirectCount(state, arg0);
+ break;
+ default: // Includes MacroHLEFunctionName.None.
+ throw new NotImplementedException(_functionName.ToString());
+ }
+
+ // It should be empty at this point, but clear it just to be safe.
+ Fifo.Clear();
+ }
+
+ /// <summary>
+ /// Clears one bound color target.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call; bits 6 to 9 select the color target, and the value is forwarded unchanged to the clear</param>
+ private void ClearColor(IDeviceState state, int arg0)
+ {
+ int index = (arg0 >> 6) & 0xf; // Color target index.
+ int layerCount = state.Read(ColorLayerCountOffset + index * ColorStructSize); // Layer count of the selected target.
+
+ _processor.ThreedClass.Clear(arg0, layerCount);
+ }
+
+ /// <summary>
+ /// Clears the current depth-stencil target.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, forwarded unchanged to the clear</param>
+ private void ClearDepthStencil(IDeviceState state, int arg0)
+ {
+ int layerCount = state.Read(ZetaLayerCountOffset); // Layer count of the depth-stencil target.
+
+ _processor.ThreedClass.Clear(arg0, layerCount);
+ }
+
+ /// <summary>
+ /// Performs a non-indexed instanced draw, with the parameters taken from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawArraysInstanced(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ 0, // Position where the indexed variant passes the first index.
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: false);
+ }
+
+ /// <summary>
+ /// Performs an indexed instanced draw, with the parameters taken from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawElementsInstanced(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ firstIndex.Word,
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: true);
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawElementsIndirect(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ ulong indirectBufferGpuVa = count.GpuVa; // The draw parameters start at the address of the first fetched word.
+
+ var bufferCache = _processor.MemoryManager.Physical.BufferCache;
+
+ bool useBuffer = bufferCache.CheckModified(_processor.MemoryManager, indirectBufferGpuVa, IndirectIndexedDataEntrySize, out ulong indirectBufferAddress); // NOTE(review): presumably true when the range was modified on the GPU, making the fetched words stale; confirm against BufferCache.
+
+ if (useBuffer)
+ {
+ int indexCount = firstIndex.Word + count.Word;
+
+ _processor.ThreedClass.DrawIndirect(
+ topology,
+ indirectBufferAddress,
+ 0, // Position where the multi-draw variant passes the parameter buffer address.
+ 1, // Position where the multi-draw variant passes the maximum draw count.
+ IndirectIndexedDataEntrySize,
+ indexCount,
+ Threed.IndirectDrawType.DrawIndexedIndirect);
+ }
+ else
+ {
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ firstIndex.Word,
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: true);
+ }
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed multi-draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the index of the first draw</param>
+ private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
+ {
+ int arg1 = FetchParam().Word;
+ int arg2 = FetchParam().Word;
+ int arg3 = FetchParam().Word;
+
+ int startDraw = arg0;
+ int endDraw = arg1;
+ var topology = (PrimitiveTopology)arg2;
+ int paddingWords = arg3;
+ // Each entry is the five draw parameter words followed by the padding words.
+ int stride = paddingWords * 4 + IndirectIndexedDataEntrySize;
+
+ // Only the address of this word is used; it locates the draw count in GPU memory.
+ ulong parameterBufferGpuVa = FetchParam().GpuVa;
+
+ int maxDrawCount = endDraw - startDraw;
+
+ if (startDraw != 0)
+ {
+ int drawCount = _processor.MemoryManager.Read<int>(parameterBufferGpuVa, tracked: true);
+
+ // Calculate maximum draw count based on the previous draw count and current draw count.
+ if ((uint)drawCount <= (uint)startDraw)
+ {
+ // The start draw is past our total draw count, so all draws were already performed.
+ maxDrawCount = 0;
+ }
+ else
+ {
+ // Perform just the missing number of draws.
+ maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw));
+ }
+ }
+
+ if (maxDrawCount == 0)
+ {
+ Fifo.Clear();
+ return;
+ }
+
+ ulong indirectBufferGpuVa = 0;
+ int indexCount = 0;
+
+ for (int i = 0; i < maxDrawCount; i++)
+ {
+ // All five words of the entry must be dequeued, but only the count and first index are needed here.
+ var count = FetchParam();
+ FetchParam(); // Instance count.
+ var firstIndex = FetchParam();
+ FetchParam(); // First vertex.
+ FetchParam(); // First instance.
+
+ if (i == 0)
+ {
+ // The indirect buffer starts at the address of the first word of the first entry.
+ indirectBufferGpuVa = count.GpuVa;
+ }
+
+ indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);
+
+ // Skip the padding between entries; nothing is skipped after the last entry.
+ if (i != maxDrawCount - 1)
+ {
+ for (int j = 0; j < paddingWords; j++)
+ {
+ FetchParam();
+ }
+ }
+ }
+
+ var bufferCache = _processor.MemoryManager.Physical.BufferCache;
+
+ ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;
+
+ ulong indirectBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize);
+ ulong parameterBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, parameterBufferGpuVa, 4);
+
+ _processor.ThreedClass.DrawIndirect(
+ topology,
+ indirectBufferAddress,
+ parameterBufferAddress,
+ maxDrawCount,
+ stride,
+ indexCount,
+ Threed.IndirectDrawType.DrawIndexedIndirectCount);
+ }
+
+ /// <summary>
+ /// Checks if the draw should be skipped, because the masked instance count is zero.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="instanceCount">Draw instance count</param>
+ /// <returns>True if the draw should be skipped, false otherwise</returns>
+ private static bool ShouldSkipDraw(IDeviceState state, int instanceCount)
+ {
+ return (Read(state, 0xd1b) & instanceCount) == 0; // NOTE(review): register 0xd1b is presumably the instance count mask; confirm against the 3D engine register layout.
+ }
+
+ /// <summary>
+ /// Fetches an argument from the arguments FIFO, logging a warning if the FIFO is empty.
+ /// </summary>
+ /// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
+ private FifoWord FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return new FifoWord(0UL, 0);
+ }
+
+ return value;
+ }
+
+ /// <summary>
+ /// Reads data from a GPU register.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register index; it is multiplied by 4 to form the offset passed to the device state</param>
+ /// <returns>GPU register value</returns>
+ private static int Read(IDeviceState state, int reg)
+ {
+ return state.Read(reg * 4);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
new file mode 100644
index 00000000..751867fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Name of the High-level implementation of a Macro function.
+ /// </summary>
+ enum MacroHLEFunctionName
+ {
+ None, // No high-level implementation.
+ ClearColor, // Clears one bound color target.
+ ClearDepthStencil, // Clears the current depth-stencil target.
+ DrawArraysInstanced, // Non-indexed draw with parameters from the arguments FIFO.
+ DrawElementsInstanced, // Indexed draw with parameters from the arguments FIFO.
+ DrawElementsIndirect, // Indexed draw with parameters from a GPU buffer.
+ MultiDrawElementsIndirectCount // Indexed multi-draw with parameters and draw count from GPU buffers.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
new file mode 100644
index 00000000..719e170f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
@@ -0,0 +1,113 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Table with information about High-level implementations of GPU Macro code.
+ /// </summary>
+ static class MacroHLETable
+ {
+ /// <summary>
+ /// Macro High-level implementation table entry.
+ /// </summary>
+ readonly struct TableEntry
+ {
+ /// <summary>
+ /// Name of the Macro function.
+ /// </summary>
+ public MacroHLEFunctionName Name { get; }
+
+ /// <summary>
+ /// Hash of the original binary Macro function code.
+ /// </summary>
+ public Hash128 Hash { get; }
+
+ /// <summary>
+ /// Size (in bytes) of the original binary Macro function code.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Creates a new table entry.
+ /// </summary>
+ /// <param name="name">Name of the Macro function</param>
+ /// <param name="hash">Hash of the original binary Macro function code</param>
+ /// <param name="length">Size (in bytes) of the original binary Macro function code</param>
+ public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length)
+ {
+ Name = name;
+ Hash = hash;
+ Length = length;
+ }
+ }
+
+ private static readonly TableEntry[] _table = new TableEntry[] // Each entry: function name, hash of the macro binary, length in bytes of the hashed code.
+ {
+ new TableEntry(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28),
+ new TableEntry(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24),
+ new TableEntry(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48),
+ new TableEntry(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5c),
+ new TableEntry(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7c),
+ new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C)
+ };
+
+ /// <summary>
+ /// Checks if the host supports all features required by the HLE macro.
+ /// </summary>
+ /// <param name="caps">Host capabilities</param>
+ /// <param name="name">Name of the HLE macro to be checked</param>
+ /// <returns>True if the host supports the HLE macro, false otherwise</returns>
+ private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
+ {
+ if (name == MacroHLEFunctionName.ClearColor ||
+ name == MacroHLEFunctionName.ClearDepthStencil ||
+ name == MacroHLEFunctionName.DrawArraysInstanced ||
+ name == MacroHLEFunctionName.DrawElementsInstanced ||
+ name == MacroHLEFunctionName.DrawElementsIndirect)
+ {
+ return true; // These do not depend on any host capability.
+ }
+ else if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+ {
+ return caps.SupportsIndirectParameters;
+ }
+
+ return false; // Any other name, including None, is reported as unsupported.
+ }
+
+ /// <summary>
+ /// Checks if there's a fast, High-level implementation of the specified Macro code available.
+ /// The first bytes of the code are hashed and compared against each known macro of the table.
+ /// </summary>
+ /// <param name="code">Macro code to be checked</param>
+ /// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
+ /// <param name="name">Name of the function if an implementation is available and supported, otherwise <see cref="MacroHLEFunctionName.None"/></param>
+ /// <returns>True if there is an implementation available and supported, false otherwise</returns>
+ public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name)
+ {
+ var mc = MemoryMarshal.Cast<int, byte>(code);
+
+ for (int i = 0; i < _table.Length; i++)
+ {
+ ref var entry = ref _table[i];
+
+ // The remaining code can be shorter than the known macro, for example when it starts
+ // close to the end of the code memory. It can't match then, and slicing it would throw.
+ if (mc.Length < entry.Length)
+ {
+ continue;
+ }
+
+ var hash = XXHash128.ComputeHash(mc.Slice(0, entry.Length));
+ if (hash == entry.Hash)
+ {
+ if (IsMacroHLESupported(caps, entry.Name))
+ {
+ name = entry.Name;
+ return true;
+ }
+
+ // The macro is known but the host can't run its HLE version, no point in checking the rest.
+ break;
+ }
+ }
+
+ name = MacroHLEFunctionName.None;
+ return false;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
new file mode 100644
index 00000000..df6ee040
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
@@ -0,0 +1,400 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro code interpreter.
+ /// </summary>
+ class MacroInterpreter : IMacroEE
+ {
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue<FifoWord> Fifo { get; }
+
+ private int[] _gprs; // General purpose registers; register 0 always reads as 0.
+
+ private int _methAddr; // Current method address; multiplied by 4 when a value is sent.
+ private int _methIncr; // Amount added to the method address after each send.
+
+ private bool _carry; // Carry flag, updated by the add and subtract register operations.
+
+ private int _opCode; // Instruction being executed.
+ private int _pipeOp; // Prefetched word that becomes the next instruction.
+
+ private bool _ignoreExitFlag; // Set when a branch with a delay slot is taken, so the exit flag of the delay slot is ignored.
+
+ private int _pc; // Index of the next word to fetch from the code.
+
+ /// <summary>
+ /// Creates a new instance of the macro code interpreter.
+ /// </summary>
+ public MacroInterpreter()
+ {
+ Fifo = new Queue<FifoWord>();
+
+ _gprs = new int[8];
+ }
+
+ /// <summary>
+ /// Executes a macro program until it exits.
+ /// </summary>
+ /// <param name="code">Code of the program to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+ {
+ Reset();
+
+ _gprs[1] = arg0; // The first argument is made available in register 1.
+
+ _pc = 0;
+
+ FetchOpCode(code); // Primes the pipeline: the first instruction is now held in _pipeOp.
+
+ while (Step(code, state))
+ {
+ }
+
+ // Due to the delay slot, we still need to execute
+ // one more instruction before we actually exit.
+ Step(code, state);
+ }
+
+ /// <summary>
+ /// Resets the registers, the method address and increment, and the carry flag.
+ /// Call each time you run a new program.
+ /// </summary>
+ private void Reset()
+ {
+ for (int index = 0; index < _gprs.Length; index++)
+ {
+ _gprs[index] = 0;
+ }
+
+ _methAddr = 0;
+ _methIncr = 0;
+
+ _carry = false;
+ }
+
+ /// <summary>
+ /// Executes a single instruction of the program.
+ /// </summary>
+ /// <param name="code">Program code to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>True to continue execution, false if the program exited</returns>
+ private bool Step(ReadOnlySpan<int> code, IDeviceState state)
+ {
+ int baseAddr = _pc - 1; // Index of the prefetched word, which is the instruction executed by this step.
+
+ FetchOpCode(code);
+
+ if ((_opCode & 7) < 7) // Operation 7 is the branch, the values below it produce a result.
+ {
+ // Operation produces a value.
+ AssignmentOperation asgOp = (AssignmentOperation)((_opCode >> 4) & 7);
+
+ int result = GetAluResult(state);
+
+ switch (asgOp)
+ {
+ // Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ SetDstGpr(FetchParam());
+ break;
+ // Move result.
+ case AssignmentOperation.Move:
+ SetDstGpr(result);
+ break;
+ // Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ break;
+ // Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ SetDstGpr(FetchParam());
+ Send(state, result);
+ break;
+ // Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ SetDstGpr(result);
+ Send(state, result);
+ break;
+ // Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ SetDstGpr(FetchParam());
+ SetMethAddr(result);
+ break;
+ // Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, FetchParam());
+ break;
+ // Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, (result >> 12) & 0x3f);
+ break;
+ }
+ }
+ else
+ {
+ // Branch.
+ bool onNotZero = ((_opCode >> 4) & 1) != 0; // Bit 4 selects between branch on non-zero and branch on zero.
+
+ bool taken = onNotZero
+ ? GetGprA() != 0
+ : GetGprA() == 0;
+
+ if (taken)
+ {
+ _pc = baseAddr + GetImm(); // The branch target is relative to the branch instruction itself.
+
+ bool noDelays = (_opCode & 0x20) != 0; // Bit 5 set means the branch has no delay slot.
+
+ if (noDelays)
+ {
+ FetchOpCode(code); // Replaces the already prefetched delay slot word with the branch target.
+ }
+ else
+ {
+ // The delay slot instruction exit flag should be ignored.
+ _ignoreExitFlag = true;
+ }
+
+ return true;
+ }
+ }
+
+ bool exit = (_opCode & 0x80) != 0 && !_ignoreExitFlag; // Bit 7 is the exit flag.
+
+ _ignoreExitFlag = false;
+
+ return !exit;
+ }
+
+ /// <summary>
+ /// Advances the instruction pipeline: the prefetched word becomes the current operation code, and the next word is prefetched from the program code.
+ /// </summary>
+ /// <param name="code">Program code</param>
+ private void FetchOpCode(ReadOnlySpan<int> code)
+ {
+ _opCode = _pipeOp;
+ _pipeOp = code[_pc++];
+ }
+
+ /// <summary>
+ /// Gets the result of the current Arithmetic and Logic unit operation.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>Operation result</returns>
+ private int GetAluResult(IDeviceState state)
+ {
+ AluOperation op = (AluOperation)(_opCode & 7);
+
+ switch (op)
+ {
+ case AluOperation.AluReg:
+ return GetAluResult((AluRegOperation)((_opCode >> 17) & 0x1f), GetGprA(), GetGprB());
+
+ case AluOperation.AddImmediate:
+ return GetGprA() + GetImm();
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ int bfSrcBit = (_opCode >> 17) & 0x1f; // Bits 21:17, source bit position.
+ int bfSize = (_opCode >> 22) & 0x1f; // Bits 26:22, field size in bits.
+ int bfDstBit = (_opCode >> 27) & 0x1f; // Bits 31:27, destination bit position.
+
+ int bfMask = (1 << bfSize) - 1;
+
+ int dst = GetGprA();
+ int src = GetGprB();
+
+ switch (op)
+ {
+ case AluOperation.BitfieldReplace:
+ src = (int)((uint)src >> bfSrcBit) & bfMask; // Unsigned shift, so no sign bits are shifted in.
+
+ dst &= ~(bfMask << bfDstBit);
+
+ dst |= src << bfDstBit;
+
+ return dst;
+
+ case AluOperation.BitfieldExtractLslImm:
+ src = (int)((uint)src >> dst) & bfMask; // Here register A supplies the source bit position.
+
+ return src << bfDstBit;
+
+ case AluOperation.BitfieldExtractLslReg:
+ src = (int)((uint)src >> bfSrcBit) & bfMask;
+
+ return src << dst; // Here register A supplies the left shift amount.
+ }
+
+ break;
+
+ case AluOperation.ReadImmediate:
+ return Read(state, GetGprA() + GetImm());
+ }
+
+ throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Gets the result of an Arithmetic and Logic operation using registers.
+ /// The add and subtract operations also update the carry flag, treating the operands as unsigned 32-bit values.
+ /// </summary>
+ /// <param name="aluOp">Arithmetic and Logic unit operation with registers</param>
+ /// <param name="a">First operand value</param>
+ /// <param name="b">Second operand value</param>
+ /// <returns>Operation result</returns>
+ /// <exception cref="InvalidOperationException">Thrown when <paramref name="aluOp"/> is not a known operation</exception>
+ private int GetAluResult(AluRegOperation aluOp, int a, int b)
+ {
+ // Zero-extend the operands. Casting int directly to ulong sign-extends, which sets the upper
+ // 32 bits for values with bit 31 set and makes the carry checks below give the wrong answer.
+ ulong ua = (uint)a;
+ ulong ub = (uint)b;
+
+ ulong result;
+
+ switch (aluOp)
+ {
+ case AluRegOperation.Add:
+ result = ua + ub;
+
+ // Carry out of bit 31.
+ _carry = result > 0xffffffff;
+
+ return (int)result;
+
+ case AluRegOperation.AddWithCarry:
+ result = ua + ub + (_carry ? 1UL : 0UL);
+
+ _carry = result > 0xffffffff;
+
+ return (int)result;
+
+ case AluRegOperation.Subtract:
+ result = ua - ub;
+
+ // A borrow wraps the 64-bit result around, setting its upper bits; carry means "no borrow".
+ _carry = result < 0x100000000;
+
+ return (int)result;
+
+ case AluRegOperation.SubtractWithBorrow:
+ result = ua - ub - (_carry ? 0UL : 1UL);
+
+ _carry = result < 0x100000000;
+
+ return (int)result;
+
+ case AluRegOperation.BitwiseExclusiveOr: return a ^ b;
+ case AluRegOperation.BitwiseOr: return a | b;
+ case AluRegOperation.BitwiseAnd: return a & b;
+ case AluRegOperation.BitwiseAndNot: return a & ~b;
+ case AluRegOperation.BitwiseNotAnd: return ~(a & b);
+ }
+
+ throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Extracts the signed immediate stored in bits 31:14 of the current operation code.
+ /// </summary>
+ /// <returns>The immediate value, sign-extended to 32 bits</returns>
+ private int GetImm()
+ {
+ // Note: The immediate is signed, the sign-extension is intended here.
+ return _opCode >> 14;
+ }
+
+ /// <summary>
+ /// Sets the current method address, for method calls.
+ /// </summary>
+ /// <param name="value">Packed address and increment value</param>
+ private void SetMethAddr(int value)
+ {
+ _methAddr = (value >> 0) & 0xfff; // Bits 11:0 hold the method address.
+ _methIncr = (value >> 12) & 0x3f; // Bits 17:12 hold the increment applied after each send.
+ }
+
+ /// <summary>
+ /// Sets the destination register value.
+ /// </summary>
+ /// <param name="value">Value to set (usually the operation result)</param>
+ private void SetDstGpr(int value)
+ {
+ _gprs[(_opCode >> 8) & 7] = value; // Bits 10:8 select the destination register.
+ }
+
+ /// <summary>
+ /// Gets first operand value from the respective register.
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprA()
+ {
+ return GetGprValue((_opCode >> 11) & 7); // Bits 13:11 select the first operand register.
+ }
+
+ /// <summary>
+ /// Gets second operand value from the respective register.
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprB()
+ {
+ return GetGprValue((_opCode >> 14) & 7); // Bits 16:14 select the second operand register.
+ }
+
+ /// <summary>
+ /// Gets the value from a register, or 0 if the R0 register is specified.
+ /// </summary>
+ /// <param name="index">Index of the register</param>
+ /// <returns>Register value</returns>
+ private int GetGprValue(int index)
+ {
+ return index != 0 ? _gprs[index] : 0; // Whatever was stored in R0 is never read back.
+ }
+
+ /// <summary>
+ /// Fetches a call argument from the call argument FIFO, logging a warning if the FIFO is empty.
+ /// </summary>
+ /// <returns>The call argument, or 0 if the FIFO is empty</returns>
+ private int FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return 0;
+ }
+
+ return value.Word; // Only the word value is used, the GPU address is dropped.
+ }
+
+ /// <summary>
+ /// Reads data from a GPU register.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register index; it is multiplied by 4 to form the offset passed to the device state</param>
+ /// <returns>GPU register value</returns>
+ private int Read(IDeviceState state, int reg)
+ {
+ return state.Read(reg * 4);
+ }
+
+ /// <summary>
+ /// Performs a GPU method call, then advances the method address by the current increment.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="value">Call argument</param>
+ private void Send(IDeviceState state, int value)
+ {
+ state.Write(_methAddr * 4, value); // The method address is multiplied by 4 to form the offset passed to the device state.
+
+ _methAddr += _methIncr;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
new file mode 100644
index 00000000..4077f74e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
@@ -0,0 +1,39 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents an execution engine that uses a Just-in-Time compiler for fast execution.
+ /// </summary>
+ class MacroJit : IMacroEE
+ {
+     private readonly MacroJitContext _context = new MacroJitContext();
+
+     // Compiled form of the macro; created by the first call to Execute and reused afterwards.
+     private MacroJitCompiler.MacroExecute _execute;
+
+     /// <summary>
+     /// Arguments FIFO.
+     /// </summary>
+     public Queue<FifoWord> Fifo => _context.Fifo;
+
+     /// <summary>
+     /// Executes a macro program until it exits.
+     /// </summary>
+     /// <param name="code">Code of the program to execute</param>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+     public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+     {
+         // Only the code supplied on the first call is compiled; later calls reuse that delegate.
+         _execute ??= new MacroJitCompiler().Compile(code);
+
+         _execute(_context, state, arg0);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
new file mode 100644
index 00000000..f8d839fa
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
@@ -0,0 +1,517 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Reflection.Emit;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents a Macro Just-in-Time compiler.
+ /// </summary>
+ class MacroJitCompiler
+ {
+ private readonly DynamicMethod _meth; // Host method that receives the translated macro.
+ private readonly ILGenerator _ilGen; // IL generator of _meth; every Emit* helper writes through it.
+ private readonly LocalBuilder[] _gprs; // IL locals backing registers 1 to 7; slot 0 is never allocated.
+ private readonly LocalBuilder _carry; // IL local holding the carry flag written by the add/subtract operations.
+ private readonly LocalBuilder _methAddr; // IL local holding the method address used by sends.
+ private readonly LocalBuilder _methIncr; // IL local holding the value added to the method address after each send.
+
+ /// <summary>
+ /// Creates a new instance of the Macro Just-in-Time compiler.
+ /// </summary>
+ /// <remarks>
+ /// Creates the dynamic method, declares every IL local used by the translated code,
+ /// and emits the prologue that copies the macro argument into register 1.
+ /// </remarks>
+ public MacroJitCompiler()
+ {
+     Type[] parameterTypes = { typeof(MacroJitContext), typeof(IDeviceState), typeof(int) };
+
+     _meth = new DynamicMethod("Macro", typeof(void), parameterTypes);
+     _ilGen = _meth.GetILGenerator();
+
+     // Slot 0 is left null on purpose: register 0 is never backed by a local.
+     _gprs = new LocalBuilder[8];
+
+     for (int reg = 1; reg < _gprs.Length; reg++)
+     {
+         _gprs[reg] = _ilGen.DeclareLocal(typeof(int));
+     }
+
+     _carry = _ilGen.DeclareLocal(typeof(int));
+     _methAddr = _ilGen.DeclareLocal(typeof(int));
+     _methIncr = _ilGen.DeclareLocal(typeof(int));
+
+     // Prologue: R1 = arg0.
+     _ilGen.Emit(OpCodes.Ldarg_2);
+     _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
+ }
+
+ public delegate void MacroExecute(MacroJitContext context, IDeviceState state, int arg0); // Signature of a compiled macro; mirrors the parameter types given to the dynamic method.
+
+ /// <summary>
+ /// Translates a new piece of GPU Macro code into host executable code.
+ /// </summary>
+ /// <param name="code">Code to be translated</param>
+ /// <returns>Delegate of the host compiled code</returns>
+ public MacroExecute Compile(ReadOnlySpan<int> code)
+ {
+     Dictionary<int, Label> labels = new Dictionary<int, Label>();
+
+     int lastTarget = 0;
+
+     // First pass: define one label per branch target and remember the furthest one,
+     // stopping at the first exit located at or after every target recorded so far.
+     for (int pc = 0; pc < code.Length; pc++)
+     {
+         int opCode = code[pc];
+
+         if ((opCode & 7) == 7)
+         {
+             int target = pc + (opCode >> 14);
+
+             if (!labels.ContainsKey(target))
+             {
+                 labels.Add(target, _ilGen.DefineLabel());
+             }
+
+             lastTarget = Math.Max(lastTarget, target);
+         }
+
+         if ((opCode & 0x80) != 0 && pc >= lastTarget)
+         {
+             break;
+         }
+     }
+
+     // Second pass: code generation.
+     bool endedOnExit = false;
+
+     for (int pc = 0; pc < code.Length && !endedOnExit; pc++)
+     {
+         if (labels.TryGetValue(pc, out Label label))
+         {
+             _ilGen.MarkLabel(label);
+         }
+
+         Emit(code, pc, labels);
+
+         if ((code[pc] & 0x80) == 0)
+         {
+             continue;
+         }
+
+         // The instruction that follows an exit is still emitted before the return.
+         Emit(code, pc + 1, labels);
+         _ilGen.Emit(OpCodes.Ret);
+
+         // Keep translating only while a branch target lies beyond this exit.
+         endedOnExit = pc >= lastTarget;
+     }
+
+     // The code ran out without a final exit: terminate the method explicitly.
+     if (!endedOnExit)
+     {
+         _ilGen.Emit(OpCodes.Ret);
+     }
+
+     return _meth.CreateDelegate<MacroExecute>();
+ }
+
+ /// <summary>
+ /// Emits IL equivalent to the Macro instruction at a given offset.
+ /// </summary>
+ /// <param name="code">GPU Macro code</param>
+ /// <param name="offset">Offset, in words, where the instruction is located</param>
+ /// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
+ private void Emit(ReadOnlySpan<int> code, int offset, Dictionary<int, Label> labels)
+ {
+     int opCode = code[offset];
+
+     // Operation 7 is the branch; every other operation produces a value.
+     if ((opCode & 7) == 7)
+     {
+         EmitBranch(code, offset, opCode, labels);
+     }
+     else
+     {
+         EmitAssignment(opCode);
+     }
+ }
+
+ /// <summary>
+ /// Emits IL for an instruction that produces a value, followed by the
+ /// assignment operation (bits 6:4) that consumes that value.
+ /// </summary>
+ /// <param name="opCode">Instruction to be translated</param>
+ private void EmitAssignment(int opCode)
+ {
+     // Leaves the operation result on the IL evaluation stack.
+     EmitAluOp(opCode);
+
+     switch ((AssignmentOperation)((opCode >> 4) & 7))
+     {
+         case AssignmentOperation.IgnoreAndFetch:
+             // Drop the result and store a fetched parameter instead.
+             _ilGen.Emit(OpCodes.Pop);
+             EmitFetchParam();
+             EmitStoreDstGpr(opCode);
+             break;
+
+         case AssignmentOperation.Move:
+             // Store the result.
+             EmitStoreDstGpr(opCode);
+             break;
+
+         case AssignmentOperation.MoveAndSetMaddr:
+             // Store the result and also use it as method address.
+             _ilGen.Emit(OpCodes.Dup);
+             EmitStoreDstGpr(opCode);
+             EmitStoreMethAddr();
+             break;
+
+         case AssignmentOperation.FetchAndSend:
+             // Store a fetched parameter, then send the result.
+             EmitFetchParam();
+             EmitStoreDstGpr(opCode);
+             EmitSend();
+             break;
+
+         case AssignmentOperation.MoveAndSend:
+             // Store the result and send it.
+             _ilGen.Emit(OpCodes.Dup);
+             EmitStoreDstGpr(opCode);
+             EmitSend();
+             break;
+
+         case AssignmentOperation.FetchAndSetMaddr:
+             // Store a fetched parameter, then use the result as method address.
+             EmitFetchParam();
+             EmitStoreDstGpr(opCode);
+             EmitStoreMethAddr();
+             break;
+
+         case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+             // Store the result, use it as method address, then send a fetched parameter.
+             _ilGen.Emit(OpCodes.Dup);
+             EmitStoreDstGpr(opCode);
+             EmitStoreMethAddr();
+             EmitFetchParam();
+             EmitSend();
+             break;
+
+         case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+             // Store the result, use it as method address, then send bits 17:12 of it.
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Dup);
+             EmitStoreDstGpr(opCode);
+             EmitStoreMethAddr();
+             _ilGen.Emit(OpCodes.Ldc_I4, 12);
+             _ilGen.Emit(OpCodes.Shr_Un);
+             _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
+             _ilGen.Emit(OpCodes.And);
+             EmitSend();
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Emits IL for a conditional branch instruction.
+ /// </summary>
+ /// <param name="code">GPU Macro code</param>
+ /// <param name="offset">Offset, in words, where the branch is located</param>
+ /// <param name="opCode">The branch instruction</param>
+ /// <param name="labels">Labels for Macro branch targets</param>
+ private void EmitBranch(ReadOnlySpan<int> code, int offset, int opCode, Dictionary<int, Label> labels)
+ {
+     EmitLoadGprA(opCode);
+
+     Label notTaken = _ilGen.DefineLabel();
+
+     // Bit 4 picks the condition; the emitted test is inverted because it jumps over the taken path.
+     bool onNotZero = (opCode & 0x10) != 0;
+
+     _ilGen.Emit(onNotZero ? OpCodes.Brfalse : OpCodes.Brtrue, notTaken);
+
+     // Unless bit 5 is set, the following instruction is emitted on the taken path too.
+     bool hasDelaySlot = (opCode & 0x20) == 0;
+
+     if (hasDelaySlot)
+     {
+         Emit(code, offset + 1, labels);
+     }
+
+     int target = offset + (opCode >> 14);
+
+     _ilGen.Emit(OpCodes.Br, labels[target]);
+
+     _ilGen.MarkLabel(notTaken);
+ }
+
+ /// <summary>
+ /// Emits IL for an Arithmetic and Logic Unit instruction, leaving its result on the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction to be translated</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(int opCode)
+ {
+ AluOperation op = (AluOperation)(opCode & 7); // Bits 2:0 select the operation.
+
+ switch (op)
+ {
+ case AluOperation.AluReg:
+ EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode); // Bits 21:17 select the register operation.
+ break;
+
+ case AluOperation.AddImmediate:
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add); // A + imm
+ break;
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ int bfSrcBit = (opCode >> 17) & 0x1f; // Bits 21:17.
+ int bfSize = (opCode >> 22) & 0x1f; // Bits 26:22.
+ int bfDstBit = (opCode >> 27) & 0x1f; // Bits 31:27.
+
+ int bfMask = (1 << bfSize) - 1; // bfSize low bits set.
+ // The field parameters are compile-time constants of the instruction, so they are baked into the IL.
+ switch (op)
+ {
+ case AluOperation.BitfieldReplace: // (A & ~(mask << dst)) | (((B >> src) & mask) << dst)
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl); // Stack: field taken from B, moved to the destination position.
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, ~(bfMask << bfDstBit));
+ _ilGen.Emit(OpCodes.And); // Stack: field, A with the destination bits cleared.
+ _ilGen.Emit(OpCodes.Or);
+ break;
+
+ case AluOperation.BitfieldExtractLslImm: // ((B >> A) & mask) << dst
+ EmitLoadGprB(opCode);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shr_Un); // The shift amount comes from register A at run time.
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl);
+ break;
+
+ case AluOperation.BitfieldExtractLslReg: // ((B >> src) & mask) << A
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shl); // The shift amount comes from register A at run time.
+ break;
+ }
+ break;
+
+ case AluOperation.ReadImmediate: // MacroJitContext.Read(state, A + imm)
+ _ilGen.Emit(OpCodes.Ldarg_1); // Device state, first argument of the static Read helper.
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read)));
+ break;
+
+ default: // Any operation value without a case above is rejected.
+ throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}.");
+ }
+ }
+
+ /// <summary>
+ /// Emits IL for a binary Arithmetic and Logic Unit instruction.
+ /// The 32-bit result is left on the top of the IL evaluation stack.
+ /// </summary>
+ /// <remarks>
+ /// The add and subtract variants work on operands widened to 64 bits so that the carry can be
+ /// derived from the wide result: additions set the carry when the sum is above 0xffffffff,
+ /// subtractions set it when the difference is below 0x100000000 (that is, when no borrow occurred).
+ /// </remarks>
+ /// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
+ /// <param name="opCode">Raw instruction</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(AluRegOperation aluOp, int opCode)
+ {
+     switch (aluOp)
+     {
+         case AluRegOperation.Add:
+             // wide = A + B
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Add);
+             // carry = wide > 0xffffffff; result = (uint)wide
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+             _ilGen.Emit(OpCodes.Cgt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.AddWithCarry:
+             // wide = A + (B + carry); the grouping is irrelevant because addition is associative.
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Add);
+             _ilGen.Emit(OpCodes.Add);
+             // carry = wide > 0xffffffff; result = (uint)wide
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+             _ilGen.Emit(OpCodes.Cgt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.Subtract:
+             // wide = A - B
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Sub);
+             // carry = wide < 0x100000000; result = (uint)wide
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+             _ilGen.Emit(OpCodes.Clt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.SubtractWithBorrow:
+             // wide = (A - B) - (1 - carry).
+             // Subtraction is not associative, so A - B has to be computed before the borrow is
+             // subtracted. Pushing all three operands and subtracting twice would instead yield
+             // A - (B - borrow), which adds the borrow back.
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Sub);
+             // borrow = 1 - carry
+             _ilGen.Emit(OpCodes.Ldc_I4_1);
+             _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+             _ilGen.Emit(OpCodes.Sub);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Sub);
+             // carry = wide < 0x100000000; result = (uint)wide
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+             _ilGen.Emit(OpCodes.Clt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.BitwiseExclusiveOr:
+             // A ^ B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Xor);
+             break;
+         case AluRegOperation.BitwiseOr:
+             // A | B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Or);
+             break;
+         case AluRegOperation.BitwiseAnd:
+             // A & B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.And);
+             break;
+         case AluRegOperation.BitwiseAndNot:
+             // A & ~B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Not);
+             _ilGen.Emit(OpCodes.And);
+             break;
+         case AluRegOperation.BitwiseNotAnd:
+             // ~(A & B)
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.And);
+             _ilGen.Emit(OpCodes.Not);
+             break;
+         default:
+             throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
+     }
+ }
+
+ /// <summary>
+ /// Pushes the immediate operand of an instruction on the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the immediate should be extracted</param>
+ private void EmitLoadImm(int opCode)
+ {
+     // The immediate is signed: the arithmetic shift sign-extends it on purpose.
+     int immediate = opCode >> 14;
+
+     _ilGen.Emit(OpCodes.Ldc_I4, immediate);
+ }
+
+ /// <summary>
+ /// Pushes the General Purpose register selected as first operand (bits 13:11) on the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprA(int opCode)
+ {
+     int regIndex = (opCode >> 11) & 7;
+
+     EmitLoadGpr(regIndex);
+ }
+
+ /// <summary>
+ /// Pushes the General Purpose register selected as second operand (bits 16:14) on the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprB(int opCode)
+ {
+     int regIndex = (opCode >> 14) & 7;
+
+     EmitLoadGpr(regIndex);
+ }
+
+ /// <summary>
+ /// Loads the value of a General Purpose register on the IL evaluation stack.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 has a hardcoded value of 0.
+ /// </remarks>
+ /// <param name="index">Register number</param>
+ private void EmitLoadGpr(int index)
+ {
+     if (index != 0)
+     {
+         _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]);
+
+         return;
+     }
+
+     // No local backs register 0; push the constant directly.
+     _ilGen.Emit(OpCodes.Ldc_I4_0);
+ }
+
+ /// <summary>
+ /// Emits a call to the context method that fetches an argument from the arguments FIFO.
+ /// The fetched argument is left on the IL evaluation stack.
+ /// </summary>
+ private void EmitFetchParam()
+ {
+     // FetchParam is an instance method, so the context (argument 0) is pushed as its receiver.
+     _ilGen.Emit(OpCodes.Ldarg_0);
+
+     var fetchParam = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam));
+
+     _ilGen.Emit(OpCodes.Call, fetchParam);
+ }
+
+ /// <summary>
+ /// Pops the value on the top of the IL evaluation stack into the destination General Purpose register.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded.
+ /// </remarks>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitStoreDstGpr(int opCode)
+ {
+     int dstIndex = (opCode >> 8) & 7;
+
+     if (dstIndex != 0)
+     {
+         _ilGen.Emit(OpCodes.Stloc_S, _gprs[dstIndex]);
+     }
+     else
+     {
+         // The value still has to leave the stack even though it is not stored anywhere.
+         _ilGen.Emit(OpCodes.Pop);
+     }
+ }
+
+ /// <summary>
+ /// Stores the value on the top of the IL evaluation stack as method address.
+ /// This will be used on subsequent send calls as the destination method address.
+ /// Additionally, the 6 bits starting at bit 12 will be used as increment value,
+ /// added to the method address after each sent value.
+ /// </summary>
+ private void EmitStoreMethAddr()
+ {
+ _ilGen.Emit(OpCodes.Dup); // Keep a second copy of the value for the increment extraction below.
+ _ilGen.Emit(OpCodes.Ldc_I4, 0xfff);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Stloc_S, _methAddr); // methAddr = value & 0xfff
+ _ilGen.Emit(OpCodes.Ldc_I4, 12);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Stloc_S, _methIncr); // methIncr = (value >> 12) & 0x3f; the value is fully consumed.
+ }
+
+ /// <summary>
+ /// Sends the value on the top of the IL evaluation stack to the GPU,
+ /// using the current method address, which is then advanced by the method increment.
+ /// </summary>
+ private void EmitSend()
+ {
+ _ilGen.Emit(OpCodes.Ldarg_1); // Device state; the value to send is already on the stack below it.
+ _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
+ _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send))); // Send(value, state, methAddr)
+ _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
+ _ilGen.Emit(OpCodes.Ldloc_S, _methIncr);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Stloc_S, _methAddr); // methAddr += methIncr
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
new file mode 100644
index 00000000..52c2a11b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
@@ -0,0 +1,55 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents a Macro Just-in-Time compiler execution context.
+ /// </summary>
+ /// <remarks>
+ /// The methods of this class are looked up by name and called from the IL generated by the compiler.
+ /// </remarks>
+ class MacroJitContext
+ {
+     /// <summary>
+     /// Arguments FIFO.
+     /// </summary>
+     public Queue<FifoWord> Fifo { get; } = new Queue<FifoWord>();
+
+     /// <summary>
+     /// Fetches an argument from the arguments FIFO.
+     /// </summary>
+     /// <returns>The call argument, or 0 (after logging a warning) if the FIFO is empty</returns>
+     public int FetchParam()
+     {
+         if (Fifo.TryDequeue(out var fifoEntry))
+         {
+             return fifoEntry.Word;
+         }
+
+         Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+         return 0;
+     }
+
+     /// <summary>
+     /// Reads data from a GPU register.
+     /// </summary>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="reg">Register offset to read; it is multiplied by 4 before being passed to the state</param>
+     /// <returns>GPU register value</returns>
+     public static int Read(IDeviceState state, int reg)
+     {
+         int byteOffset = reg * 4;
+
+         return state.Read(byteOffset);
+     }
+
+     /// <summary>
+     /// Performs a GPU method call.
+     /// </summary>
+     /// <param name="value">Call argument</param>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="methAddr">Address, in words, of the method</param>
+     public static void Send(int value, IDeviceState state, int methAddr)
+     {
+         int byteOffset = methAddr * 4;
+
+         state.Write(byteOffset, value);
+     }
+ }
+}