aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics/Gpu
diff options
context:
space:
mode:
author gdkchan <gab.dark.100@gmail.com> 2018-04-08 16:17:35 -0300
committer gdkchan <gab.dark.100@gmail.com> 2018-04-08 16:41:38 -0300
commit b9aa3966c00b4bb3ff0292dc28ed53ad26cf284b (patch)
tree cd2ab3d65c61ac6c6ceb312116e5d138868a3e18 /Ryujinx.Graphics/Gpu
parent 7acd0e01226d64d05b2675f6ae07507039a31835 (diff)
Merge shader branch, adding support for GLSL decompilation, a macro
interpreter, and a rewrite of the GPU code.
Diffstat (limited to 'Ryujinx.Graphics/Gpu')
-rw-r--r-- Ryujinx.Graphics/Gpu/BCn.cs 468
-rw-r--r-- Ryujinx.Graphics/Gpu/BlockLinearSwizzle.cs 57
-rw-r--r-- Ryujinx.Graphics/Gpu/INvGpuEngine.cs 11
-rw-r--r-- Ryujinx.Graphics/Gpu/ISwizzle.cs 7
-rw-r--r-- Ryujinx.Graphics/Gpu/LinearSwizzle.cs 20
-rw-r--r-- Ryujinx.Graphics/Gpu/MacroInterpreter.cs 420
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpu.cs 35
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpuPBEntry.cs 62
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpuPGraph.cs 305
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpuRegister.cs 94
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpuTexture.cs 10
-rw-r--r-- Ryujinx.Graphics/Gpu/NsGpuTextureFormat.cs 9
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuEngine.cs (renamed from Ryujinx.Graphics/Gpu/NsGpuEngine.cs) 6
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuEngine3d.cs 469
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuEngine3dReg.cs 44
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuFifo.cs 171
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuFifoMeth.cs 11
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuMethod.cs 6
-rw-r--r-- Ryujinx.Graphics/Gpu/NvGpuPushBuffer.cs 101
-rw-r--r-- Ryujinx.Graphics/Gpu/SwizzleAddr.cs 144
-rw-r--r-- Ryujinx.Graphics/Gpu/Texture.cs 34
-rw-r--r-- Ryujinx.Graphics/Gpu/TextureFactory.cs 83
-rw-r--r-- Ryujinx.Graphics/Gpu/TextureReader.cs 127
-rw-r--r-- Ryujinx.Graphics/Gpu/TextureSwizzle.cs 11
24 files changed, 1604 insertions, 1101 deletions
diff --git a/Ryujinx.Graphics/Gpu/BCn.cs b/Ryujinx.Graphics/Gpu/BCn.cs
deleted file mode 100644
index b1caf467..00000000
--- a/Ryujinx.Graphics/Gpu/BCn.cs
+++ /dev/null
@@ -1,468 +0,0 @@
-using System;
-using System.Drawing;
-
-namespace Ryujinx.Graphics.Gpu
-{
- static class BCn
- {
- public static byte[] DecodeBC1(NsGpuTexture Tex, int Offset)
- {
- int W = (Tex.Width + 3) / 4;
- int H = (Tex.Height + 3) / 4;
-
- byte[] Output = new byte[W * H * 64];
-
- SwizzleAddr Swizzle = new SwizzleAddr(W, H, 8);
-
- for (int Y = 0; Y < H; Y++)
- {
- for (int X = 0; X < W; X++)
- {
- int IOffs = Offset + Swizzle.GetSwizzledAddress64(X, Y) * 8;
-
- byte[] Tile = BCnDecodeTile(Tex.Data, IOffs, true);
-
- int TOffset = 0;
-
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- Output[OOffset + 0] = Tile[TOffset + 0];
- Output[OOffset + 1] = Tile[TOffset + 1];
- Output[OOffset + 2] = Tile[TOffset + 2];
- Output[OOffset + 3] = Tile[TOffset + 3];
-
- TOffset += 4;
- }
- }
- }
- }
-
- return Output;
- }
-
- public static byte[] DecodeBC2(NsGpuTexture Tex, int Offset)
- {
- int W = (Tex.Width + 3) / 4;
- int H = (Tex.Height + 3) / 4;
-
- byte[] Output = new byte[W * H * 64];
-
- SwizzleAddr Swizzle = new SwizzleAddr(W, H, 4);
-
- for (int Y = 0; Y < H; Y++)
- {
- for (int X = 0; X < W; X++)
- {
- int IOffs = Offset + Swizzle.GetSwizzledAddress128(X, Y) * 16;
-
- byte[] Tile = BCnDecodeTile(Tex.Data, IOffs + 8, false);
-
- int AlphaLow = Get32(Tex.Data, IOffs + 0);
- int AlphaHigh = Get32(Tex.Data, IOffs + 4);
-
- ulong AlphaCh = (uint)AlphaLow | (ulong)AlphaHigh << 32;
-
- int TOffset = 0;
-
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- ulong Alpha = (AlphaCh >> (TY * 16 + TX * 4)) & 0xf;
-
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- Output[OOffset + 0] = Tile[TOffset + 0];
- Output[OOffset + 1] = Tile[TOffset + 1];
- Output[OOffset + 2] = Tile[TOffset + 2];
- Output[OOffset + 3] = (byte)(Alpha | (Alpha << 4));
-
- TOffset += 4;
- }
- }
- }
- }
-
- return Output;
- }
-
- public static byte[] DecodeBC3(NsGpuTexture Tex, int Offset)
- {
- int W = (Tex.Width + 3) / 4;
- int H = (Tex.Height + 3) / 4;
-
- byte[] Output = new byte[W * H * 64];
-
- SwizzleAddr Swizzle = new SwizzleAddr(W, H, 4);
-
- for (int Y = 0; Y < H; Y++)
- {
- for (int X = 0; X < W; X++)
- {
- int IOffs = Offset + Swizzle.GetSwizzledAddress128(X, Y) * 16;
-
- byte[] Tile = BCnDecodeTile(Tex.Data, IOffs + 8, false);
-
- byte[] Alpha = new byte[8];
-
- Alpha[0] = Tex.Data[IOffs + 0];
- Alpha[1] = Tex.Data[IOffs + 1];
-
- CalculateBC3Alpha(Alpha);
-
- int AlphaLow = Get32(Tex.Data, IOffs + 2);
- int AlphaHigh = Get16(Tex.Data, IOffs + 6);
-
- ulong AlphaCh = (uint)AlphaLow | (ulong)AlphaHigh << 32;
-
- int TOffset = 0;
-
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- byte AlphaPx = Alpha[(AlphaCh >> (TY * 12 + TX * 3)) & 7];
-
- Output[OOffset + 0] = Tile[TOffset + 0];
- Output[OOffset + 1] = Tile[TOffset + 1];
- Output[OOffset + 2] = Tile[TOffset + 2];
- Output[OOffset + 3] = AlphaPx;
-
- TOffset += 4;
- }
- }
- }
- }
-
- return Output;
- }
-
- public static byte[] DecodeBC4(NsGpuTexture Tex, int Offset)
- {
- int W = (Tex.Width + 3) / 4;
- int H = (Tex.Height + 3) / 4;
-
- byte[] Output = new byte[W * H * 64];
-
- SwizzleAddr Swizzle = new SwizzleAddr(W, H, 8);
-
- for (int Y = 0; Y < H; Y++)
- {
- for (int X = 0; X < W; X++)
- {
- int IOffs = Swizzle.GetSwizzledAddress64(X, Y) * 8;
-
- byte[] Red = new byte[8];
-
- Red[0] = Tex.Data[IOffs + 0];
- Red[1] = Tex.Data[IOffs + 1];
-
- CalculateBC3Alpha(Red);
-
- int RedLow = Get32(Tex.Data, IOffs + 2);
- int RedHigh = Get16(Tex.Data, IOffs + 6);
-
- ulong RedCh = (uint)RedLow | (ulong)RedHigh << 32;
-
- int TOffset = 0;
-
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- byte RedPx = Red[(RedCh >> (TY * 12 + TX * 3)) & 7];
-
- Output[OOffset + 0] = RedPx;
- Output[OOffset + 1] = RedPx;
- Output[OOffset + 2] = RedPx;
- Output[OOffset + 3] = 0xff;
-
- TOffset += 4;
- }
- }
- }
- }
-
- return Output;
- }
-
- public static byte[] DecodeBC5(NsGpuTexture Tex, int Offset, bool SNorm)
- {
- int W = (Tex.Width + 3) / 4;
- int H = (Tex.Height + 3) / 4;
-
- byte[] Output = new byte[W * H * 64];
-
- SwizzleAddr Swizzle = new SwizzleAddr(W, H, 4);
-
- for (int Y = 0; Y < H; Y++)
- {
- for (int X = 0; X < W; X++)
- {
- int IOffs = Swizzle.GetSwizzledAddress128(X, Y) * 16;
-
- byte[] Red = new byte[8];
- byte[] Green = new byte[8];
-
- Red[0] = Tex.Data[IOffs + 0];
- Red[1] = Tex.Data[IOffs + 1];
-
- Green[0] = Tex.Data[IOffs + 8];
- Green[1] = Tex.Data[IOffs + 9];
-
- if (SNorm)
- {
- CalculateBC3AlphaS(Red);
- CalculateBC3AlphaS(Green);
- }
- else
- {
- CalculateBC3Alpha(Red);
- CalculateBC3Alpha(Green);
- }
-
- int RedLow = Get32(Tex.Data, IOffs + 2);
- int RedHigh = Get16(Tex.Data, IOffs + 6);
-
- int GreenLow = Get32(Tex.Data, IOffs + 10);
- int GreenHigh = Get16(Tex.Data, IOffs + 14);
-
- ulong RedCh = (uint)RedLow | (ulong)RedHigh << 32;
- ulong GreenCh = (uint)GreenLow | (ulong)GreenHigh << 32;
-
- int TOffset = 0;
-
- if (SNorm)
- {
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int Shift = TY * 12 + TX * 3;
-
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- byte RedPx = Red [(RedCh >> Shift) & 7];
- byte GreenPx = Green[(GreenCh >> Shift) & 7];
-
- RedPx += 0x80;
- GreenPx += 0x80;
-
- float NX = (RedPx / 255f) * 2 - 1;
- float NY = (GreenPx / 255f) * 2 - 1;
-
- float NZ = (float)Math.Sqrt(1 - (NX * NX + NY * NY));
-
- Output[OOffset + 0] = Clamp((NZ + 1) * 0.5f);
- Output[OOffset + 1] = Clamp((NY + 1) * 0.5f);
- Output[OOffset + 2] = Clamp((NX + 1) * 0.5f);
- Output[OOffset + 3] = 0xff;
-
- TOffset += 4;
- }
- }
- }
- else
- {
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int Shift = TY * 12 + TX * 3;
-
- int OOffset = (X * 4 + TX + (Y * 4 + TY) * W * 4) * 4;
-
- byte RedPx = Red [(RedCh >> Shift) & 7];
- byte GreenPx = Green[(GreenCh >> Shift) & 7];
-
- Output[OOffset + 0] = RedPx;
- Output[OOffset + 1] = RedPx;
- Output[OOffset + 2] = RedPx;
- Output[OOffset + 3] = GreenPx;
-
- TOffset += 4;
- }
- }
- }
- }
- }
-
- return Output;
- }
-
- private static byte Clamp(float Value)
- {
- if (Value > 1)
- {
- return 0xff;
- }
- else if (Value < 0)
- {
- return 0;
- }
- else
- {
- return (byte)(Value * 0xff);
- }
- }
-
- private static void CalculateBC3Alpha(byte[] Alpha)
- {
- for (int i = 2; i < 8; i++)
- {
- if (Alpha[0] > Alpha[1])
- {
- Alpha[i] = (byte)(((8 - i) * Alpha[0] + (i - 1) * Alpha[1]) / 7);
- }
- else if (i < 6)
- {
- Alpha[i] = (byte)(((6 - i) * Alpha[0] + (i - 1) * Alpha[1]) / 7);
- }
- else if (i == 6)
- {
- Alpha[i] = 0;
- }
- else /* i == 7 */
- {
- Alpha[i] = 0xff;
- }
- }
- }
-
- private static void CalculateBC3AlphaS(byte[] Alpha)
- {
- for (int i = 2; i < 8; i++)
- {
- if ((sbyte)Alpha[0] > (sbyte)Alpha[1])
- {
- Alpha[i] = (byte)(((8 - i) * (sbyte)Alpha[0] + (i - 1) * (sbyte)Alpha[1]) / 7);
- }
- else if (i < 6)
- {
- Alpha[i] = (byte)(((6 - i) * (sbyte)Alpha[0] + (i - 1) * (sbyte)Alpha[1]) / 7);
- }
- else if (i == 6)
- {
- Alpha[i] = 0x80;
- }
- else /* i == 7 */
- {
- Alpha[i] = 0x7f;
- }
- }
- }
-
- private static byte[] BCnDecodeTile(
- byte[] Input,
- int Offset,
- bool IsBC1)
- {
- Color[] CLUT = new Color[4];
-
- int c0 = Get16(Input, Offset + 0);
- int c1 = Get16(Input, Offset + 2);
-
- CLUT[0] = DecodeRGB565(c0);
- CLUT[1] = DecodeRGB565(c1);
- CLUT[2] = CalculateCLUT2(CLUT[0], CLUT[1], c0, c1, IsBC1);
- CLUT[3] = CalculateCLUT3(CLUT[0], CLUT[1], c0, c1, IsBC1);
-
- int Indices = Get32(Input, Offset + 4);
-
- int IdxShift = 0;
-
- byte[] Output = new byte[4 * 4 * 4];
-
- int OOffset = 0;
-
- for (int TY = 0; TY < 4; TY++)
- {
- for (int TX = 0; TX < 4; TX++)
- {
- int Idx = (Indices >> IdxShift) & 3;
-
- IdxShift += 2;
-
- Color Pixel = CLUT[Idx];
-
- Output[OOffset + 0] = Pixel.R;
- Output[OOffset + 1] = Pixel.G;
- Output[OOffset + 2] = Pixel.B;
- Output[OOffset + 3] = Pixel.A;
-
- OOffset += 4;
- }
- }
-
- return Output;
- }
-
- private static Color CalculateCLUT2(Color C0, Color C1, int c0, int c1, bool IsBC1)
- {
- if (c0 > c1 || !IsBC1)
- {
- return Color.FromArgb(
- (2 * C0.R + C1.R) / 3,
- (2 * C0.G + C1.G) / 3,
- (2 * C0.B + C1.B) / 3);
- }
- else
- {
- return Color.FromArgb(
- (C0.R + C1.R) / 2,
- (C0.G + C1.G) / 2,
- (C0.B + C1.B) / 2);
- }
- }
-
- private static Color CalculateCLUT3(Color C0, Color C1, int c0, int c1, bool IsBC1)
- {
- if (c0 > c1 || !IsBC1)
- {
- return
- Color.FromArgb(
- (2 * C1.R + C0.R) / 3,
- (2 * C1.G + C0.G) / 3,
- (2 * C1.B + C0.B) / 3);
- }
-
- return Color.Transparent;
- }
-
- private static Color DecodeRGB565(int Value)
- {
- int B = ((Value >> 0) & 0x1f) << 3;
- int G = ((Value >> 5) & 0x3f) << 2;
- int R = ((Value >> 11) & 0x1f) << 3;
-
- return Color.FromArgb(
- R | (R >> 5),
- G | (G >> 6),
- B | (B >> 5));
- }
-
- private static int Get16(byte[] Data, int Address)
- {
- return
- Data[Address + 0] << 0 |
- Data[Address + 1] << 8;
- }
-
- private static int Get32(byte[] Data, int Address)
- {
- return
- Data[Address + 0] << 0 |
- Data[Address + 1] << 8 |
- Data[Address + 2] << 16 |
- Data[Address + 3] << 24;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/BlockLinearSwizzle.cs b/Ryujinx.Graphics/Gpu/BlockLinearSwizzle.cs
new file mode 100644
index 00000000..d2cbb144
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/BlockLinearSwizzle.cs
@@ -0,0 +1,57 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ /// <summary>
+ /// <see cref="ISwizzle"/> implementation that turns (X, Y) element coordinates
+ /// into byte offsets of a GOB-tiled ("block linear") surface.
+ /// </summary>
+ class BlockLinearSwizzle : ISwizzle
+ {
+     private int BhShift;
+     private int BppShift;
+     private int BhMask;
+
+     private int XShift;
+     private int GobStride;
+
+     /// <summary>
+     /// Precomputes the shifts, mask and stride used by <see cref="GetSwizzleOffset"/>.
+     /// </summary>
+     /// <param name="Width">Surface width in elements.</param>
+     /// <param name="Bpp">Size of one element; X is scaled by it before the offset math (presumably bytes, confirm against callers).</param>
+     /// <param name="BlockHeight">Block height in GOBs; multiplied by 8 to get rows per block.</param>
+     /// <exception cref="System.ArgumentOutOfRangeException">
+     /// <paramref name="Bpp"/> or <paramref name="BlockHeight"/> is zero.
+     /// </exception>
+     public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16)
+     {
+         //A zero here used to spin forever inside CountLsbZeros; fail loudly instead.
+         if (Bpp == 0)
+         {
+             throw new System.ArgumentOutOfRangeException(nameof(Bpp));
+         }
+
+         if (BlockHeight == 0)
+         {
+             throw new System.ArgumentOutOfRangeException(nameof(BlockHeight));
+         }
+
+         //NOTE(review): the mask/shift pair only matches for power of two values - confirm callers guarantee that.
+         BhMask = (BlockHeight * 8) - 1;
+
+         BhShift = CountLsbZeros(BlockHeight * 8);
+         BppShift = CountLsbZeros(Bpp);
+
+         int WidthInGobs = Width * Bpp / 64;
+
+         GobStride = 512 * BlockHeight * WidthInGobs;
+
+         XShift = CountLsbZeros(512 * BlockHeight);
+     }
+
+     /// <summary>
+     /// Returns the index of the lowest set bit of <paramref name="Value"/>.
+     /// </summary>
+     /// <exception cref="System.ArgumentOutOfRangeException">
+     /// <paramref name="Value"/> is zero, so it has no set bit to find.
+     /// </exception>
+     private static int CountLsbZeros(int Value)
+     {
+         //The shift count of an int is taken modulo 32, so with no bit set
+         //the loop below would never observe a 1 and never terminate.
+         if (Value == 0)
+         {
+             throw new System.ArgumentOutOfRangeException(nameof(Value));
+         }
+
+         int Count = 0;
+
+         while (((Value >> Count) & 1) == 0)
+         {
+             Count++;
+         }
+
+         return Count;
+     }
+
+     /// <summary>
+     /// Computes the offset of element (X, Y) inside the tiled surface.
+     /// </summary>
+     /// <param name="X">Horizontal element index; scaled by the element size first.</param>
+     /// <param name="Y">Row index.</param>
+     /// <returns>Offset into the surface data.</returns>
+     public int GetSwizzleOffset(int X, int Y)
+     {
+         X <<= BppShift;
+
+         //Row of blocks.
+         int Position = (Y >> BhShift) * GobStride;
+
+         //64 wide column of blocks.
+         Position += (X >> 6) << XShift;
+
+         //512-sized unit inside the block (8 rows each).
+         Position += ((Y & BhMask) >> 3) << 9;
+
+         //Interleave the remaining X and Y bits inside the 512-sized unit.
+         Position += ((X & 0x3f) >> 5) << 8;
+         Position += ((Y & 0x07) >> 1) << 6;
+         Position += ((X & 0x1f) >> 4) << 5;
+         Position += ((Y & 0x01) >> 0) << 4;
+         Position += ((X & 0x0f) >> 0) << 0;
+
+         return Position;
+     }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/INvGpuEngine.cs b/Ryujinx.Graphics/Gpu/INvGpuEngine.cs
new file mode 100644
index 00000000..17e9b435
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/INvGpuEngine.cs
@@ -0,0 +1,11 @@
+using ChocolArm64.Memory;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ /// <summary>
+ /// Contract of a GPU engine that can be driven through method calls,
+ /// for example by <c>MacroInterpreter</c>.
+ /// </summary>
+ interface INvGpuEngine
+ {
+ /// <summary>
+ /// Register array of the engine. <c>MacroInterpreter</c> reads entries of it by index.
+ /// </summary>
+ int[] Registers { get; }
+
+ /// <summary>
+ /// Asks the engine to handle the method call described by <paramref name="PBEntry"/>.
+ /// </summary>
+ /// <param name="Memory">Memory instance handed through by the caller.</param>
+ /// <param name="PBEntry">Method, sub channel and arguments of the call.</param>
+ void CallMethod(AMemory Memory, NsGpuPBEntry PBEntry);
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/ISwizzle.cs b/Ryujinx.Graphics/Gpu/ISwizzle.cs
new file mode 100644
index 00000000..755051d0
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/ISwizzle.cs
@@ -0,0 +1,7 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ /// <summary>
+ /// Maps two dimensional coordinates to an offset inside a surface.
+ /// Implemented by <c>BlockLinearSwizzle</c> and <c>LinearSwizzle</c>.
+ /// </summary>
+ interface ISwizzle
+ {
+ /// <summary>
+ /// Returns the offset that corresponds to position (<paramref name="X"/>, <paramref name="Y"/>).
+ /// </summary>
+ int GetSwizzleOffset(int X, int Y);
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/LinearSwizzle.cs b/Ryujinx.Graphics/Gpu/LinearSwizzle.cs
new file mode 100644
index 00000000..01f09f81
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/LinearSwizzle.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ /// <summary>
+ /// <see cref="ISwizzle"/> implementation for row-major (pitch linear) surfaces:
+ /// rows are stored one after another without any tiling.
+ /// </summary>
+ class LinearSwizzle : ISwizzle
+ {
+     //Size of one element and of one full row, both in the unit of Bpp.
+     private readonly int ElementSize;
+     private readonly int RowStride;
+
+     /// <summary>
+     /// Creates a swizzle for a surface of <paramref name="Width"/> elements per row,
+     /// each element being <paramref name="Bpp"/> units wide.
+     /// </summary>
+     public LinearSwizzle(int Width, int Bpp)
+     {
+         ElementSize = Bpp;
+         RowStride = Width * Bpp;
+     }
+
+     /// <summary>
+     /// Returns the offset of element (X, Y): Y full rows plus X elements.
+     /// </summary>
+     public int GetSwizzleOffset(int X, int Y) => Y * RowStride + X * ElementSize;
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/MacroInterpreter.cs b/Ryujinx.Graphics/Gpu/MacroInterpreter.cs
new file mode 100644
index 00000000..233baac8
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/MacroInterpreter.cs
@@ -0,0 +1,420 @@
+using ChocolArm64.Memory;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ class MacroInterpreter
+ {
+ //What Step does with the ALU result of an instruction.
+ //Decoded from bits 6:4 of the opcode.
+ private enum AssignmentOperation
+ {
+ IgnoreAndFetch = 0,
+ Move = 1,
+ MoveAndSetMaddr = 2,
+ FetchAndSend = 3,
+ MoveAndSend = 4,
+ FetchAndSetMaddr = 5,
+ MoveAndSetMaddrThenFetchAndSend = 6,
+ MoveAndSetMaddrThenSendHigh = 7
+ }
+
+ //Kind of operation that produces the result of an instruction.
+ //Decoded from bits 2:0 of the opcode; the value 7 is not listed here
+ //because Step treats it as a branch before the ALU is consulted.
+ private enum AluOperation
+ {
+ AluReg = 0,
+ AddImmediate = 1,
+ BitfieldReplace = 2,
+ BitfieldExtractLslImm = 3,
+ BitfieldExtractLslReg = 4,
+ ReadImmediate = 5
+ }
+
+ //Register to register operation used when the ALU operation is AluReg.
+ //Decoded from bits 21:17 of the opcode; values without a member here
+ //make GetAluResult throw ArgumentOutOfRangeException.
+ private enum AluRegOperation
+ {
+ Add = 0,
+ AddWithCarry = 1,
+ Subtract = 2,
+ SubtractWithBorrow = 3,
+ BitwiseExclusiveOr = 8,
+ BitwiseOr = 9,
+ BitwiseAnd = 10,
+ BitwiseAndNot = 11,
+ BitwiseNotAnd = 12
+ }
+
+ private NvGpuFifo PFifo;
+ private INvGpuEngine Engine;
+
+ public Queue<int> Fifo { get; private set; }
+
+ private int[] Gprs;
+
+ private int MethAddr;
+ private int MethIncr;
+
+ private bool Carry;
+
+ private int OpCode;
+
+ private int PipeOp;
+
+ private long Pc;
+
+ /// <summary>
+ /// Creates an interpreter with an empty parameter FIFO and eight zeroed registers.
+ /// </summary>
+ /// <param name="PFifo">FIFO processor that is stepped when a parameter is needed but none is queued.</param>
+ /// <param name="Engine">Engine that receives the method calls and provides the registers to read.</param>
+ public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
+ {
+     Gprs = new int[8];
+     Fifo = new Queue<int>();
+
+     this.Engine = Engine;
+     this.PFifo = PFifo;
+ }
+
+ /// <summary>
+ /// Runs the macro that starts at <paramref name="Position"/> until it exits.
+ /// </summary>
+ /// <param name="Memory">Memory the macro code is read from.</param>
+ /// <param name="Position">Address of the first instruction.</param>
+ /// <param name="Param">First parameter, made available in register 1.</param>
+ public void Execute(AMemory Memory, long Position, int Param)
+ {
+     Reset();
+
+     Pc = Position;
+
+     Gprs[1] = Param;
+
+     //Prime the pipe with the first instruction.
+     FetchOpCode(Memory);
+
+     bool Running;
+
+     do
+     {
+         Running = Step(Memory);
+     }
+     while (Running);
+
+     //The instruction following the exit sits in a delay slot,
+     //so it has to run as well before we are really done.
+     Step(Memory);
+ }
+
+ /// <summary>
+ /// Clears all registers, the method address/increment and the carry flag.
+ /// The program counter and the fetched opcodes are left alone.
+ /// </summary>
+ private void Reset()
+ {
+     Array.Clear(Gprs, 0, Gprs.Length);
+
+     Carry = false;
+
+     MethIncr = 0;
+     MethAddr = 0;
+ }
+
+ //Executes a single macro instruction.
+ //Returns true when a branch was taken or when the executed instruction
+ //does not have the exit bit (0x80) set, false when execution should stop.
+ private bool Step(AMemory Memory)
+ {
+ //Pc always points past the word held in the pipe, so this is the
+ //address of the instruction that is about to be executed.
+ long BaseAddr = Pc - 4;
+
+ FetchOpCode(Memory);
+
+ if ((OpCode & 7) < 7)
+ {
+ //Operation produces a value.
+ AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7);
+
+ int Result = GetAluResult();
+
+ switch (AsgOp)
+ {
+ //Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ {
+ SetDstGpr(FetchParam());
+
+ break;
+ }
+
+ //Move result.
+ case AssignmentOperation.Move:
+ {
+ SetDstGpr(Result);
+
+ break;
+ }
+
+ //Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ break;
+ }
+
+ //Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ {
+ SetDstGpr(FetchParam());
+
+ Send(Memory, Result);
+
+ break;
+ }
+
+ //Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ {
+ SetDstGpr(Result);
+
+ Send(Memory, Result);
+
+ break;
+ }
+
+ //Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ {
+ SetDstGpr(FetchParam());
+
+ SetMethAddr(Result);
+
+ break;
+ }
+
+ //Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ Send(Memory, FetchParam());
+
+ break;
+ }
+
+ //Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ Send(Memory, (Result >> 12) & 0x3f);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ //Branch.
+ //Bit 4 selects the condition: set means branch when register A is not zero.
+ bool OnNotZero = ((OpCode >> 4) & 1) != 0;
+
+ bool Taken = OnNotZero
+ ? GetGprA() != 0
+ : GetGprA() == 0;
+
+ if (Taken)
+ {
+ //The target is relative to the branch instruction, in words.
+ Pc = BaseAddr + (GetImm() << 2);
+
+ bool NoDelays = (OpCode & 0x20) != 0;
+
+ if (NoDelays)
+ {
+ //Fetching here drops the instruction already in the pipe,
+ //so the next Step executes the branch target directly.
+ FetchOpCode(Memory);
+ }
+
+ //A taken branch never ends execution, the exit bit is not checked.
+ return true;
+ }
+ }
+
+ bool Exit = (OpCode & 0x80) != 0;
+
+ return !Exit;
+ }
+
+ /// <summary>
+ /// Advances the one-entry instruction pipe: the previously fetched word becomes
+ /// the current opcode, and the word at Pc is fetched to take its place.
+ /// </summary>
+ private void FetchOpCode(AMemory Memory)
+ {
+     //The word fetched by the previous call is the one executed now.
+     OpCode = PipeOp;
+
+     PipeOp = Memory.ReadInt32(Pc);
+
+     //Instructions are one 32-bit word each.
+     Pc += sizeof(int);
+ }
+
+ //Computes the result of the current instruction.
+ //The operation is selected by bits 2:0 of the opcode.
+ //Throws ArgumentException when those bits do not name a handled operation.
+ private int GetAluResult()
+ {
+ AluOperation Op = (AluOperation)(OpCode & 7);
+
+ switch (Op)
+ {
+ case AluOperation.AluReg:
+ {
+ AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f);
+
+ return GetAluResult(AluOp, GetGprA(), GetGprB());
+ }
+
+ case AluOperation.AddImmediate:
+ {
+ return GetGprA() + GetImm();
+ }
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ {
+ //Bitfield layout: source bit in 21:17, size in 26:22, destination bit in 31:27.
+ int BfSrcBit = (OpCode >> 17) & 0x1f;
+ int BfSize = (OpCode >> 22) & 0x1f;
+ int BfDstBit = (OpCode >> 27) & 0x1f;
+
+ int BfMask = (1 << BfSize) - 1;
+
+ int Dst = GetGprA();
+ int Src = GetGprB();
+
+ switch (Op)
+ {
+ case AluOperation.BitfieldReplace:
+ {
+ //Insert the field taken from Src into Dst at the destination bit.
+ Src = (int)((uint)Src >> BfSrcBit) & BfMask;
+
+ Dst &= ~(BfMask << BfDstBit);
+
+ Dst |= Src << BfDstBit;
+
+ return Dst;
+ }
+
+ case AluOperation.BitfieldExtractLslImm:
+ {
+ //Here register A supplies the source shift, the destination shift is the immediate field.
+ Src = (int)((uint)Src >> Dst) & BfMask;
+
+ return Src << BfDstBit;
+ }
+
+ case AluOperation.BitfieldExtractLslReg:
+ {
+ //Here the source shift is the immediate field, register A supplies the destination shift.
+ Src = (int)((uint)Src >> BfSrcBit) & BfMask;
+
+ return Src << Dst;
+ }
+ }
+
+ break;
+ }
+
+ case AluOperation.ReadImmediate:
+ {
+ //Reads the engine register whose index is register A plus the immediate.
+ return Read(GetGprA() + GetImm());
+ }
+ }
+
+ throw new ArgumentException(nameof(OpCode));
+ }
+
+ /// <summary>
+ /// Performs a register to register ALU operation on two 32-bit values.
+ /// The add/subtract variants also update the carry flag.
+ /// </summary>
+ /// <param name="AluOp">Operation to perform.</param>
+ /// <param name="A">First operand.</param>
+ /// <param name="B">Second operand.</param>
+ /// <returns>Low 32 bits of the result.</returns>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="AluOp"/> is not a known operation.</exception>
+ private int GetAluResult(AluRegOperation AluOp, int A, int B)
+ {
+     //Widen through uint: converting a negative int directly to ulong
+     //sign-extends it, which sets the upper 32 bits and made the carry
+     //checks below fire for any operand with bit 31 set.
+     ulong ValueA = (uint)A;
+     ulong ValueB = (uint)B;
+
+     switch (AluOp)
+     {
+         case AluRegOperation.Add:
+         {
+             ulong Result = ValueA + ValueB;
+
+             //Carry out of bit 31.
+             Carry = Result > 0xffffffff;
+
+             return (int)Result;
+         }
+
+         case AluRegOperation.AddWithCarry:
+         {
+             ulong Result = ValueA + ValueB + (Carry ? 1UL : 0UL);
+
+             Carry = Result > 0xffffffff;
+
+             return (int)Result;
+         }
+
+         case AluRegOperation.Subtract:
+         {
+             ulong Result = ValueA - ValueB;
+
+             //Carry set means no borrow happened (the 64-bit value did not wrap).
+             Carry = Result < 0x100000000;
+
+             return (int)Result;
+         }
+
+         case AluRegOperation.SubtractWithBorrow:
+         {
+             //A clear carry is a pending borrow from the previous subtraction.
+             ulong Result = ValueA - ValueB - (Carry ? 0UL : 1UL);
+
+             Carry = Result < 0x100000000;
+
+             return (int)Result;
+         }
+
+         case AluRegOperation.BitwiseExclusiveOr: return A ^ B;
+         case AluRegOperation.BitwiseOr: return A | B;
+         case AluRegOperation.BitwiseAnd: return A & B;
+         case AluRegOperation.BitwiseAndNot: return A & ~B;
+         case AluRegOperation.BitwiseNotAnd: return ~(A & B);
+     }
+
+     throw new ArgumentOutOfRangeException(nameof(AluOp));
+ }
+
+ /// <summary>
+ /// Returns the immediate stored in bits 31:14 of the current opcode.
+ /// The arithmetic shift sign-extends it on purpose, the immediate is signed.
+ /// </summary>
+ private int GetImm() => OpCode >> 14;
+
+ /// <summary>
+ /// Splits <paramref name="Value"/> into the method address (bits 11:0)
+ /// and the increment applied to it after every send (bits 17:12).
+ /// </summary>
+ private void SetMethAddr(int Value)
+ {
+     MethIncr = (Value >> 12) & 0x3f;
+     MethAddr = Value & 0xfff;
+ }
+
+ /// <summary>
+ /// Writes <paramref name="Value"/> to the destination register
+ /// named by bits 10:8 of the current opcode.
+ /// </summary>
+ private void SetDstGpr(int Value)
+ {
+     int DstIndex = (OpCode >> 8) & 7;
+
+     Gprs[DstIndex] = Value;
+ }
+
+ /// <summary>
+ /// Reads source register A, named by bits 13:11 of the current opcode.
+ /// </summary>
+ private int GetGprA() => GetGprValue((OpCode >> 11) & 7);
+
+ /// <summary>
+ /// Reads source register B, named by bits 16:14 of the current opcode.
+ /// </summary>
+ private int GetGprB() => GetGprValue((OpCode >> 14) & 7);
+
+ /// <summary>
+ /// Returns the value of register <paramref name="Index"/>.
+ /// Register 0 always reads as zero, whatever was stored in it.
+ /// </summary>
+ private int GetGprValue(int Index)
+ {
+     if (Index == 0)
+     {
+         return 0;
+     }
+
+     return Gprs[Index];
+ }
+
+ /// <summary>
+ /// Takes the next macro parameter from the FIFO.
+ /// </summary>
+ /// <returns>
+ /// The dequeued parameter, or 0 when the FIFO is empty and the PFIFO engine
+ /// has nothing left to process.
+ /// </returns>
+ private int FetchParam()
+ {
+     while (true)
+     {
+         if (Fifo.TryDequeue(out int Param))
+         {
+             return Param;
+         }
+
+         //Nothing queued yet: let the PFIFO engine run so it can write
+         //the parameters, and give up once it has no more work.
+         if (!PFifo.Step())
+         {
+             return 0;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Reads register <paramref name="Reg"/> of the engine this interpreter drives.
+ /// </summary>
+ private int Read(int Reg) => Engine.Registers[Reg];
+
+ /// <summary>
+ /// Calls the method at the current method address on the engine with
+ /// <paramref name="Value"/> as its only argument (sub channel 0),
+ /// then advances the method address by the configured increment.
+ /// </summary>
+ private void Send(AMemory Memory, int Value)
+ {
+     Engine.CallMethod(Memory, new NsGpuPBEntry(MethAddr, 0, Value));
+
+     MethAddr += MethIncr;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpu.cs b/Ryujinx.Graphics/Gpu/NsGpu.cs
index 133d0af2..57380502 100644
--- a/Ryujinx.Graphics/Gpu/NsGpu.cs
+++ b/Ryujinx.Graphics/Gpu/NsGpu.cs
@@ -1,5 +1,5 @@
-using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
+using System.Threading;
namespace Ryujinx.Graphics.Gpu
{
@@ -9,7 +9,13 @@ namespace Ryujinx.Graphics.Gpu
internal NsGpuMemoryMgr MemoryMgr { get; private set; }
- internal NsGpuPGraph PGraph { get; private set; }
+ public NvGpuFifo Fifo;
+
+ internal NvGpuEngine3d Engine3d;
+
+ private Thread FifoProcessing;
+
+ private bool KeepRunning;
public NsGpu(IGalRenderer Renderer)
{
@@ -17,7 +23,15 @@ namespace Ryujinx.Graphics.Gpu
MemoryMgr = new NsGpuMemoryMgr();
- PGraph = new NsGpuPGraph(this);
+ Fifo = new NvGpuFifo(this);
+
+ Engine3d = new NvGpuEngine3d(this);
+
+ KeepRunning = true;
+
+ FifoProcessing = new Thread(ProcessFifo);
+
+ FifoProcessing.Start();
}
public long GetCpuAddr(long Position)
@@ -35,11 +49,6 @@ namespace Ryujinx.Graphics.Gpu
return MemoryMgr.Map(CpuAddr, GpuAddr, Size);
}
- public void ProcessPushBuffer(NsGpuPBEntry[] PushBuffer, AMemory Memory)
- {
- PGraph.ProcessPushBuffer(PushBuffer, Memory);
- }
-
public long ReserveMemory(long Size, long Align)
{
return MemoryMgr.Reserve(Size, Align);
@@ -49,5 +58,15 @@ namespace Ryujinx.Graphics.Gpu
{
return MemoryMgr.Reserve(GpuAddr, Size, Align);
}
+
+ //Entry point of the FifoProcessing thread: keeps dispatching the calls
+ //of the FIFO for as long as KeepRunning is set, yielding between passes.
+ //NOTE(review): KeepRunning is a plain bool read from this thread - confirm
+ //that a write from another thread is reliably observed by this loop.
+ private void ProcessFifo()
+ {
+ while (KeepRunning)
+ {
+ Fifo.DispatchCalls();
+
+ Thread.Yield();
+ }
+ }
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuPBEntry.cs b/Ryujinx.Graphics/Gpu/NsGpuPBEntry.cs
index 8063651a..d405a93c 100644
--- a/Ryujinx.Graphics/Gpu/NsGpuPBEntry.cs
+++ b/Ryujinx.Graphics/Gpu/NsGpuPBEntry.cs
@@ -1,13 +1,11 @@
using System;
-using System.Collections.Generic;
using System.Collections.ObjectModel;
-using System.IO;
namespace Ryujinx.Graphics.Gpu
{
public struct NsGpuPBEntry
{
- public NsGpuRegister Register { get; private set; }
+ public int Method { get; private set; }
public int SubChannel { get; private set; }
@@ -15,65 +13,11 @@ namespace Ryujinx.Graphics.Gpu
public ReadOnlyCollection<int> Arguments => Array.AsReadOnly(m_Arguments);
- public NsGpuPBEntry(NsGpuRegister Register, int SubChannel, params int[] Arguments)
+ public NsGpuPBEntry(int Method, int SubChannel, params int[] Arguments)
{
- this.Register = Register;
+ this.Method = Method;
this.SubChannel = SubChannel;
this.m_Arguments = Arguments;
}
-
- public static NsGpuPBEntry[] DecodePushBuffer(byte[] Data)
- {
- using (MemoryStream MS = new MemoryStream(Data))
- {
- BinaryReader Reader = new BinaryReader(MS);
-
- List<NsGpuPBEntry> GpFifos = new List<NsGpuPBEntry>();
-
- bool CanRead() => MS.Position + 4 <= MS.Length;
-
- while (CanRead())
- {
- int Packed = Reader.ReadInt32();
-
- int Reg = (Packed << 2) & 0x7ffc;
- int SubC = (Packed >> 13) & 7;
- int Args = (Packed >> 16) & 0x1fff;
- int Mode = (Packed >> 29) & 7;
-
- if (Mode == 4)
- {
- //Inline Mode.
- GpFifos.Add(new NsGpuPBEntry((NsGpuRegister)Reg, SubC, Args));
- }
- else
- {
- //Word mode.
- if (Mode == 1)
- {
- //Sequential Mode.
- for (int Index = 0; Index < Args && CanRead(); Index++, Reg += 4)
- {
- GpFifos.Add(new NsGpuPBEntry((NsGpuRegister)Reg, SubC, Reader.ReadInt32()));
- }
- }
- else
- {
- //Non-Sequential Mode.
- int[] Arguments = new int[Args];
-
- for (int Index = 0; Index < Args && CanRead(); Index++)
- {
- Arguments[Index] = Reader.ReadInt32();
- }
-
- GpFifos.Add(new NsGpuPBEntry((NsGpuRegister)Reg, SubC, Arguments));
- }
- }
- }
-
- return GpFifos.ToArray();
- }
- }
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuPGraph.cs b/Ryujinx.Graphics/Gpu/NsGpuPGraph.cs
deleted file mode 100644
index 652f3e75..00000000
--- a/Ryujinx.Graphics/Gpu/NsGpuPGraph.cs
+++ /dev/null
@@ -1,305 +0,0 @@
-using ChocolArm64.Memory;
-using Ryujinx.Graphics.Gal;
-using System.Collections.Generic;
-
-namespace Ryujinx.Graphics.Gpu
-{
- class NsGpuPGraph
- {
- private NsGpu Gpu;
-
- private uint[] Registers;
-
- public NsGpuEngine[] SubChannels;
-
- private Dictionary<long, int> CurrentVertexBuffers;
-
- public NsGpuPGraph(NsGpu Gpu)
- {
- this.Gpu = Gpu;
-
- Registers = new uint[0x1000];
-
- SubChannels = new NsGpuEngine[8];
-
- CurrentVertexBuffers = new Dictionary<long, int>();
- }
-
- public void ProcessPushBuffer(NsGpuPBEntry[] PushBuffer, AMemory Memory)
- {
- bool HasQuery = false;
-
- foreach (NsGpuPBEntry Entry in PushBuffer)
- {
- if (Entry.Arguments.Count == 1)
- {
- SetRegister(Entry.Register, (uint)Entry.Arguments[0]);
- }
-
- switch (Entry.Register)
- {
- case NsGpuRegister.BindChannel:
- if (Entry.Arguments.Count > 0)
- {
- SubChannels[Entry.SubChannel] = (NsGpuEngine)Entry.Arguments[0];
- }
- break;
-
- case NsGpuRegister._3dVertexArray0Fetch:
- SendVertexBuffers(Memory);
- break;
-
- case NsGpuRegister._3dCbData0:
- if (GetRegister(NsGpuRegister._3dCbPos) == 0x20)
- {
- SendTexture(Memory);
- }
- break;
-
- case NsGpuRegister._3dQueryAddressHigh:
- case NsGpuRegister._3dQueryAddressLow:
- case NsGpuRegister._3dQuerySequence:
- case NsGpuRegister._3dQueryGet:
- HasQuery = true;
- break;
-
- case NsGpuRegister._3dSetShader:
- uint ShaderPrg = (uint)Entry.Arguments[0];
- uint ShaderId = (uint)Entry.Arguments[1];
- uint CodeAddr = (uint)Entry.Arguments[2];
- uint ShaderType = (uint)Entry.Arguments[3];
- uint CodeEnd = (uint)Entry.Arguments[4];
-
- SendShader(
- Memory,
- ShaderPrg,
- ShaderId,
- CodeAddr,
- ShaderType,
- CodeEnd);
- break;
- }
- }
-
- if (HasQuery)
- {
- long Position =
- (long)GetRegister(NsGpuRegister._3dQueryAddressHigh) << 32 |
- (long)GetRegister(NsGpuRegister._3dQueryAddressLow) << 0;
-
- uint Seq = GetRegister(NsGpuRegister._3dQuerySequence);
- uint Get = GetRegister(NsGpuRegister._3dQueryGet);
-
- uint Mode = Get & 3;
-
- if (Mode == 0)
- {
- //Write
- Position = Gpu.MemoryMgr.GetCpuAddr(Position);
-
- if (Position != -1)
- {
- Gpu.Renderer.QueueAction(delegate()
- {
- Memory.WriteUInt32(Position, Seq);
- });
- }
- }
- }
- }
-
- private void SendVertexBuffers(AMemory Memory)
- {
- long Position =
- (long)GetRegister(NsGpuRegister._3dVertexArray0StartHigh) << 32 |
- (long)GetRegister(NsGpuRegister._3dVertexArray0StartLow) << 0;
-
- long Limit =
- (long)GetRegister(NsGpuRegister._3dVertexArray0LimitHigh) << 32 |
- (long)GetRegister(NsGpuRegister._3dVertexArray0LimitLow) << 0;
-
- int VbIndex = CurrentVertexBuffers.Count;
-
- if (!CurrentVertexBuffers.TryAdd(Position, VbIndex))
- {
- VbIndex = CurrentVertexBuffers[Position];
- }
-
- if (Limit != 0)
- {
- long Size = (Limit - Position) + 1;
-
- Position = Gpu.MemoryMgr.GetCpuAddr(Position);
-
- if (Position != -1)
- {
- byte[] Buffer = AMemoryHelper.ReadBytes(Memory, Position, Size);
-
- int Stride = (int)GetRegister(NsGpuRegister._3dVertexArray0Fetch) & 0xfff;
-
- List<GalVertexAttrib> Attribs = new List<GalVertexAttrib>();
-
- for (int Attr = 0; Attr < 16; Attr++)
- {
- int Packed = (int)GetRegister(NsGpuRegister._3dVertexAttrib0Format + Attr * 4);
-
- GalVertexAttrib Attrib = new GalVertexAttrib(Attr,
- (Packed >> 0) & 0x1f,
- ((Packed >> 6) & 0x1) != 0,
- (Packed >> 7) & 0x3fff,
- (GalVertexAttribSize)((Packed >> 21) & 0x3f),
- (GalVertexAttribType)((Packed >> 27) & 0x7),
- ((Packed >> 31) & 0x1) != 0);
-
- if (Attrib.Offset < Stride)
- {
- Attribs.Add(Attrib);
- }
- }
-
- Gpu.Renderer.QueueAction(delegate()
- {
- Gpu.Renderer.SendVertexBuffer(VbIndex, Buffer, Stride, Attribs.ToArray());
- });
- }
- }
- }
-
- private void SendTexture(AMemory Memory)
- {
- long TicPos = (long)GetRegister(NsGpuRegister._3dTicAddressHigh) << 32 |
- (long)GetRegister(NsGpuRegister._3dTicAddressLow) << 0;
-
- uint CbData = GetRegister(NsGpuRegister._3dCbData0);
-
- uint TicIndex = (CbData >> 0) & 0xfffff;
- uint TscIndex = (CbData >> 20) & 0xfff; //I guess?
-
- TicPos = Gpu.MemoryMgr.GetCpuAddr(TicPos + TicIndex * 0x20);
-
- if (TicPos != -1)
- {
- int Word0 = Memory.ReadInt32(TicPos + 0x0);
- int Word1 = Memory.ReadInt32(TicPos + 0x4);
- int Word2 = Memory.ReadInt32(TicPos + 0x8);
- int Word3 = Memory.ReadInt32(TicPos + 0xc);
- int Word4 = Memory.ReadInt32(TicPos + 0x10);
- int Word5 = Memory.ReadInt32(TicPos + 0x14);
- int Word6 = Memory.ReadInt32(TicPos + 0x18);
- int Word7 = Memory.ReadInt32(TicPos + 0x1c);
-
- long TexAddress = Word1;
-
- TexAddress |= (long)(Word2 & 0xff) << 32;
-
- TexAddress = Gpu.MemoryMgr.GetCpuAddr(TexAddress);
-
- if (TexAddress != -1)
- {
- NsGpuTextureFormat Format = (NsGpuTextureFormat)(Word0 & 0x7f);
-
- int Width = (Word4 & 0xffff) + 1;
- int Height = (Word5 & 0xffff) + 1;
-
- byte[] Buffer = GetDecodedTexture(Memory, Format, TexAddress, Width, Height);
-
- if (Buffer != null)
- {
- Gpu.Renderer.QueueAction(delegate()
- {
- Gpu.Renderer.SendR8G8B8A8Texture(0, Buffer, Width, Height);
- });
- }
- }
- }
- }
-
- private void SendShader(
- AMemory Memory,
- uint ShaderPrg,
- uint ShaderId,
- uint CodeAddr,
- uint ShaderType,
- uint CodeEnd)
- {
- long CodePos = Gpu.MemoryMgr.GetCpuAddr(CodeAddr);
-
- byte[] Data = AMemoryHelper.ReadBytes(Memory, CodePos, 0x300);
- }
-
- private static byte[] GetDecodedTexture(
- AMemory Memory,
- NsGpuTextureFormat Format,
- long Position,
- int Width,
- int Height)
- {
- byte[] Data = null;
-
- switch (Format)
- {
- case NsGpuTextureFormat.BC1:
- {
- int Size = (Width * Height) >> 1;
-
- Data = AMemoryHelper.ReadBytes(Memory, Position, Size);
-
- Data = BCn.DecodeBC1(new NsGpuTexture()
- {
- Width = Width,
- Height = Height,
- Data = Data
- }, 0);
-
- break;
- }
-
- case NsGpuTextureFormat.BC2:
- {
- int Size = Width * Height;
-
- Data = AMemoryHelper.ReadBytes(Memory, Position, Size);
-
- Data = BCn.DecodeBC2(new NsGpuTexture()
- {
- Width = Width,
- Height = Height,
- Data = Data
- }, 0);
-
- break;
- }
-
- case NsGpuTextureFormat.BC3:
- {
- int Size = Width * Height;
-
- Data = AMemoryHelper.ReadBytes(Memory, Position, Size);
-
- Data = BCn.DecodeBC3(new NsGpuTexture()
- {
- Width = Width,
- Height = Height,
- Data = Data
- }, 0);
-
- break;
- }
-
- //default: throw new NotImplementedException(Format.ToString());
- }
-
- return Data;
- }
-
- public uint GetRegister(NsGpuRegister Register)
- {
- return Registers[((int)Register >> 2) & 0xfff];
- }
-
- public void SetRegister(NsGpuRegister Register, uint Value)
- {
- Registers[((int)Register >> 2) & 0xfff] = Value;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuRegister.cs b/Ryujinx.Graphics/Gpu/NsGpuRegister.cs
deleted file mode 100644
index 4642e68d..00000000
--- a/Ryujinx.Graphics/Gpu/NsGpuRegister.cs
+++ /dev/null
@@ -1,94 +0,0 @@
-namespace Ryujinx.Graphics.Gpu
-{
- public enum NsGpuRegister
- {
- BindChannel = 0,
-
- _2dClipEnable = 0x0290,
- _2dOperation = 0x02ac,
-
- _3dGlobalBase = 0x02c8,
- _3dRt0AddressHigh = 0x0800,
- _3dRt0AddressLow = 0x0804,
- _3dRt0Horiz = 0x0808,
- _3dRt0Vert = 0x080c,
- _3dRt0Format = 0x0810,
- _3dRt0BlockDimensions = 0x0814,
- _3dRt0ArrayMode = 0x0818,
- _3dRt0LayerStride = 0x081c,
- _3dRt0BaseLayer = 0x0820,
- _3dViewportScaleX = 0x0a00,
- _3dViewportScaleY = 0x0a04,
- _3dViewportScaleZ = 0x0a08,
- _3dViewportTranslateX = 0x0a0c,
- _3dViewportTranslateY = 0x0a10,
- _3dViewportTranslateZ = 0x0a14,
- _3dViewportHoriz = 0x0c00,
- _3dViewportVert = 0x0c04,
- _3dDepthRangeNear = 0x0c08,
- _3dDepthRangeFar = 0x0c0c,
- _3dClearColorR = 0x0d80,
- _3dClearColorG = 0x0d84,
- _3dClearColorB = 0x0d88,
- _3dClearColorA = 0x0d8c,
- _3dScreenScissorHoriz = 0x0ff4,
- _3dScreenScissorVert = 0x0ff8,
- _3dVertexAttrib0Format = 0x1160,
- _3dVertexAttrib1Format = 0x1164,
- _3dVertexAttrib2Format = 0x1168,
- _3dVertexAttrib3Format = 0x116c,
- _3dVertexAttrib4Format = 0x1170,
- _3dVertexAttrib5Format = 0x1174,
- _3dVertexAttrib6Format = 0x1178,
- _3dVertexAttrib7Format = 0x117c,
- _3dVertexAttrib8Format = 0x1180,
- _3dVertexAttrib9Format = 0x1184,
- _3dVertexAttrib10Format = 0x1188,
- _3dVertexAttrib11Format = 0x118c,
- _3dVertexAttrib12Format = 0x1190,
- _3dVertexAttrib13Format = 0x1194,
- _3dVertexAttrib14Format = 0x1198,
- _3dVertexAttrib15Format = 0x119c,
- _3dScreenYControl = 0x13ac,
- _3dTscAddressHigh = 0x155c,
- _3dTscAddressLow = 0x1560,
- _3dTscLimit = 0x1564,
- _3dTicAddressHigh = 0x1574,
- _3dTicAddressLow = 0x1578,
- _3dTicLimit = 0x157c,
- _3dMultiSampleMode = 0x15d0,
- _3dVertexEndGl = 0x1614,
- _3dVertexBeginGl = 0x1618,
- _3dQueryAddressHigh = 0x1b00,
- _3dQueryAddressLow = 0x1b04,
- _3dQuerySequence = 0x1b08,
- _3dQueryGet = 0x1b0c,
- _3dVertexArray0Fetch = 0x1c00,
- _3dVertexArray0StartHigh = 0x1c04,
- _3dVertexArray0StartLow = 0x1c08,
- _3dVertexArray1Fetch = 0x1c10, //todo: the rest
- _3dVertexArray0LimitHigh = 0x1f00,
- _3dVertexArray0LimitLow = 0x1f04,
- _3dCbSize = 0x2380,
- _3dCbAddressHigh = 0x2384,
- _3dCbAddressLow = 0x2388,
- _3dCbPos = 0x238c,
- _3dCbData0 = 0x2390,
- _3dCbData1 = 0x2394,
- _3dCbData2 = 0x2398,
- _3dCbData3 = 0x239c,
- _3dCbData4 = 0x23a0,
- _3dCbData5 = 0x23a4,
- _3dCbData6 = 0x23a8,
- _3dCbData7 = 0x23ac,
- _3dCbData8 = 0x23b0,
- _3dCbData9 = 0x23b4,
- _3dCbData10 = 0x23b8,
- _3dCbData11 = 0x23bc,
- _3dCbData12 = 0x23c0,
- _3dCbData13 = 0x23c4,
- _3dCbData14 = 0x23c8,
- _3dCbData15 = 0x23cc,
- _3dSetShader = 0x3890
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuTexture.cs b/Ryujinx.Graphics/Gpu/NsGpuTexture.cs
deleted file mode 100644
index aac42200..00000000
--- a/Ryujinx.Graphics/Gpu/NsGpuTexture.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace Ryujinx.Graphics.Gpu
-{
- struct NsGpuTexture
- {
- public int Width;
- public int Height;
-
- public byte[] Data;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuTextureFormat.cs b/Ryujinx.Graphics/Gpu/NsGpuTextureFormat.cs
deleted file mode 100644
index 2993840b..00000000
--- a/Ryujinx.Graphics/Gpu/NsGpuTextureFormat.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Ryujinx.Graphics.Gpu
-{
- enum NsGpuTextureFormat
- {
- BC1 = 0x24,
- BC2 = 0x25,
- BC3 = 0x26
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NsGpuEngine.cs b/Ryujinx.Graphics/Gpu/NvGpuEngine.cs
index 118e2b72..624915d0 100644
--- a/Ryujinx.Graphics/Gpu/NsGpuEngine.cs
+++ b/Ryujinx.Graphics/Gpu/NvGpuEngine.cs
@@ -1,13 +1,11 @@
namespace Ryujinx.Graphics.Gpu
{
- enum NsGpuEngine
+ enum NvGpuEngine
{
- None = 0,
_2d = 0x902d,
_3d = 0xb197,
Compute = 0xb1c0,
Kepler = 0xa140,
- Dma = 0xb0b5,
- GpFifo = 0xb06f
+ Dma = 0xb0b5
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuEngine3d.cs b/Ryujinx.Graphics/Gpu/NvGpuEngine3d.cs
new file mode 100644
index 00000000..f4486f46
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuEngine3d.cs
@@ -0,0 +1,469 @@
+using ChocolArm64.Memory;
+using Ryujinx.Graphics.Gal;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ class NvGpuEngine3d : INvGpuEngine
+ {
+ public int[] Registers { get; private set; }
+
+ private NsGpu Gpu;
+
+ private Dictionary<int, NvGpuMethod> Methods;
+
+ private struct ConstBuffer //Binding state of one constant buffer slot, filled in by CbBind.
+ {
+ public bool Enabled; //Bit 0 of the CbBind argument.
+ public long Position; //Address produced by TryGetCpuAddr for ConstBufferNAddress at bind time.
+ public int Size; //Value of the ConstBufferNSize register at bind time.
+ }
+
+ private ConstBuffer[] Cbs;
+
+ private bool HasDataToRender;
+
+        //Creates the engine state: a register file of 0xe00 words, the constant
+        //buffer slots, and the table of methods that need more than a plain
+        //register write.
+        public NvGpuEngine3d(NsGpu Gpu)
+        {
+            this.Gpu = Gpu;
+
+            Registers = new int[0xe00];
+
+            Cbs = new ConstBuffer[18];
+
+            Methods = new Dictionary<int, NvGpuMethod>();
+
+            //Maps Total method ids to Handler, starting at First and
+            //advancing by Step between consecutive ids.
+            void AddMethod(int First, int Total, int Step, NvGpuMethod Handler)
+            {
+                for (int Entry = 0; Entry < Total; Entry++)
+                {
+                    Methods.Add(First + Entry * Step, Handler);
+                }
+            }
+
+            AddMethod(0x585,  1, 1, VertexEndGl);
+            AddMethod(0x674,  1, 1, ClearBuffers);
+            AddMethod(0x6c3,  1, 1, QueryControl);
+            AddMethod(0x8e4, 16, 1, CbData);
+            AddMethod(0x904,  1, 1, CbBind);
+        }
+
+        //Runs the handler registered for the entry's method. Methods without
+        //a handler are treated as plain register writes.
+        public void CallMethod(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            if (!Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Handler))
+            {
+                WriteRegister(PBEntry);
+
+                return;
+            }
+
+            Handler(Memory, PBEntry);
+        }
+
+ private void VertexEndGl(AMemory Memory, NsGpuPBEntry PBEntry) //Handler registered for method 0x585; PBEntry is not read here.
+ {
+ SetFrameBuffer(0);
+
+ long[] Tags = UploadShaders(Memory); //One tag per shader type, used below to look up texture usage.
+
+ Gpu.Renderer.BindProgram();
+
+ SetAlphaBlending();
+
+ UploadTextures(Memory, Tags);
+ UploadUniforms(Memory);
+ UploadVertexArrays(Memory); //Also issues the draw calls.
+
+ HasDataToRender = true; //Makes the next ClearBuffers call draw frame buffer 0 before clearing.
+ }
+
+        //Handler registered for method 0x674. Draws frame buffer 0 when something
+        //was rendered since the last clear, then clears the requested buffers.
+        //Argument layout: bits 0-5 are the clear flags and bits 10-19 the layer.
+        //Bits 6-9 carry a frame buffer index, which is not used: frame buffer 0
+        //is always the one bound and cleared.
+        private void ClearBuffers(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            if (HasDataToRender)
+            {
+                HasDataToRender = false;
+
+                Gpu.Renderer.DrawFrameBuffer(0);
+            }
+
+            int Arg0 = PBEntry.Arguments[0];
+
+            int Layer = (Arg0 >> 10) & 0x3ff;
+
+            GalClearBufferFlags Flags = (GalClearBufferFlags)(Arg0 & 0x3f);
+
+            SetFrameBuffer(0);
+
+            Gpu.Renderer.ClearBuffers(Layer, Flags);
+        }
+
+        //Reads the size of the given frame buffer from the registers (one
+        //group of 0x10 registers per frame buffer), hands it to the renderer
+        //and binds that frame buffer.
+        private void SetFrameBuffer(int FbIndex)
+        {
+            int RegOffset = FbIndex * 0x10;
+
+            int FbWidth  = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth  + RegOffset);
+            int FbHeight = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + RegOffset);
+
+            Gpu.Renderer.SetFb(FbIndex, FbWidth, FbHeight);
+            Gpu.Renderer.BindFrameBuffer(FbIndex);
+        }
+
+        //Creates and binds every enabled shader program, reading its code from
+        //guest memory. Returns the shader tags (shader base address + program
+        //offset) indexed by GalShaderType. Entries of types that were not
+        //uploaded stay zero.
+        private long[] UploadShaders(AMemory Memory)
+        {
+            //TODO: Find a better way to calculate the size.
+            const int CodeSize = 0x20000;
+
+            long CodeBase = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
+
+            long[] Tags = new long[5];
+
+            for (int Program = 0; Program < 6; Program++)
+            {
+                int RegOffset = Program * 0x10;
+
+                int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + RegOffset);
+                int Offset  = ReadRegister(NvGpuEngine3dReg.ShaderNOffset  + RegOffset);
+
+                //Note: Vertex Program (B) is always enabled.
+                if (Program == 1 || (Control & 1) != 0)
+                {
+                    long Tag = CodeBase + (uint)Offset;
+
+                    long CodePosition = Gpu.GetCpuAddr(Tag);
+
+                    byte[] Code = AMemoryHelper.ReadBytes(Memory, CodePosition, (uint)CodeSize);
+
+                    GalShaderType Type = GetTypeFromProgram(Program);
+
+                    Tags[(int)Type] = Tag;
+
+                    Gpu.Renderer.CreateShader(Tag, Type, Code);
+                    Gpu.Renderer.BindShader(Tag);
+                }
+            }
+
+            return Tags;
+        }
+
+        //Maps a shader program index (0 to 5) to its shader type. Programs 0
+        //and 1 are both vertex programs. Any other index throws
+        //ArgumentOutOfRangeException.
+        private static GalShaderType GetTypeFromProgram(int Program)
+        {
+            if (Program == 0 || Program == 1)
+            {
+                return GalShaderType.Vertex;
+            }
+
+            if (Program == 2)
+            {
+                return GalShaderType.TessControl;
+            }
+
+            if (Program == 3)
+            {
+                return GalShaderType.TessEvaluation;
+            }
+
+            if (Program == 4)
+            {
+                return GalShaderType.Geometry;
+            }
+
+            if (Program == 5)
+            {
+                return GalShaderType.Fragment;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(Program));
+        }
+
+        //Forwards the blend state held in the registers to the renderer: the
+        //master enable bit first, then either the combined or the separate
+        //RGB/alpha equation and factors.
+        private void SetAlphaBlending()
+        {
+            int EnableReg = ReadRegister(NvGpuEngine3dReg.BlendEnableMaster);
+
+            Gpu.Renderer.SetBlendEnable((EnableReg & 1) != 0);
+
+            int SeparateReg = ReadRegister(NvGpuEngine3dReg.BlendSeparateAlpha);
+
+            GalBlendEquation RgbEquation = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.BlendEquationRgb);
+
+            GalBlendFactor RgbSrc = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.BlendFuncSrcRgb);
+            GalBlendFactor RgbDst = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.BlendFuncDstRgb);
+
+            if ((SeparateReg & 1) == 0)
+            {
+                Gpu.Renderer.SetBlend(RgbEquation, RgbSrc, RgbDst);
+
+                return;
+            }
+
+            GalBlendEquation AlphaEquation = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.BlendEquationAlpha);
+
+            GalBlendFactor AlphaSrc = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.BlendFuncSrcAlpha);
+            GalBlendFactor AlphaDst = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.BlendFuncDstAlpha);
+
+            Gpu.Renderer.SetBlendSeparate(
+                RgbEquation,
+                AlphaEquation,
+                RgbSrc,
+                RgbDst,
+                AlphaSrc,
+                AlphaDst);
+        }
+
+        //Uploads every texture the bound shaders declare and points the matching
+        //sampler uniform at the texture unit it was uploaded to. Texture handles
+        //are read from the constant buffer selected by the TextureCbIndex
+        //register. Each entry of Tags reads from its own region of that buffer,
+        //located Size bytes after the previous one.
+        private void UploadTextures(AMemory Memory, long[] Tags)
+        {
+            int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);
+
+            long BasePosition = Cbs[TextureCbIndex].Position;
+
+            long Size = (uint)Cbs[TextureCbIndex].Size;
+
+            //Texture units are handed out sequentially across all shaders.
+            int TexIndex = 0;
+
+            for (int Index = 0; Index < Tags.Length; Index++)
+            {
+                foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.GetTextureUsage(Tags[Index]))
+                {
+                    long Position = BasePosition + Index * Size;
+
+                    UploadTexture(Memory, Position, TexIndex, DeclInfo.Index);
+
+                    Gpu.Renderer.SetUniform1(DeclInfo.Name, TexIndex);
+
+                    TexIndex++;
+                }
+            }
+        }
+
+        //Reads the texture handle number HndIndex from the buffer at
+        //BasePosition, splits it into a texture header index (low 20 bits) and
+        //a sampler index (next 12 bits), and binds both to unit TexIndex.
+        //Entries of both pools are 0x20 bytes apart.
+        private void UploadTexture(AMemory Memory, long BasePosition, int TexIndex, int HndIndex)
+        {
+            int Handle = Memory.ReadInt32(BasePosition + HndIndex * 4);
+
+            //The results of the address lookups are not checked here.
+            TryGetCpuAddr(NvGpuEngine3dReg.TexHeaderPoolOffset,  out long TicBase);
+            TryGetCpuAddr(NvGpuEngine3dReg.TexSamplerPoolOffset, out long TscBase);
+
+            long TicPosition = TicBase + ((Handle >>  0) & 0xfffff) * 0x20;
+            long TscPosition = TscBase + ((Handle >> 20) & 0xfff)   * 0x20;
+
+            Gpu.Renderer.SetTexture(TexIndex, TextureFactory.MakeTexture(Gpu, Memory, TicPosition));
+            Gpu.Renderer.SetSampler(TexIndex, TextureFactory.MakeSampler(Gpu, Memory, TscPosition));
+        }
+
+        //Sends the contents of every enabled constant buffer to the renderer,
+        //once for each enabled shader program. Programs 1 to 5 are visited
+        //(program 0 is skipped). Each of them reads its own copy of a buffer,
+        //located one buffer size after the copy of the previous program.
+        private void UploadUniforms(AMemory Memory)
+        {
+            long ShaderBase = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
+
+            for (int Program = 1; Program <= 5; Program++)
+            {
+                int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Program * 0x10);
+                int Offset  = ReadRegister(NvGpuEngine3dReg.ShaderNOffset  + Program * 0x10);
+
+                //Note: Vertex Program (B) is always enabled.
+                if (Program != 1 && (Control & 1) == 0)
+                {
+                    continue;
+                }
+
+                int Stage = Program - 1;
+
+                for (int Cbuf = 0; Cbuf < Cbs.Length; Cbuf++)
+                {
+                    ConstBuffer Cb = Cbs[Cbuf];
+
+                    if (!Cb.Enabled)
+                    {
+                        continue;
+                    }
+
+                    long CbPosition = Cb.Position + Stage * Cb.Size;
+
+                    byte[] Data = AMemoryHelper.ReadBytes(Memory, CbPosition, (uint)Cb.Size);
+
+                    Gpu.Renderer.SetConstBuffer(ShaderBase + (uint)Offset, Cbuf, Data);
+                }
+            }
+        }
+
+        //Uploads the index buffer (indexed draws only) and every enabled vertex
+        //array, and issues one draw call per enabled vertex array.
+        //Throws InvalidOperationException when the index size is above 4 bytes.
+        private void UploadVertexArrays(AMemory Memory)
+        {
+            long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
+
+            int IndexSize  = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
+            int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
+            int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
+
+            GalIndexFormat IndexFormat = (GalIndexFormat)IndexSize;
+
+            //The format register is used as log2 of the index size in bytes.
+            IndexSize = 1 << IndexSize;
+
+            if (IndexSize > 4)
+            {
+                throw new InvalidOperationException();
+            }
+
+            //Only an indexed draw needs the index buffer. The size cannot be
+            //used for this test, 1 << n is never zero.
+            if (IndexCount != 0)
+            {
+                IndexPosition = Gpu.GetCpuAddr(IndexPosition);
+
+                int BufferSize = IndexCount * IndexSize;
+
+                byte[] Data = AMemoryHelper.ReadBytes(Memory, IndexPosition, BufferSize);
+
+                Gpu.Renderer.SetIndexArray(Data, IndexFormat);
+            }
+
+            //Group the 16 vertex attributes by the vertex array they belong to
+            //(low 5 bits of the packed format word).
+            List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];
+
+            for (int Attr = 0; Attr < 16; Attr++)
+            {
+                int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr);
+
+                int ArrayIndex = Packed & 0x1f;
+
+                if (Attribs[ArrayIndex] == null)
+                {
+                    Attribs[ArrayIndex] = new List<GalVertexAttrib>();
+                }
+
+                Attribs[ArrayIndex].Add(new GalVertexAttrib(
+                                           ((Packed >>  6) & 0x1) != 0,
+                                            (Packed >>  7) & 0x3fff,
+                    (GalVertexAttribSize)((Packed >> 21) & 0x3f),
+                    (GalVertexAttribType)((Packed >> 27) & 0x7),
+                                           ((Packed >> 31) & 0x1) != 0));
+            }
+
+            //The primitive type is the same for every vertex array of this draw.
+            int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);
+
+            GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
+
+            for (int Index = 0; Index < 32; Index++)
+            {
+                int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4);
+
+                //Bit 12 enables the array, bits 0-11 are its stride.
+                bool Enable = (Control & 0x1000) != 0;
+
+                if (!Enable)
+                {
+                    continue;
+                }
+
+                long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
+                long VertexEndPos   = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 4);
+
+                //The end address is inclusive.
+                long Size = (VertexEndPos - VertexPosition) + 1;
+
+                int Stride = Control & 0xfff;
+
+                VertexPosition = Gpu.GetCpuAddr(VertexPosition);
+
+                byte[] Data = AMemoryHelper.ReadBytes(Memory, VertexPosition, Size);
+
+                GalVertexAttrib[] AttribArray = Attribs[Index]?.ToArray() ?? new GalVertexAttrib[0];
+
+                Gpu.Renderer.SetVertexArray(Index, Stride, Data, AttribArray);
+
+                if (IndexCount != 0)
+                {
+                    Gpu.Renderer.DrawElements(Index, IndexFirst, PrimType);
+                }
+                else
+                {
+                    Gpu.Renderer.DrawArrays(Index, PrimType);
+                }
+            }
+        }
+
+        //Handler registered for method 0x6c3 (the QueryControl register).
+        //When the low two bits of the control value are zero, the value of
+        //the QuerySequence register is written to the query address.
+        private void QueryControl(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            //Store the new control value first: the mode checked below has to
+            //come from this call's argument, not from the previous call.
+            WriteRegister(PBEntry);
+
+            if (TryGetCpuAddr(NvGpuEngine3dReg.QueryAddress, out long Position))
+            {
+                int Seq  = ReadRegister(NvGpuEngine3dReg.QuerySequence);
+                int Ctrl = ReadRegister(NvGpuEngine3dReg.QueryControl);
+
+                int Mode = Ctrl & 3;
+
+                if (Mode == 0)
+                {
+                    //Write.
+                    Memory.WriteInt32(Position, Seq);
+                }
+            }
+        }
+
+        //Handler registered for methods 0x8e4 to 0x8f3. Appends the arguments
+        //to the constant buffer addressed by the ConstBufferNAddress registers,
+        //starting at the offset held in ConstBufferNOffset, and stores the
+        //advanced offset back. Nothing happens when the address lookup fails.
+        private void CbData(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            if (!TryGetCpuAddr(NvGpuEngine3dReg.ConstBufferNAddress, out long Position))
+            {
+                return;
+            }
+
+            int Offset = ReadRegister(NvGpuEngine3dReg.ConstBufferNOffset);
+
+            for (int Index = 0; Index < PBEntry.Arguments.Count; Index++)
+            {
+                Memory.WriteInt32(Position + Offset, PBEntry.Arguments[Index]);
+
+                Offset += 4;
+            }
+
+            WriteRegister(NvGpuEngine3dReg.ConstBufferNOffset, Offset);
+        }
+
+        //Handler registered for method 0x904. Binds the constant buffer described
+        //by the ConstBufferNAddress/ConstBufferNSize registers to a slot.
+        //Argument layout: bit 0 is the enable flag, bits 4-8 the slot index.
+        //The request is ignored when the slot does not exist or the address
+        //lookup fails.
+        private void CbBind(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            int Arg0 = PBEntry.Arguments[0];
+
+            bool Enabled = (Arg0 & 1) != 0;
+
+            int Index = (Arg0 >> 4) & 0x1f;
+
+            //The index field can encode up to 31, which is more than the
+            //number of slots available.
+            if (Index >= Cbs.Length)
+            {
+                return;
+            }
+
+            if (TryGetCpuAddr(NvGpuEngine3dReg.ConstBufferNAddress, out long Position))
+            {
+                Cbs[Index].Position = Position;
+                Cbs[Index].Enabled  = Enabled;
+
+                Cbs[Index].Size = ReadRegister(NvGpuEngine3dReg.ConstBufferNSize);
+            }
+        }
+
+        //Reads one 32-bit word located Offset bytes into the constant buffer
+        //bound to slot Cbuf.
+        private int ReadCb(AMemory Memory, int Cbuf, int Offset)
+        {
+            return Memory.ReadInt32(Cbs[Cbuf].Position + Offset);
+        }
+
+        //Builds a 64-bit address from the register pair starting at Reg and
+        //passes it through Gpu.GetCpuAddr. Returns false when the result is -1.
+        //Position receives the result either way.
+        private bool TryGetCpuAddr(NvGpuEngine3dReg Reg, out long Position)
+        {
+            long GpuAddress = MakeInt64From2xInt32(Reg);
+
+            Position = Gpu.GetCpuAddr(GpuAddress);
+
+            return Position != -1;
+        }
+
+        //Combines two consecutive registers into one 64-bit value: the register
+        //at Reg supplies the upper 32 bits, the following one the lower 32 bits
+        //(taken as unsigned).
+        private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg)
+        {
+            int Index = (int)Reg;
+
+            long High = Registers[Index];
+            long Low  = (uint)Registers[Index + 1];
+
+            return (High << 32) | Low;
+        }
+
+        //Stores the last argument of the entry in the register addressed by
+        //the entry's method. Entries without arguments change nothing.
+        private void WriteRegister(NsGpuPBEntry PBEntry)
+        {
+            int ArgsCount = PBEntry.Arguments.Count;
+
+            if (ArgsCount <= 0)
+            {
+                return;
+            }
+
+            Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
+        }
+
+        //Returns the current value of the given register.
+        private int ReadRegister(NvGpuEngine3dReg Reg) => Registers[(int)Reg];
+
+        //Overwrites the given register with Value.
+        private void WriteRegister(NvGpuEngine3dReg Reg, int Value) => Registers[(int)Reg] = Value;
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuEngine3dReg.cs b/Ryujinx.Graphics/Gpu/NvGpuEngine3dReg.cs
new file mode 100644
index 00000000..4bba9abe
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuEngine3dReg.cs
@@ -0,0 +1,44 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ enum NvGpuEngine3dReg //Indices into the NvGpuEngine3d register array.
+ {
+ FrameBufferNAddress = 0x200,
+ FrameBufferNWidth = 0x202, //Read with a stride of 0x10 registers per frame buffer.
+ FrameBufferNHeight = 0x203, //Read with a stride of 0x10 registers per frame buffer.
+ FrameBufferNFormat = 0x204,
+ VertexAttribNFormat = 0x458, //One packed word per vertex attribute, 16 consecutive registers are read.
+ BlendSeparateAlpha = 0x4cf, //Bit 0 selects separate RGB/alpha blending.
+ BlendEquationRgb = 0x4d0,
+ BlendFuncSrcRgb = 0x4d1,
+ BlendFuncDstRgb = 0x4d2,
+ BlendEquationAlpha = 0x4d3,
+ BlendFuncSrcAlpha = 0x4d4,
+ BlendFuncDstAlpha = 0x4d6,
+ BlendEnableMaster = 0x4d7, //Bit 0 enables blending.
+ VertexArrayElemBase = 0x50d,
+ TexHeaderPoolOffset = 0x55d, //Register pair: high word here, low word in the next register.
+ TexSamplerPoolOffset = 0x557, //Register pair: high word here, low word in the next register.
+ ShaderAddress = 0x582, //Register pair: high word here, low word in the next register.
+ VertexBeginGl = 0x586, //Low 16 bits are read as the primitive type.
+ IndexArrayAddress = 0x5f2, //Register pair: high word here, low word in the next register.
+ IndexArrayEndAddr = 0x5f4,
+ IndexArrayFormat = 0x5f6, //Used as log2 of the index size in bytes.
+ IndexBatchFirst = 0x5f7,
+ IndexBatchCount = 0x5f8,
+ QueryAddress = 0x6c0, //Register pair: high word here, low word in the next register.
+ QuerySequence = 0x6c2,
+ QueryControl = 0x6c3,
+ VertexArrayNControl = 0x700, //Read with a stride of 4 registers per vertex array.
+ VertexArrayNAddress = 0x701, //Register pair, read with a stride of 4 registers per vertex array.
+ VertexArrayNDivisor = 0x703,
+ VertexArrayNEndAddr = 0x7c0, //Register pair, read with a stride of 4 registers per vertex array.
+ ShaderNControl = 0x800, //Read with a stride of 0x10 registers per shader program; bit 0 enables the program.
+ ShaderNOffset = 0x801, //Read with a stride of 0x10 registers per shader program; added to ShaderAddress.
+ ShaderNMaxGprs = 0x803,
+ ShaderNType = 0x804,
+ ConstBufferNSize = 0x8e0,
+ ConstBufferNAddress = 0x8e1, //Register pair: high word here, low word in the next register.
+ ConstBufferNOffset = 0x8e3, //Advanced by 4 for every word written through CbData.
+ TextureCbIndex = 0x982 //Constant buffer slot that texture handles are read from.
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuFifo.cs b/Ryujinx.Graphics/Gpu/NvGpuFifo.cs
new file mode 100644
index 00000000..df765895
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuFifo.cs
@@ -0,0 +1,171 @@
+using ChocolArm64.Memory;
+using System.Collections.Concurrent;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ public class NvGpuFifo
+ {
+ private const int MacrosCount = 0x80;
+ private const int MacroIndexMask = MacrosCount - 1;
+
+ private NsGpu Gpu;
+
+ private ConcurrentQueue<(AMemory, NsGpuPBEntry)> BufferQueue;
+
+ private NvGpuEngine[] SubChannels;
+
+        //A macro bound to a macro index: the position of its code plus the
+        //interpreter that runs it. A default (never bound) instance has no
+        //interpreter, so PushParam and Execute do nothing on it.
+        private struct CachedMacro
+        {
+            private MacroInterpreter Interpreter;
+
+            public long Position { get; private set; }
+
+            public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, long Position)
+            {
+                Interpreter = new MacroInterpreter(PFifo, Engine);
+
+                this.Position = Position;
+            }
+
+            //Queues a parameter on the interpreter's Fifo.
+            public void PushParam(int Param) => Interpreter?.Fifo.Enqueue(Param);
+
+            //Runs the macro code at Position with Param as argument.
+            public void Execute(AMemory Memory, int Param) => Interpreter?.Execute(Memory, Position, Param);
+        }
+
+ private long CurrMacroPosition;
+ private int CurrMacroBindIndex;
+
+ private CachedMacro[] Macros;
+
+        //Creates an empty FIFO: no queued entries, 8 sub channels and
+        //MacrosCount macro slots.
+        public NvGpuFifo(NsGpu Gpu)
+        {
+            SubChannels = new NvGpuEngine[8];
+            Macros      = new CachedMacro[MacrosCount];
+
+            BufferQueue = new ConcurrentQueue<(AMemory, NsGpuPBEntry)>();
+
+            this.Gpu = Gpu;
+        }
+
+        //Queues every entry of Buffer, in order, together with the memory
+        //it has to be executed against.
+        public void PushBuffer(AMemory Memory, NsGpuPBEntry[] Buffer)
+        {
+            for (int Index = 0; Index < Buffer.Length; Index++)
+            {
+                BufferQueue.Enqueue((Memory, Buffer[Index]));
+            }
+        }
+
+        //Executes queued entries until the queue is empty.
+        public void DispatchCalls()
+        {
+            bool HasMore;
+
+            do
+            {
+                HasMore = Step();
+            }
+            while (HasMore);
+        }
+
+        //Executes the oldest queued entry. Returns false when the queue was
+        //empty and nothing was executed.
+        public bool Step()
+        {
+            if (!BufferQueue.TryDequeue(out (AMemory Memory, NsGpuPBEntry PBEntry) Entry))
+            {
+                return false;
+            }
+
+            CallMethod(Entry.Memory, Entry.PBEntry);
+
+            return true;
+        }
+
+        //Handles one push buffer entry. Methods below 0x80 are handled by the
+        //FIFO itself (channel binding, macro upload and macro binding), all
+        //other methods go to the engine bound to the entry's sub channel.
+        //Only the 3D engine is dispatched, entries for other engines are dropped.
+        private void CallMethod(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            if (PBEntry.Method < 0x80)
+            {
+                switch ((NvGpuFifoMeth)PBEntry.Method)
+                {
+                    case NvGpuFifoMeth.BindChannel:
+                    {
+                        NvGpuEngine Engine = (NvGpuEngine)PBEntry.Arguments[0];
+
+                        SubChannels[PBEntry.SubChannel] = Engine;
+
+                        break;
+                    }
+
+                    case NvGpuFifoMeth.SetMacroUploadAddress:
+                    {
+                        //The argument is a word address. It is zero extended,
+                        //a direct int to ulong cast would sign extend values
+                        //that have bit 31 set.
+                        CurrMacroPosition = (long)((ulong)(uint)PBEntry.Arguments[0] << 2);
+
+                        break;
+                    }
+
+                    case NvGpuFifoMeth.SendMacroCodeData:
+                    {
+                        long Position = Gpu.GetCpuAddr(CurrMacroPosition);
+
+                        //The upload address advances with every word written,
+                        //so consecutive calls append to the macro code.
+                        foreach (int Arg in PBEntry.Arguments)
+                        {
+                            Memory.WriteInt32(Position, Arg);
+
+                            CurrMacroPosition += 4;
+
+                            Position += 4;
+                        }
+
+                        break;
+                    }
+
+                    case NvGpuFifoMeth.SetMacroBindingIndex:
+                    {
+                        CurrMacroBindIndex = PBEntry.Arguments[0];
+
+                        break;
+                    }
+
+                    case NvGpuFifoMeth.BindMacro:
+                    {
+                        //Zero extended word address, see SetMacroUploadAddress.
+                        long Position = (long)((ulong)(uint)PBEntry.Arguments[0] << 2);
+
+                        Position = Gpu.GetCpuAddr(Position);
+
+                        //The binding index is not validated when it is set.
+                        //It is masked here, the same way the index is masked
+                        //when a macro is called, so it always names a valid slot.
+                        int BindIndex = CurrMacroBindIndex & MacroIndexMask;
+
+                        Macros[BindIndex] = new CachedMacro(this, Gpu.Engine3d, Position);
+
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                switch (SubChannels[PBEntry.SubChannel])
+                {
+                    case NvGpuEngine._3d: Call3dMethod(Memory, PBEntry); break;
+                }
+            }
+        }
+
+        //Methods below 0xe00 go to the 3D engine. Higher methods address a
+        //macro: bits 1 and up select the macro slot, an odd method pushes its
+        //arguments as macro parameters, an even method runs the macro with
+        //the first argument.
+        private void Call3dMethod(AMemory Memory, NsGpuPBEntry PBEntry)
+        {
+            if (PBEntry.Method < 0xe00)
+            {
+                Gpu.Engine3d.CallMethod(Memory, PBEntry);
+
+                return;
+            }
+
+            int MacroIndex = (PBEntry.Method >> 1) & MacroIndexMask;
+
+            bool IsExecute = (PBEntry.Method & 1) == 0;
+
+            if (IsExecute)
+            {
+                Macros[MacroIndex].Execute(Memory, PBEntry.Arguments[0]);
+
+                return;
+            }
+
+            foreach (int Arg in PBEntry.Arguments)
+            {
+                Macros[MacroIndex].PushParam(Arg);
+            }
+        }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuFifoMeth.cs b/Ryujinx.Graphics/Gpu/NvGpuFifoMeth.cs
new file mode 100644
index 00000000..4287e250
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuFifoMeth.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ enum NvGpuFifoMeth //Method numbers below 0x80 that NvGpuFifo handles itself.
+ {
+ BindChannel = 0, //Binds the engine given as argument to the entry's sub channel.
+ SetMacroUploadAddress = 0x45, //Sets the position macro code is uploaded to (argument shifted left by 2).
+ SendMacroCodeData = 0x46, //Writes the arguments at the upload position and advances it.
+ SetMacroBindingIndex = 0x47, //Selects the macro slot used by the next BindMacro.
+ BindMacro = 0x48 //Binds the code at the given position (argument shifted left by 2) to the selected slot.
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuMethod.cs b/Ryujinx.Graphics/Gpu/NvGpuMethod.cs
new file mode 100644
index 00000000..2923ddff
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuMethod.cs
@@ -0,0 +1,6 @@
+using ChocolArm64.Memory;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ delegate void NvGpuMethod(AMemory Memory, NsGpuPBEntry PBEntry); //Signature of the per-method handlers stored in NvGpuEngine3d.Methods.
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/NvGpuPushBuffer.cs b/Ryujinx.Graphics/Gpu/NvGpuPushBuffer.cs
new file mode 100644
index 00000000..8cbb3288
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/NvGpuPushBuffer.cs
@@ -0,0 +1,101 @@
+using System.Collections.Generic;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu
+{
+    //Decoder for raw push buffer data.
+    public static class NvGpuPushBuffer
+    {
+        private enum SubmissionMode
+        {
+            Incrementing    = 1,
+            NonIncrementing = 3,
+            Immediate       = 4,
+            IncrementOnce   = 5
+        }
+
+        //Splits Data into method call entries. Each command starts with a
+        //header word: bits 0-12 method, bits 13-15 sub channel, bits 16-28
+        //argument count (or the argument itself in immediate mode), and
+        //bits 29-31 submission mode. Headers with other modes produce no entry.
+        //Decoding stops at the end of the data, a truncated command yields
+        //whatever could still be read.
+        public static NsGpuPBEntry[] Decode(byte[] Data)
+        {
+            using (MemoryStream MS = new MemoryStream(Data))
+            {
+                BinaryReader Reader = new BinaryReader(MS);
+
+                List<NsGpuPBEntry> Entries = new List<NsGpuPBEntry>();
+
+                bool CanRead() => MS.Position + 4 <= MS.Length;
+
+                //Reads Count argument words. Words missing because the data
+                //ended early are left at zero.
+                int[] ReadArguments(int Count)
+                {
+                    int[] Arguments = new int[Count];
+
+                    for (int Index = 0; Index < Count && CanRead(); Index++)
+                    {
+                        Arguments[Index] = Reader.ReadInt32();
+                    }
+
+                    return Arguments;
+                }
+
+                while (CanRead())
+                {
+                    int Header = Reader.ReadInt32();
+
+                    int Meth = (Header >>  0) & 0x1fff;
+                    int SubC = (Header >> 13) & 7;
+                    int Args = (Header >> 16) & 0x1fff;
+
+                    SubmissionMode Mode = (SubmissionMode)((Header >> 29) & 7);
+
+                    if (Mode == SubmissionMode.Incrementing)
+                    {
+                        //One entry per argument, on consecutive methods.
+                        for (int Index = 0; Index < Args && CanRead(); Index++)
+                        {
+                            Entries.Add(new NsGpuPBEntry(Meth + Index, SubC, Reader.ReadInt32()));
+                        }
+                    }
+                    else if (Mode == SubmissionMode.NonIncrementing)
+                    {
+                        //A single entry that carries all the arguments.
+                        Entries.Add(new NsGpuPBEntry(Meth, SubC, ReadArguments(Args)));
+                    }
+                    else if (Mode == SubmissionMode.Immediate)
+                    {
+                        //The count field is the argument.
+                        Entries.Add(new NsGpuPBEntry(Meth, SubC, Args));
+                    }
+                    else if (Mode == SubmissionMode.IncrementOnce)
+                    {
+                        //The first argument goes to the method, all the
+                        //remaining ones to the method that follows it.
+                        if (CanRead())
+                        {
+                            Entries.Add(new NsGpuPBEntry(Meth, SubC, Reader.ReadInt32()));
+                        }
+
+                        if (CanRead() && Args > 1)
+                        {
+                            Entries.Add(new NsGpuPBEntry(Meth + 1, SubC, ReadArguments(Args - 1)));
+                        }
+                    }
+                }
+
+                return Entries.ToArray();
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/SwizzleAddr.cs b/Ryujinx.Graphics/Gpu/SwizzleAddr.cs
deleted file mode 100644
index 08e61eb5..00000000
--- a/Ryujinx.Graphics/Gpu/SwizzleAddr.cs
+++ /dev/null
@@ -1,144 +0,0 @@
-using System;
-
-namespace Ryujinx.Graphics.Gpu
-{
- class SwizzleAddr
- {
- private int Width;
-
- private int XB;
- private int YB;
-
- public SwizzleAddr(int Width, int Height, int Pad)
- {
- int W = Pow2RoundUp(Width);
- int H = Pow2RoundUp(Height);
-
- XB = CountZeros(W);
- YB = CountZeros(H);
-
- int HH = H >> 1;
-
- if (!IsPow2(Height) && Height <= HH + HH / 3 && YB > 3)
- {
- YB--;
- }
-
- this.Width = RoundSize(Width, Pad);
- }
-
- private static int Pow2RoundUp(int Value)
- {
- Value--;
-
- Value |= (Value >> 1);
- Value |= (Value >> 2);
- Value |= (Value >> 4);
- Value |= (Value >> 8);
- Value |= (Value >> 16);
-
- return ++Value;
- }
-
- private static bool IsPow2(int Value)
- {
- return Value != 0 && (Value & (Value - 1)) == 0;
- }
-
- private static int CountZeros(int Value)
- {
- int Count = 0;
-
- for (int i = 0; i < 32; i++)
- {
- if ((Value & (1 << i)) != 0)
- {
- break;
- }
-
- Count++;
- }
-
- return Count;
- }
-
- private static int RoundSize(int Size, int Pad)
- {
- int Mask = Pad - 1;
-
- if ((Size & Mask) != 0)
- {
- Size &= ~Mask;
- Size += Pad;
- }
-
- return Size;
- }
-
- public int GetSwizzledAddress8(int X, int Y)
- {
- return GetSwizzledAddress(X, Y, 4);
- }
-
- public int GetSwizzledAddress16(int X, int Y)
- {
- return GetSwizzledAddress(X, Y, 3);
- }
-
- public int GetSwizzledAddress32(int X, int Y)
- {
- return GetSwizzledAddress(X, Y, 2);
- }
-
- public int GetSwizzledAddress64(int X, int Y)
- {
- return GetSwizzledAddress(X, Y, 1);
- }
-
- public int GetSwizzledAddress128(int X, int Y)
- {
- return GetSwizzledAddress(X, Y, 0);
- }
-
- private int GetSwizzledAddress(int X, int Y, int XBase)
- {
- /*
- * Examples of patterns:
- * x x y x y y x y 0 0 0 0 64 x 64 dxt5
- * x x x x x y y y y x y y x y 0 0 0 0 512 x 512 dxt5
- * y x x x x x x y y y y x y y x y 0 0 0 0 1024 x 1024 dxt5
- * y y x x x x x x y y y y x y y x y x 0 0 0 2048 x 2048 dxt1
- * y y y x x x x x x y y y y x y y x y x x 0 0 1024 x 1024 rgba8888
- *
- * Read from right to left, LSB first.
- */
- int XCnt = XBase;
- int YCnt = 1;
- int XUsed = 0;
- int YUsed = 0;
- int Address = 0;
-
- while (XUsed < XBase + 2 && XUsed + XCnt < XB)
- {
- int XMask = (1 << XCnt) - 1;
- int YMask = (1 << YCnt) - 1;
-
- Address |= (X & XMask) << XUsed + YUsed;
- Address |= (Y & YMask) << XUsed + YUsed + XCnt;
-
- X >>= XCnt;
- Y >>= YCnt;
-
- XUsed += XCnt;
- YUsed += YCnt;
-
- XCnt = Math.Min(XB - XUsed, 1);
- YCnt = Math.Min(YB - YUsed, YCnt << 1);
- }
-
- Address |= (X + Y * (Width >> XUsed)) << (XUsed + YUsed);
-
- return Address;
- }
- }
-}
diff --git a/Ryujinx.Graphics/Gpu/Texture.cs b/Ryujinx.Graphics/Gpu/Texture.cs
new file mode 100644
index 00000000..c8d4e527
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/Texture.cs
@@ -0,0 +1,34 @@
+using Ryujinx.Graphics.Gal;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ struct Texture //Description of a guest texture, as consumed by TextureReader.
+ {
+ public long Position { get; private set; } //Address the texture data is read from.
+
+ public int Width { get; private set; } //TextureFactory decodes this as a 16-bit header field plus one.
+ public int Height { get; private set; } //TextureFactory decodes this as a 16-bit header field plus one.
+
+ public int BlockHeight { get; private set; } //Passed to BlockLinearSwizzle; TextureFactory supplies a power of two.
+
+ public TextureSwizzle Swizzle { get; private set; } //Memory layout, selects the swizzle implementation.
+
+ public GalTextureFormat Format { get; private set; } //Selects the read routine in TextureReader.Read.
+
+ public Texture(
+ long Position,
+ int Width,
+ int Height,
+ int BlockHeight,
+ TextureSwizzle Swizzle,
+ GalTextureFormat Format)
+ {
+ this.Position = Position;
+ this.Width = Width;
+ this.Height = Height;
+ this.BlockHeight = BlockHeight;
+ this.Swizzle = Swizzle;
+ this.Format = Format;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/TextureFactory.cs b/Ryujinx.Graphics/Gpu/TextureFactory.cs
new file mode 100644
index 00000000..0a0497f3
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/TextureFactory.cs
@@ -0,0 +1,83 @@
+using ChocolArm64.Memory;
+using Ryujinx.Graphics.Gal;
+using System;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ static class TextureFactory
+ {
+        //Decodes the 8 word texture header at TicPosition, reads the texture
+        //data it points to and returns both as a GalTexture.
+        public static GalTexture MakeTexture(NsGpu Gpu, AMemory Memory, long TicPosition)
+        {
+            int[] Tic = ReadWords(Memory, TicPosition, 8);
+
+            GalTextureFormat Format = (GalTextureFormat)(Tic[0] & 0x7f);
+
+            //48-bit address: word 1 holds the low 32 bits, the low half of
+            //word 2 the upper 16 bits.
+            long AddressLow  = (uint)Tic[1];
+            long AddressHigh = (ushort)Tic[2];
+
+            long TextureAddress = Gpu.GetCpuAddr(AddressLow | (AddressHigh << 32));
+
+            TextureSwizzle Swizzle = (TextureSwizzle)((Tic[2] >> 21) & 7);
+
+            //The header stores log2 of the block height.
+            int BlockHeight = 1 << ((Tic[3] >> 3) & 7);
+
+            //Sizes are stored minus one.
+            int Width  = (Tic[4] & 0xffff) + 1;
+            int Height = (Tic[5] & 0xffff) + 1;
+
+            Texture Info = new Texture(
+                TextureAddress,
+                Width,
+                Height,
+                BlockHeight,
+                Swizzle,
+                Format);
+
+            byte[] Data = TextureReader.Read(Memory, Info);
+
+            return new GalTexture(Data, Width, Height, Format);
+        }
+
+        //Decodes the 8 word sampler descriptor at TscPosition: wrap modes from
+        //word 0, filters from word 1 and the border color (4 floats) from
+        //words 4 to 7.
+        public static GalTextureSampler MakeSampler(NsGpu Gpu, AMemory Memory, long TscPosition)
+        {
+            int[] Tsc = ReadWords(Memory, TscPosition, 8);
+
+            int WrapWord   = Tsc[0];
+            int FilterWord = Tsc[1];
+
+            GalTextureWrap AddressU = (GalTextureWrap)((WrapWord >> 0) & 7);
+            GalTextureWrap AddressV = (GalTextureWrap)((WrapWord >> 3) & 7);
+            GalTextureWrap AddressP = (GalTextureWrap)((WrapWord >> 6) & 7);
+
+            GalTextureFilter    MagFilter = (GalTextureFilter)   ((FilterWord >> 0) & 3);
+            GalTextureFilter    MinFilter = (GalTextureFilter)   ((FilterWord >> 4) & 3);
+            GalTextureMipFilter MipFilter = (GalTextureMipFilter)((FilterWord >> 6) & 3);
+
+            float BorderR = BitConverter.Int32BitsToSingle(Tsc[4]);
+            float BorderG = BitConverter.Int32BitsToSingle(Tsc[5]);
+            float BorderB = BitConverter.Int32BitsToSingle(Tsc[6]);
+            float BorderA = BitConverter.Int32BitsToSingle(Tsc[7]);
+
+            GalColorF BorderColor = new GalColorF(BorderR, BorderG, BorderB, BorderA);
+
+            return new GalTextureSampler(
+                AddressU,
+                AddressV,
+                AddressP,
+                MinFilter,
+                MagFilter,
+                MipFilter,
+                BorderColor);
+        }
+
+        //Reads Count consecutive 32-bit words starting at Position.
+        private static int[] ReadWords(AMemory Memory, long Position, int Count)
+        {
+            int[] Words = new int[Count];
+
+            for (int Index = 0; Index < Words.Length; Index++)
+            {
+                Words[Index] = Memory.ReadInt32(Position + Index * 4);
+            }
+
+            return Words;
+        }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/TextureReader.cs b/Ryujinx.Graphics/Gpu/TextureReader.cs
new file mode 100644
index 00000000..ce66e991
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/TextureReader.cs
@@ -0,0 +1,127 @@
+using ChocolArm64.Memory;
+using Ryujinx.Graphics.Gal;
+using System;
+
+namespace Ryujinx.Graphics.Gpu
+{
+ static class TextureReader
+ {
+        //Reads the texture data from memory into a linear buffer, using the
+        //routine that matches the texture format. Throws
+        //NotImplementedException for formats without a routine.
+        public static byte[] Read(AMemory Memory, Texture Texture)
+        {
+            GalTextureFormat Format = Texture.Format;
+
+            if (Format == GalTextureFormat.A8B8G8R8)
+            {
+                return Read4Bpp(Memory, Texture);
+            }
+
+            if (Format == GalTextureFormat.BC1)
+            {
+                return Read8Bpt4x4(Memory, Texture);
+            }
+
+            if (Format == GalTextureFormat.BC2 || Format == GalTextureFormat.BC3)
+            {
+                return Read16Bpt4x4(Memory, Texture);
+            }
+
+            throw new NotImplementedException(Format.ToString());
+        }
+
+        //Reads a texture with 4 bytes per pixel, row by row, into a tightly
+        //packed buffer. The position of each pixel in memory comes from the
+        //swizzle selected by the texture layout.
+        private unsafe static byte[] Read4Bpp(AMemory Memory, Texture Texture)
+        {
+            int Width  = Texture.Width;
+            int Height = Texture.Height;
+
+            ISwizzle Swizzle = GetSwizzle(Texture.Swizzle, Width, 4, Texture.BlockHeight);
+
+            byte[] Output = new byte[Width * Height * 4];
+
+            fixed (byte* BuffPtr = Output)
+            {
+                int* Dst = (int*)BuffPtr;
+
+                for (int Y = 0; Y < Height; Y++)
+                {
+                    for (int X = 0; X < Width; X++)
+                    {
+                        long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
+
+                        *Dst++ = Memory.ReadInt32Unchecked(Texture.Position + Offset);
+                    }
+                }
+            }
+
+            return Output;
+        }
+
+        //Reads a texture made of 4x4 pixel tiles of 8 bytes each. The size in
+        //tiles is the pixel size divided by 4, rounded up. Tiles are copied
+        //row by row into a tightly packed buffer.
+        private unsafe static byte[] Read8Bpt4x4(AMemory Memory, Texture Texture)
+        {
+            int TilesX = (Texture.Width  + 3) / 4;
+            int TilesY = (Texture.Height + 3) / 4;
+
+            ISwizzle Swizzle = GetSwizzle(Texture.Swizzle, TilesX, 8, Texture.BlockHeight);
+
+            byte[] Output = new byte[TilesX * TilesY * 8];
+
+            fixed (byte* BuffPtr = Output)
+            {
+                long* Dst = (long*)BuffPtr;
+
+                for (int Y = 0; Y < TilesY; Y++)
+                {
+                    for (int X = 0; X < TilesX; X++)
+                    {
+                        long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
+
+                        *Dst++ = Memory.ReadInt64Unchecked(Texture.Position + Offset);
+                    }
+                }
+            }
+
+            return Output;
+        }
+
+        //Reads a texture made of 4x4 pixel tiles of 16 bytes each. The size in
+        //tiles is the pixel size divided by 4, rounded up. Each tile is copied
+        //as two 8 byte halves into a tightly packed buffer.
+        private unsafe static byte[] Read16Bpt4x4(AMemory Memory, Texture Texture)
+        {
+            int TilesX = (Texture.Width  + 3) / 4;
+            int TilesY = (Texture.Height + 3) / 4;
+
+            ISwizzle Swizzle = GetSwizzle(Texture.Swizzle, TilesX, 16, Texture.BlockHeight);
+
+            byte[] Output = new byte[TilesX * TilesY * 16];
+
+            fixed (byte* BuffPtr = Output)
+            {
+                long* Dst = (long*)BuffPtr;
+
+                for (int Y = 0; Y < TilesY; Y++)
+                {
+                    for (int X = 0; X < TilesX; X++)
+                    {
+                        long TilePosition = Texture.Position + (uint)Swizzle.GetSwizzleOffset(X, Y);
+
+                        long Tile0 = Memory.ReadInt64Unchecked(TilePosition + 0);
+                        long Tile1 = Memory.ReadInt64Unchecked(TilePosition + 8);
+
+                        Dst[0] = Tile0;
+                        Dst[1] = Tile1;
+
+                        Dst += 2;
+                    }
+                }
+            }
+
+            return Output;
+        }
+
+        //Creates the offset calculator for the given memory layout: linear
+        //for the pitch layouts, block linear for the block linear layouts.
+        //Throws NotImplementedException for any other layout.
+        private static ISwizzle GetSwizzle(TextureSwizzle Swizzle, int Width, int Bpp, int BlockHeight)
+        {
+            if (Swizzle == TextureSwizzle.Pitch ||
+                Swizzle == TextureSwizzle.PitchColorKey)
+            {
+                return new LinearSwizzle(Width, Bpp);
+            }
+
+            if (Swizzle == TextureSwizzle.BlockLinear ||
+                Swizzle == TextureSwizzle.BlockLinearColorKey)
+            {
+                return new BlockLinearSwizzle(Width, Bpp, BlockHeight);
+            }
+
+            throw new NotImplementedException(Swizzle.ToString());
+        }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Gpu/TextureSwizzle.cs b/Ryujinx.Graphics/Gpu/TextureSwizzle.cs
new file mode 100644
index 00000000..2142e2c2
--- /dev/null
+++ b/Ryujinx.Graphics/Gpu/TextureSwizzle.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Gpu
+{
+ enum TextureSwizzle //Texture memory layout, decoded by TextureFactory from bits 21-23 of texture header word 2.
+ {
+ _1dBuffer = 0, //Not handled by TextureReader.GetSwizzle, which throws NotImplementedException for it.
+ PitchColorKey = 1, //Read with LinearSwizzle.
+ Pitch = 2, //Read with LinearSwizzle.
+ BlockLinear = 3, //Read with BlockLinearSwizzle.
+ BlockLinearColorKey = 4 //Read with BlockLinearSwizzle.
+ }
+} \ No newline at end of file