aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics/Graphics3d
diff options
context:
space:
mode:
authorgdkchan <gab.dark.100@gmail.com>2018-12-03 00:38:47 -0200
committerGitHub <noreply@github.com>2018-12-03 00:38:47 -0200
commitc86aacde76b5f8e503e2b412385c8491ecc86b3b (patch)
tree8e4737422fba15199c1a6ce7c6345996c0e907b5 /Ryujinx.Graphics/Graphics3d
parentad00fd02442cf9c0f00c4562635738042b521efa (diff)
NVDEC implementation using FFmpeg (#443)
* Initial nvdec implementation using FFmpeg * Fix swapped channels on the video decoder and the G8R8 texture format * Fix texture samplers not being set properly (regression) * Rebased * Remove unused code introduced on the rebase * Add support for RGBA8 output format on the video image composer * Correct spacing * Some fixes for rebase and other tweaks * Allow size mismatch on frame copy * Get rid of GetHostAddress calls on VDec
Diffstat (limited to 'Ryujinx.Graphics/Graphics3d')
-rw-r--r--Ryujinx.Graphics/Graphics3d/INvGpuEngine.cs11
-rw-r--r--Ryujinx.Graphics/Graphics3d/MacroInterpreter.cs416
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngine.cs11
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs175
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs39
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs1014
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs110
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs187
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mfReg.cs25
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs161
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mfReg.cs17
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuFifo.cs176
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuFifoMeth.cs11
-rw-r--r--Ryujinx.Graphics/Graphics3d/NvGpuMethod.cs6
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs1384
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs138
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs121
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs59
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs7
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs445
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/IntegerEncoded.cs269
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs19
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs117
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs42
-rw-r--r--Ryujinx.Graphics/Graphics3d/Texture/TextureSwizzle.cs11
25 files changed, 4971 insertions, 0 deletions
diff --git a/Ryujinx.Graphics/Graphics3d/INvGpuEngine.cs b/Ryujinx.Graphics/Graphics3d/INvGpuEngine.cs
new file mode 100644
index 00000000..c2474a17
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/INvGpuEngine.cs
@@ -0,0 +1,11 @@
+using Ryujinx.Graphics.Memory;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Contract implemented by the GPU engines in this namespace: an engine
+ //exposes a register array and accepts method calls.
+ interface INvGpuEngine
+ {
+ //Register array of the engine, indexed by register/method number.
+ int[] Registers { get; }
+
+ //Handles one method call (method number plus argument) sent to the engine.
+ void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/MacroInterpreter.cs b/Ryujinx.Graphics/Graphics3d/MacroInterpreter.cs
new file mode 100644
index 00000000..a124aca4
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/MacroInterpreter.cs
@@ -0,0 +1,416 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Memory;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class MacroInterpreter
+ {
+ //What to do with the ALU result of a non-branch instruction.
+ //Decoded from bits 6:4 of the opcode.
+ private enum AssignmentOperation
+ {
+ //Fetch a parameter into the destination register, ignore the result.
+ IgnoreAndFetch = 0,
+ //Move the result into the destination register.
+ Move = 1,
+ //Move the result and also use it as the method address.
+ MoveAndSetMaddr = 2,
+ //Fetch a parameter into the destination register, send the result.
+ FetchAndSend = 3,
+ //Move the result and send it.
+ MoveAndSend = 4,
+ //Fetch a parameter into the destination register, use the result as method address.
+ FetchAndSetMaddr = 5,
+ //Move the result, use it as method address, then fetch a parameter and send it.
+ MoveAndSetMaddrThenFetchAndSend = 6,
+ //Move the result, use it as method address, then send bits 17:12 of the result.
+ MoveAndSetMaddrThenSendHigh = 7
+ }
+
+ //Kind of ALU operation, decoded from bits 2:0 of the opcode.
+ //The value 7 is not listed here because it marks a branch instruction.
+ private enum AluOperation
+ {
+ //Register/register operation, see AluRegOperation.
+ AluReg = 0,
+ //Register A plus the signed immediate.
+ AddImmediate = 1,
+ //Replaces a bitfield of register A with a bitfield taken from register B.
+ BitfieldReplace = 2,
+ //Extracts a bitfield of register B at the bit given by register A, shifted left by an immediate.
+ BitfieldExtractLslImm = 3,
+ //Extracts a bitfield of register B at an immediate bit, shifted left by register A.
+ BitfieldExtractLslReg = 4,
+ //Reads the engine register at index (register A + immediate).
+ ReadImmediate = 5
+ }
+
+ //Register/register ALU operation, decoded from bits 21:17 of the opcode.
+ //The add/subtract variants update the Carry flag, the bitwise ones do not.
+ private enum AluRegOperation
+ {
+ Add = 0,
+ AddWithCarry = 1,
+ Subtract = 2,
+ SubtractWithBorrow = 3,
+ BitwiseExclusiveOr = 8,
+ BitwiseOr = 9,
+ BitwiseAnd = 10,
+ //A & ~B
+ BitwiseAndNot = 11,
+ //~(A & B)
+ BitwiseNotAnd = 12
+ }
+
+ private NvGpuFifo PFifo;
+ private INvGpuEngine Engine;
+
+ public Queue<int> Fifo { get; private set; }
+
+ private int[] Gprs;
+
+ private int MethAddr;
+ private int MethIncr;
+
+ private bool Carry;
+
+ private int OpCode;
+
+ private int PipeOp;
+
+ private int Pc;
+
+ //Creates an interpreter bound to the given FIFO and to the engine that
+ //receives the method calls produced by the macro.
+ public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
+ {
+ //Eight general purpose registers; index 0 always reads as zero.
+ Gprs = new int[8];
+
+ //Queue holding the macro arguments that have not been fetched yet.
+ Fifo = new Queue<int>();
+
+ this.Engine = Engine;
+ this.PFifo = PFifo;
+ }
+
+ //Runs the macro stored in Mme starting at Position.
+ //Param is the first macro argument and is placed in register 1.
+ public void Execute(NvGpuVmm Vmm, int[] Mme, int Position, int Param)
+ {
+ Reset();
+
+ Pc = Position;
+
+ Gprs[1] = Param;
+
+ //Prime the pipeline with the first instruction.
+ FetchOpCode(Mme);
+
+ bool Running;
+
+ do
+ {
+ Running = Step(Vmm, Mme);
+ }
+ while (Running);
+
+ //The instruction following the exit is in a delay slot,
+ //so it still has to be executed before we return.
+ Step(Vmm, Mme);
+ }
+
+ //Puts the interpreter back into its initial state: all registers zero,
+ //no method address/increment and carry cleared.
+ private void Reset()
+ {
+ Array.Clear(Gprs, 0, Gprs.Length);
+
+ MethIncr = 0;
+ MethAddr = 0;
+
+ Carry = false;
+ }
+
+ //Executes one macro instruction.
+ //Returns false once an instruction with the exit bit (0x80) has been
+ //executed and no branch was taken, true otherwise.
+ private bool Step(NvGpuVmm Vmm, int[] Mme)
+ {
+ //Pc is one ahead because of the prefetch, branch targets are relative to this address.
+ int BaseAddr = Pc - 1;
+
+ FetchOpCode(Mme);
+
+ if ((OpCode & 7) < 7)
+ {
+ //Operation produces a value.
+ AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7);
+
+ int Result = GetAluResult();
+
+ switch (AsgOp)
+ {
+ //Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ {
+ SetDstGpr(FetchParam());
+
+ break;
+ }
+
+ //Move result.
+ case AssignmentOperation.Move:
+ {
+ SetDstGpr(Result);
+
+ break;
+ }
+
+ //Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ break;
+ }
+
+ //Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ {
+ SetDstGpr(FetchParam());
+
+ Send(Vmm, Result);
+
+ break;
+ }
+
+ //Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ {
+ SetDstGpr(Result);
+
+ Send(Vmm, Result);
+
+ break;
+ }
+
+ //Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ {
+ SetDstGpr(FetchParam());
+
+ SetMethAddr(Result);
+
+ break;
+ }
+
+ //Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ Send(Vmm, FetchParam());
+
+ break;
+ }
+
+ //Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ {
+ SetDstGpr(Result);
+
+ SetMethAddr(Result);
+
+ Send(Vmm, (Result >> 12) & 0x3f);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ //Branch.
+ //Bit 4 selects "branch if not zero" (set) or "branch if zero" (clear).
+ bool OnNotZero = ((OpCode >> 4) & 1) != 0;
+
+ bool Taken = OnNotZero
+ ? GetGprA() != 0
+ : GetGprA() == 0;
+
+ if (Taken)
+ {
+ Pc = BaseAddr + GetImm();
+
+ bool NoDelays = (OpCode & 0x20) != 0;
+
+ if (NoDelays)
+ {
+ //Fetching here replaces the already prefetched instruction with
+ //the branch target, so the delay slot is not executed.
+ FetchOpCode(Mme);
+ }
+
+ //A taken branch never ends the macro, the exit bit is not checked.
+ return true;
+ }
+ }
+
+ bool Exit = (OpCode & 0x80) != 0;
+
+ return !Exit;
+ }
+
+ //Advances the one-entry pipeline: the prefetched word becomes the current
+ //opcode and the word at Pc is prefetched, incrementing Pc.
+ private void FetchOpCode(int[] Mme)
+ {
+ OpCode = PipeOp;
+
+ int FetchAddr = Pc++;
+
+ PipeOp = Mme[FetchAddr];
+ }
+
+ //Computes the ALU result of the current opcode.
+ //Throws ArgumentException when bits 2:0 of the opcode do not map
+ //to a known AluOperation.
+ private int GetAluResult()
+ {
+ AluOperation Op = (AluOperation)(OpCode & 7);
+
+ switch (Op)
+ {
+ case AluOperation.AluReg:
+ {
+ AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f);
+
+ return GetAluResult(AluOp, GetGprA(), GetGprB());
+ }
+
+ case AluOperation.AddImmediate:
+ {
+ return GetGprA() + GetImm();
+ }
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ {
+ //Bitfield layout: source bit at 21:17, size at 26:22, destination bit at 31:27.
+ int BfSrcBit = (OpCode >> 17) & 0x1f;
+ int BfSize = (OpCode >> 22) & 0x1f;
+ int BfDstBit = (OpCode >> 27) & 0x1f;
+
+ int BfMask = (1 << BfSize) - 1;
+
+ int Dst = GetGprA();
+ int Src = GetGprB();
+
+ switch (Op)
+ {
+ case AluOperation.BitfieldReplace:
+ {
+ //Insert BfSize bits of Src (taken at BfSrcBit) into Dst at BfDstBit.
+ Src = (int)((uint)Src >> BfSrcBit) & BfMask;
+
+ Dst &= ~(BfMask << BfDstBit);
+
+ Dst |= Src << BfDstBit;
+
+ return Dst;
+ }
+
+ case AluOperation.BitfieldExtractLslImm:
+ {
+ //Here register A (Dst) provides the source bit position.
+ Src = (int)((uint)Src >> Dst) & BfMask;
+
+ return Src << BfDstBit;
+ }
+
+ case AluOperation.BitfieldExtractLslReg:
+ {
+ //Here register A (Dst) provides the left shift amount.
+ Src = (int)((uint)Src >> BfSrcBit) & BfMask;
+
+ return Src << Dst;
+ }
+ }
+
+ break;
+ }
+
+ case AluOperation.ReadImmediate:
+ {
+ return Read(GetGprA() + GetImm());
+ }
+ }
+
+ throw new ArgumentException(nameof(OpCode));
+ }
+
+ //Computes a register/register ALU operation on A and B.
+ //Add and subtract variants treat the operands as unsigned 32-bit values
+ //and update the Carry flag; bitwise variants leave Carry untouched.
+ //Throws ArgumentOutOfRangeException for an unknown AluOp.
+ private int GetAluResult(AluRegOperation AluOp, int A, int B)
+ {
+ //Zero-extend the operands. A direct (ulong) cast of an int sign-extends,
+ //which sets the upper 32 bits for any operand with bit 31 set and makes
+ //the carry/borrow checks below give the wrong answer.
+ ulong ValueA = (uint)A;
+ ulong ValueB = (uint)B;
+
+ switch (AluOp)
+ {
+ case AluRegOperation.Add:
+ {
+ ulong Result = ValueA + ValueB;
+
+ //Carry out of bit 31.
+ Carry = Result > 0xffffffff;
+
+ return (int)Result;
+ }
+
+ case AluRegOperation.AddWithCarry:
+ {
+ ulong Result = ValueA + ValueB + (Carry ? 1UL : 0UL);
+
+ Carry = Result > 0xffffffff;
+
+ return (int)Result;
+ }
+
+ case AluRegOperation.Subtract:
+ {
+ ulong Result = ValueA - ValueB;
+
+ //Carry is set when no borrow happened (the 64-bit result did not wrap).
+ Carry = Result < 0x100000000;
+
+ return (int)Result;
+ }
+
+ case AluRegOperation.SubtractWithBorrow:
+ {
+ //A clear carry means a pending borrow from the previous subtraction.
+ ulong Result = ValueA - ValueB - (Carry ? 0UL : 1UL);
+
+ Carry = Result < 0x100000000;
+
+ return (int)Result;
+ }
+
+ case AluRegOperation.BitwiseExclusiveOr: return A ^ B;
+ case AluRegOperation.BitwiseOr: return A | B;
+ case AluRegOperation.BitwiseAnd: return A & B;
+ case AluRegOperation.BitwiseAndNot: return A & ~B;
+ case AluRegOperation.BitwiseNotAnd: return ~(A & B);
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(AluOp));
+ }
+
+ //Returns the signed immediate stored in bits 31:14 of the opcode.
+ //The arithmetic shift is intentional, it sign-extends the value.
+ private int GetImm() => OpCode >> 14;
+
+ //Splits Value into the method address (bits 11:0) and the
+ //increment applied after each send (bits 17:12).
+ private void SetMethAddr(int Value)
+ {
+ int Address = Value & 0xfff;
+ int Increment = (Value >> 12) & 0x3f;
+
+ MethAddr = Address;
+ MethIncr = Increment;
+ }
+
+ //Writes Value to the destination register selected by bits 10:8 of the opcode.
+ private void SetDstGpr(int Value)
+ {
+ int DstIndex = (OpCode >> 8) & 7;
+
+ Gprs[DstIndex] = Value;
+ }
+
+ //Reads source register A, selected by bits 13:11 of the opcode.
+ private int GetGprA() => GetGprValue((OpCode >> 11) & 7);
+
+ //Reads source register B, selected by bits 16:14 of the opcode.
+ private int GetGprB() => GetGprValue((OpCode >> 14) & 7);
+
+ //Returns the value of a general purpose register.
+ //Register 0 always reads as zero, whatever was written to it.
+ private int GetGprValue(int Index)
+ {
+ if (Index == 0)
+ {
+ return 0;
+ }
+
+ return Gprs[Index];
+ }
+
+ //Takes the next macro argument from the queue.
+ //When the queue is empty a warning is logged and zero is returned.
+ private int FetchParam()
+ {
+ if (Fifo.TryDequeue(out int Value))
+ {
+ return Value;
+ }
+
+ Logger.PrintWarning(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return 0;
+ }
+
+ //Reads the engine register with the given index.
+ private int Read(int Reg) => Engine.Registers[Reg];
+
+ //Calls the engine method at the current method address with Value as
+ //argument, then advances the address by the current increment.
+ private void Send(NvGpuVmm Vmm, int Value)
+ {
+ Engine.CallMethod(Vmm, new GpuMethodCall(MethAddr, Value));
+
+ MethAddr += MethIncr;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine.cs
new file mode 100644
index 00000000..20c36fda
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Identifiers of the GPU engines.
+ //NOTE(review): the values look like the hardware class ids of each engine - confirm.
+ enum NvGpuEngine
+ {
+ _2d = 0x902d,
+ _3d = 0xb197,
+ Compute = 0xb1c0,
+ P2mf = 0xa140,
+ M2mf = 0xb0b5
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs
new file mode 100644
index 00000000..55e3ebd4
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs
@@ -0,0 +1,175 @@
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Memory;
+using Ryujinx.Graphics.Texture;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class NvGpuEngine2d : INvGpuEngine
+ {
+ //Values of the CopyOperation register. TextureCopy reads the register
+ //into this type but does not act on it.
+ private enum CopyOperation
+ {
+ SrcCopyAnd,
+ RopAnd,
+ Blend,
+ SrcCopy,
+ Rop,
+ SrcCopyPremult,
+ BlendPremult
+ }
+
+ public int[] Registers { get; private set; }
+
+ private NvGpu Gpu;
+
+ //Creates the 2D engine with a zeroed register array of 0x238 entries.
+ public NvGpuEngine2d(NvGpu Gpu)
+ {
+ Registers = new int[0x238];
+
+ this.Gpu = Gpu;
+ }
+
+ //Stores the argument in the register addressed by the method.
+ //A write to BlitSrcYInt additionally triggers the texture copy.
+ public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ WriteRegister(MethCall);
+
+ NvGpuEngine2dReg Reg = (NvGpuEngine2dReg)MethCall.Method;
+
+ if (Reg != NvGpuEngine2dReg.BlitSrcYInt)
+ {
+ return;
+ }
+
+ TextureCopy(Vmm);
+ }
+
+ //Performs the blit described by the current register state: the source
+ //and destination surfaces are sent to the resource manager, copied on
+ //the host renderer and then copied again in guest memory.
+ private void TextureCopy(NvGpuVmm Vmm)
+ {
+ //Note: the copy operation is read but currently not used below.
+ CopyOperation Operation = (CopyOperation)ReadRegister(NvGpuEngine2dReg.CopyOperation);
+
+ int DstFormat = ReadRegister(NvGpuEngine2dReg.DstFormat);
+ bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
+ int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth);
+ int DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
+ int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch);
+ int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);
+
+ int SrcFormat = ReadRegister(NvGpuEngine2dReg.SrcFormat);
+ bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0;
+ int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth);
+ int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);
+ int SrcPitch = ReadRegister(NvGpuEngine2dReg.SrcPitch);
+ int SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions);
+
+ int DstBlitX = ReadRegister(NvGpuEngine2dReg.BlitDstX);
+ int DstBlitY = ReadRegister(NvGpuEngine2dReg.BlitDstY);
+ int DstBlitW = ReadRegister(NvGpuEngine2dReg.BlitDstW);
+ int DstBlitH = ReadRegister(NvGpuEngine2dReg.BlitDstH);
+
+ //Scale factors and source origin are 64-bit fixed point values,
+ //with the integer part in the upper 32 bits.
+ long BlitDuDx = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDuDxFract);
+ long BlitDvDy = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDvDyFract);
+
+ long SrcBlitX = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcXFract);
+ long SrcBlitY = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcYFract);
+
+ GalImageFormat SrcImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)SrcFormat);
+ GalImageFormat DstImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)DstFormat);
+
+ GalMemoryLayout SrcLayout = GetLayout(SrcLinear);
+ GalMemoryLayout DstLayout = GetLayout(DstLinear);
+
+ //The block height is stored as a log2 value in bits 7:4.
+ int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf);
+ int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);
+
+ long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress);
+ long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress);
+
+ //Physical addresses are used as keys to identify the textures.
+ long SrcKey = Vmm.GetPhysicalAddress(SrcAddress);
+ long DstKey = Vmm.GetPhysicalAddress(DstAddress);
+
+ GalImage SrcTexture = new GalImage(
+ SrcWidth,
+ SrcHeight, 1,
+ SrcBlockHeight,
+ SrcLayout,
+ SrcImgFormat);
+
+ GalImage DstTexture = new GalImage(
+ DstWidth,
+ DstHeight, 1,
+ DstBlockHeight,
+ DstLayout,
+ DstImgFormat);
+
+ SrcTexture.Pitch = SrcPitch;
+ DstTexture.Pitch = DstPitch;
+
+ Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcTexture);
+ Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstTexture);
+
+ //Keep only the integer part of the source origin.
+ int SrcBlitX1 = (int)(SrcBlitX >> 32);
+ int SrcBlitY1 = (int)(SrcBlitY >> 32);
+
+ //Source end = origin + destination size * scale, then drop the fraction.
+ int SrcBlitX2 = (int)(SrcBlitX + DstBlitW * BlitDuDx >> 32);
+ int SrcBlitY2 = (int)(SrcBlitY + DstBlitH * BlitDvDy >> 32);
+
+ Gpu.Renderer.RenderTarget.Copy(
+ SrcKey,
+ DstKey,
+ SrcBlitX1,
+ SrcBlitY1,
+ SrcBlitX2,
+ SrcBlitY2,
+ DstBlitX,
+ DstBlitY,
+ DstBlitX + DstBlitW,
+ DstBlitY + DstBlitH);
+
+ //Do a guest side copy as well. This is necessary when
+ //the texture is modified by the guest, however it doesn't
+ //work when resources that the gpu can write to are copied,
+ //like framebuffers.
+ ImageUtils.CopyTexture(
+ Vmm,
+ SrcTexture,
+ DstTexture,
+ SrcAddress,
+ DstAddress,
+ SrcBlitX1,
+ SrcBlitY1,
+ DstBlitX,
+ DstBlitY,
+ DstBlitW,
+ DstBlitH);
+
+ //The return value is ignored on purpose here.
+ //NOTE(review): presumably this call updates the modification tracking of the destination region - confirm.
+ Vmm.IsRegionModified(DstKey, ImageUtils.GetSize(DstTexture), NvGpuBufferType.Texture);
+ }
+
+ //Maps the "linear" register flag to a memory layout:
+ //pitch when set, block linear otherwise.
+ private static GalMemoryLayout GetLayout(bool Linear)
+ {
+ if (Linear)
+ {
+ return GalMemoryLayout.Pitch;
+ }
+
+ return GalMemoryLayout.BlockLinear;
+ }
+
+ //Combines two consecutive registers into a 64-bit value.
+ //The first register holds the upper 32 bits, the second the lower 32 bits.
+ private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg)
+ {
+ long High = Registers[(int)Reg + 0];
+ long Low = (uint)Registers[(int)Reg + 1];
+
+ return (High << 32) | Low;
+ }
+
+ //Stores the call argument in the register indexed by the method number.
+ private void WriteRegister(GpuMethodCall MethCall)
+ {
+ int RegIndex = MethCall.Method;
+
+ Registers[RegIndex] = MethCall.Argument;
+ }
+
+ //Reads a 64-bit fixed point value from two consecutive registers.
+ //Reg holds the fractional (lower) half, Reg + 1 the integer (upper) half.
+ private long ReadRegisterFixed1_31_32(NvGpuEngine2dReg Reg)
+ {
+ uint FractPart = (uint)ReadRegister(Reg + 0);
+ uint IntPart = (uint)ReadRegister(Reg + 1);
+
+ return ((long)IntPart << 32) | FractPart;
+ }
+
+ //Returns the raw value of the given register.
+ private int ReadRegister(NvGpuEngine2dReg Reg) => Registers[(int)Reg];
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs
new file mode 100644
index 00000000..c1c0dba2
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs
@@ -0,0 +1,39 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Register indices of the 2D engine; the values index NvGpuEngine2d.Registers.
+ enum NvGpuEngine2dReg
+ {
+ //Destination surface description.
+ DstFormat = 0x80,
+ DstLinear = 0x81,
+ DstBlockDimensions = 0x82,
+ DstDepth = 0x83,
+ DstLayer = 0x84,
+ DstPitch = 0x85,
+ DstWidth = 0x86,
+ DstHeight = 0x87,
+ //First of two registers, upper half of the address comes first.
+ DstAddress = 0x88,
+ //Source surface description.
+ SrcFormat = 0x8c,
+ SrcLinear = 0x8d,
+ SrcBlockDimensions = 0x8e,
+ SrcDepth = 0x8f,
+ SrcLayer = 0x90,
+ SrcPitch = 0x91,
+ SrcWidth = 0x92,
+ SrcHeight = 0x93,
+ //First of two registers, upper half of the address comes first.
+ SrcAddress = 0x94,
+ ClipEnable = 0xa4,
+ CopyOperation = 0xab,
+ BlitControl = 0x223,
+ //Destination rectangle of the blit.
+ BlitDstX = 0x22c,
+ BlitDstY = 0x22d,
+ BlitDstW = 0x22e,
+ BlitDstH = 0x22f,
+ //Fixed point pairs: the Fract register is the lower half, the Int register the upper half.
+ BlitDuDxFract = 0x230,
+ BlitDuDxInt = 0x231,
+ BlitDvDyFract = 0x232,
+ BlitDvDyInt = 0x233,
+ BlitSrcXFract = 0x234,
+ BlitSrcXInt = 0x235,
+ BlitSrcYFract = 0x236,
+ //Writing this register starts the blit.
+ BlitSrcYInt = 0x237
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs
new file mode 100644
index 00000000..6fb038ac
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs
@@ -0,0 +1,1014 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Memory;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class NvGpuEngine3d : INvGpuEngine
+ {
+ public int[] Registers { get; private set; }
+
+ private NvGpu Gpu;
+
+ private Dictionary<int, NvGpuMethod> Methods;
+
+ //State of one constant buffer binding.
+ private struct ConstBuffer
+ {
+ public bool Enabled;
+ //Address of the buffer; UploadTextures reads texture handles relative to it.
+ public long Position;
+ public int Size;
+ }
+
+ private ConstBuffer[][] ConstBuffers;
+
+ private int CurrentInstance = 0;
+
+ //Creates the 3D engine: allocates the register array, registers the
+ //methods that have dedicated handlers and writes the default register values.
+ public NvGpuEngine3d(NvGpu Gpu)
+ {
+ this.Gpu = Gpu;
+
+ Registers = new int[0xe00];
+
+ Methods = new Dictionary<int, NvGpuMethod>();
+
+ //Registers Method for Count method numbers, starting at Meth and Stride apart.
+ void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
+ {
+ for (int Entry = 0; Entry < Count; Entry++)
+ {
+ Methods.Add(Meth + Entry * Stride, Method);
+ }
+ }
+
+ AddMethod(0x585, 1, 1, VertexEndGl);
+ AddMethod(0x674, 1, 1, ClearBuffers);
+ AddMethod(0x6c3, 1, 1, QueryControl);
+ AddMethod(0x8e4, 16, 1, CbData);
+ AddMethod(0x904, 5, 8, CbBind);
+
+ //6 groups of 18 constant buffer slots each.
+ ConstBuffers = new ConstBuffer[6][];
+
+ for (int Stage = 0; Stage < ConstBuffers.Length; Stage++)
+ {
+ ConstBuffers[Stage] = new ConstBuffer[18];
+ }
+
+ //Ensure that all components are enabled by default.
+ //FIXME: Is this correct?
+ WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111);
+
+ WriteRegister(NvGpuEngine3dReg.FrameBufferSrgb, 1);
+
+ //Default blend state of every render target: Add equation, One/Zero factors.
+ for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++)
+ {
+ int Offset = Index * 8;
+
+ WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb + Offset, (int)GalBlendEquation.FuncAdd);
+ WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb + Offset, (int)GalBlendFactor.One);
+ WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb + Offset, (int)GalBlendFactor.Zero);
+ WriteRegister(NvGpuEngine3dReg.IBlendNEquationAlpha + Offset, (int)GalBlendEquation.FuncAdd);
+ WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha + Offset, (int)GalBlendFactor.One);
+ WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha + Offset, (int)GalBlendFactor.Zero);
+ }
+ }
+
+ //Dispatches a method call: methods with a registered handler are passed
+ //to it, every other method is a plain register write.
+ public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ if (!Methods.TryGetValue(MethCall.Method, out NvGpuMethod Handler))
+ {
+ WriteRegister(MethCall);
+
+ return;
+ }
+
+ Handler(Vmm, MethCall);
+ }
+
+ //Handler for method 0x585: builds the pipeline state from the registers,
+ //sets up render targets, shaders, textures, constant buffers and vertex
+ //arrays, then dispatches the draw. The caches stay locked meanwhile.
+ private void VertexEndGl(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ LockCaches();
+
+ GalPipelineState State = new GalPipelineState();
+
+ SetFrameBuffer(State);
+ SetFrontFace(State);
+ SetCullFace(State);
+ SetDepth(State);
+ SetStencil(State);
+ SetBlending(State);
+ SetColorMask(State);
+ SetPrimitiveRestart(State);
+
+ //Set up (or unbind) each of the 8 color targets.
+ for (int FbIndex = 0; FbIndex < 8; FbIndex++)
+ {
+ SetFrameBuffer(Vmm, FbIndex);
+ }
+
+ SetZeta(Vmm);
+
+ SetRenderTargets();
+
+ //Keys identify the shader bound to each stage, indexed by GalShaderType.
+ long[] Keys = UploadShaders(Vmm);
+
+ Gpu.Renderer.Shader.BindProgram();
+
+ UploadTextures(Vmm, State, Keys);
+ UploadConstBuffers(Vmm, State, Keys);
+ UploadVertexArrays(Vmm, State);
+
+ DispatchRender(Vmm, State);
+
+ UnlockCaches();
+ }
+
+ //Locks the buffer, rasterizer and texture caches of the renderer.
+ //Counterpart of UnlockCaches.
+ private void LockCaches()
+ {
+ Gpu.Renderer.Buffer.LockCache();
+ Gpu.Renderer.Rasterizer.LockCaches();
+ Gpu.Renderer.Texture.LockCache();
+ }
+
+ //Unlocks the buffer, rasterizer and texture caches of the renderer.
+ //Counterpart of LockCaches.
+ private void UnlockCaches()
+ {
+ Gpu.Renderer.Buffer.UnlockCache();
+ Gpu.Renderer.Rasterizer.UnlockCaches();
+ Gpu.Renderer.Texture.UnlockCache();
+ }
+
+ //Handler for method 0x674: clears the attachment selected by the argument
+ //using the clear color, depth and stencil values stored in the registers.
+ private void ClearBuffers(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ //Argument layout: bits 9:6 select the attachment, bits 5:0 are the clear flags.
+ int Attachment = (MethCall.Argument >> 6) & 0xf;
+
+ GalClearBufferFlags Flags = (GalClearBufferFlags)(MethCall.Argument & 0x3f);
+
+ float Red = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 0);
+ float Green = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 1);
+ float Blue = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 2);
+ float Alpha = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 3);
+
+ float Depth = ReadRegisterFloat(NvGpuEngine3dReg.ClearDepth);
+
+ int Stencil = ReadRegister(NvGpuEngine3dReg.ClearStencil);
+
+ //Make sure the targets being cleared are set up before clearing.
+ SetFrameBuffer(Vmm, Attachment);
+
+ SetZeta(Vmm);
+
+ SetRenderTargets();
+
+ Gpu.Renderer.RenderTarget.Bind();
+
+ Gpu.Renderer.Rasterizer.ClearBuffers(Flags, Attachment, Red, Green, Blue, Alpha, Depth, Stencil);
+
+ Gpu.Renderer.Pipeline.ResetDepthMask();
+ Gpu.Renderer.Pipeline.ResetColorMask(Attachment);
+ }
+
+ //Sets up the color target with the given index from its registers and
+ //updates its viewport. The target is unbound when it has no address or format.
+ private void SetFrameBuffer(NvGpuVmm Vmm, int FbIndex)
+ {
+ //Each frame buffer uses a group of 0x10 registers.
+ long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.FrameBufferNAddress + FbIndex * 0x10);
+
+ int SurfFormat = ReadRegister(NvGpuEngine3dReg.FrameBufferNFormat + FbIndex * 0x10);
+
+ if (VA == 0 || SurfFormat == 0)
+ {
+ Gpu.Renderer.RenderTarget.UnbindColor(FbIndex);
+
+ return;
+ }
+
+ long Key = Vmm.GetPhysicalAddress(VA);
+
+ int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10);
+ int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10);
+
+ int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10);
+
+ //Block height is a log2 value in bits 6:4, bit 12 selects the memory layout.
+ int GobBlockHeight = 1 << ((BlockDim >> 4) & 7);
+
+ GalMemoryLayout Layout = (GalMemoryLayout)((BlockDim >> 12) & 1);
+
+ //Each viewport uses a group of 8 registers.
+ float TX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateX + FbIndex * 8);
+ float TY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateY + FbIndex * 8);
+
+ float SX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleX + FbIndex * 8);
+ float SY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleY + FbIndex * 8);
+
+ //The viewport spans translate -/+ |scale| on each axis, with the origin clamped to zero.
+ int VpX = (int)MathF.Max(0, TX - MathF.Abs(SX));
+ int VpY = (int)MathF.Max(0, TY - MathF.Abs(SY));
+
+ int VpW = (int)(TX + MathF.Abs(SX)) - VpX;
+ int VpH = (int)(TY + MathF.Abs(SY)) - VpY;
+
+ GalImageFormat Format = ImageUtils.ConvertSurface((GalSurfaceFormat)SurfFormat);
+
+ GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format);
+
+ Gpu.ResourceManager.SendColorBuffer(Vmm, Key, FbIndex, Image);
+
+ Gpu.Renderer.RenderTarget.SetViewport(FbIndex, VpX, VpY, VpW, VpH);
+ }
+
+ //Fills the sRGB flag and the flip signs of the pipeline state.
+ //The Y flip is negated when bit 0 of ScreenYControl is set.
+ private void SetFrameBuffer(GalPipelineState State)
+ {
+ State.FramebufferSrgb = ReadRegisterBool(NvGpuEngine3dReg.FrameBufferSrgb);
+
+ State.FlipX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX);
+ State.FlipY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY);
+
+ bool NegateY = (ReadRegister(NvGpuEngine3dReg.ScreenYControl) & 1) != 0;
+
+ if (!NegateY)
+ {
+ return;
+ }
+
+ State.FlipY = -State.FlipY;
+ }
+
+ //Sets up the zeta (depth/stencil) target from its registers.
+ //The target is unbound when it is disabled or has no address or format.
+ private void SetZeta(NvGpuVmm Vmm)
+ {
+ long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.ZetaAddress);
+
+ int ZetaFormat = ReadRegister(NvGpuEngine3dReg.ZetaFormat);
+
+ int BlockDim = ReadRegister(NvGpuEngine3dReg.ZetaBlockDimensions);
+
+ //Same decoding as the color targets: log2 block height in bits 6:4, layout in bit 12.
+ int GobBlockHeight = 1 << ((BlockDim >> 4) & 7);
+
+ GalMemoryLayout Layout = (GalMemoryLayout)((BlockDim >> 12) & 1); //?
+
+ bool ZetaEnable = ReadRegisterBool(NvGpuEngine3dReg.ZetaEnable);
+
+ if (VA == 0 || ZetaFormat == 0 || !ZetaEnable)
+ {
+ Gpu.Renderer.RenderTarget.UnbindZeta();
+
+ return;
+ }
+
+ long Key = Vmm.GetPhysicalAddress(VA);
+
+ int Width = ReadRegister(NvGpuEngine3dReg.ZetaHoriz);
+ int Height = ReadRegister(NvGpuEngine3dReg.ZetaVert);
+
+ GalImageFormat Format = ImageUtils.ConvertZeta((GalZetaFormat)ZetaFormat);
+
+ GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format);
+
+ Gpu.ResourceManager.SendZetaBuffer(Vmm, Key, Image);
+ }
+
+ //Creates and binds the shader of every enabled program and unbinds the
+ //disabled ones. Returns the shader keys (addresses) indexed by
+ //GalShaderType; entries of disabled stages stay zero.
+ private long[] UploadShaders(NvGpuVmm Vmm)
+ {
+ long[] Keys = new long[5];
+
+ long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
+
+ //Program 0 is vertex program A and is handled separately below,
+ //so the generic loop starts at program 1 (vertex program B).
+ int Index = 1;
+
+ int VpAControl = ReadRegister(NvGpuEngine3dReg.ShaderNControl);
+
+ bool VpAEnable = (VpAControl & 1) != 0;
+
+ if (VpAEnable)
+ {
+ //Note: The maxwell supports 2 vertex programs, usually
+ //only VP B is used, but in some cases VP A is also used.
+ //In this case, it seems to function as an extra vertex
+ //shader stage.
+ //The graphics abstraction layer has a special overload for this
+ //case, which should merge the two shaders into one vertex shader.
+ int VpAOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset);
+ int VpBOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + 0x10);
+
+ long VpAPos = BasePosition + (uint)VpAOffset;
+ long VpBPos = BasePosition + (uint)VpBOffset;
+
+ //The merged shader is identified by the address of VP B.
+ Keys[(int)GalShaderType.Vertex] = VpBPos;
+
+ Gpu.Renderer.Shader.Create(Vmm, VpAPos, VpBPos, GalShaderType.Vertex);
+ Gpu.Renderer.Shader.Bind(VpBPos);
+
+ //VP B was already handled together with VP A, skip it in the loop.
+ Index = 2;
+ }
+
+ //Each program uses a group of 0x10 registers.
+ for (; Index < 6; Index++)
+ {
+ GalShaderType Type = GetTypeFromProgram(Index);
+
+ int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10);
+ int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10);
+
+ //Note: Vertex Program (B) is always enabled.
+ bool Enable = (Control & 1) != 0 || Index == 1;
+
+ if (!Enable)
+ {
+ Gpu.Renderer.Shader.Unbind(Type);
+
+ continue;
+ }
+
+ long Key = BasePosition + (uint)Offset;
+
+ Keys[(int)Type] = Key;
+
+ Gpu.Renderer.Shader.Create(Vmm, Key, Type);
+ Gpu.Renderer.Shader.Bind(Key);
+ }
+
+ return Keys;
+ }
+
+ //Maps a shader program index (0 to 5) to its shader stage.
+ //Programs 0 and 1 are both vertex programs.
+ //Throws ArgumentOutOfRangeException for any other index.
+ private static GalShaderType GetTypeFromProgram(int Program)
+ {
+ if (Program == 0 || Program == 1)
+ {
+ return GalShaderType.Vertex;
+ }
+
+ if (Program == 2)
+ {
+ return GalShaderType.TessControl;
+ }
+
+ if (Program == 3)
+ {
+ return GalShaderType.TessEvaluation;
+ }
+
+ if (Program == 4)
+ {
+ return GalShaderType.Geometry;
+ }
+
+ if (Program == 5)
+ {
+ return GalShaderType.Fragment;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(Program));
+ }
+
+ //Sets the front face winding of the pipeline state. The winding from the
+ //register is swapped when the X and Y flip signs differ.
+ private void SetFrontFace(GalPipelineState State)
+ {
+ float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX);
+ float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY);
+
+ GalFrontFace FrontFace = (GalFrontFace)ReadRegister(NvGpuEngine3dReg.FrontFace);
+
+ //Flipping a single axis inverts the winding, so invert the front face too.
+ if (SignX != SignY)
+ {
+ if (FrontFace == GalFrontFace.CW)
+ {
+ FrontFace = GalFrontFace.CCW;
+ }
+ else if (FrontFace == GalFrontFace.CCW)
+ {
+ FrontFace = GalFrontFace.CW;
+ }
+ }
+
+ State.FrontFace = FrontFace;
+ }
+
+ //Sets the face culling state. The cull face is only read when culling is enabled.
+ private void SetCullFace(GalPipelineState State)
+ {
+ State.CullFaceEnabled = ReadRegisterBool(NvGpuEngine3dReg.CullFaceEnable);
+
+ if (!State.CullFaceEnabled)
+ {
+ return;
+ }
+
+ State.CullFace = (GalCullFace)ReadRegister(NvGpuEngine3dReg.CullFace);
+ }
+
+ //Sets the depth test/write state and the depth range.
+ //The depth function is only read when the depth test is enabled.
+ private void SetDepth(GalPipelineState State)
+ {
+ bool TestEnabled = ReadRegisterBool(NvGpuEngine3dReg.DepthTestEnable);
+ bool WriteEnabled = ReadRegisterBool(NvGpuEngine3dReg.DepthWriteEnable);
+
+ State.DepthTestEnabled = TestEnabled;
+ State.DepthWriteEnabled = WriteEnabled;
+
+ if (State.DepthTestEnabled)
+ {
+ State.DepthFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.DepthTestFunction);
+ }
+
+ State.DepthRangeNear = ReadRegisterFloat(NvGpuEngine3dReg.DepthRangeNNear);
+ State.DepthRangeFar = ReadRegisterFloat(NvGpuEngine3dReg.DepthRangeNFar);
+ }
+
+ //Sets the stencil state. The back and front face parameters are only
+ //read when the stencil test is enabled.
+ private void SetStencil(GalPipelineState State)
+ {
+ State.StencilTestEnabled = ReadRegisterBool(NvGpuEngine3dReg.StencilEnable);
+
+ if (!State.StencilTestEnabled)
+ {
+ return;
+ }
+
+ //Back face.
+ State.StencilBackFuncFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.StencilBackFuncFunc);
+ State.StencilBackFuncRef = ReadRegister(NvGpuEngine3dReg.StencilBackFuncRef);
+ State.StencilBackFuncMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilBackFuncMask);
+ State.StencilBackOpFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpFail);
+ State.StencilBackOpZFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpZFail);
+ State.StencilBackOpZPass = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpZPass);
+ State.StencilBackMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilBackMask);
+
+ //Front face.
+ State.StencilFrontFuncFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.StencilFrontFuncFunc);
+ State.StencilFrontFuncRef = ReadRegister(NvGpuEngine3dReg.StencilFrontFuncRef);
+ State.StencilFrontFuncMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilFrontFuncMask);
+ State.StencilFrontOpFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpFail);
+ State.StencilFrontOpZFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpZFail);
+ State.StencilFrontOpZPass = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpZPass);
+ State.StencilFrontMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilFrontMask);
+ }
+
+ //Sets the blend state of every render target. With independent blending
+ //each target uses its own register group, otherwise all targets share
+ //the common blend registers.
+ private void SetBlending(GalPipelineState State)
+ {
+ bool BlendIndependent = ReadRegisterBool(NvGpuEngine3dReg.BlendIndependent);
+
+ State.BlendIndependent = BlendIndependent;
+
+ for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++)
+ {
+ if (BlendIndependent)
+ {
+ State.Blends[Index].Enabled = ReadRegisterBool(NvGpuEngine3dReg.IBlendNEnable + Index);
+
+ if (!State.Blends[Index].Enabled)
+ {
+ continue;
+ }
+
+ //Each target uses a group of 8 registers.
+ int Offset = Index * 8;
+
+ State.Blends[Index].SeparateAlpha = ReadRegisterBool(NvGpuEngine3dReg.IBlendNSeparateAlpha + Offset);
+
+ State.Blends[Index].EquationRgb = ReadBlendEquation(NvGpuEngine3dReg.IBlendNEquationRgb + Offset);
+ State.Blends[Index].FuncSrcRgb = ReadBlendFactor(NvGpuEngine3dReg.IBlendNFuncSrcRgb + Offset);
+ State.Blends[Index].FuncDstRgb = ReadBlendFactor(NvGpuEngine3dReg.IBlendNFuncDstRgb + Offset);
+ State.Blends[Index].EquationAlpha = ReadBlendEquation(NvGpuEngine3dReg.IBlendNEquationAlpha + Offset);
+ State.Blends[Index].FuncSrcAlpha = ReadBlendFactor(NvGpuEngine3dReg.IBlendNFuncSrcAlpha + Offset);
+ State.Blends[Index].FuncDstAlpha = ReadBlendFactor(NvGpuEngine3dReg.IBlendNFuncDstAlpha + Offset);
+ }
+ else
+ {
+ //Even with independent blend disabled, the first IBlend enable register
+ //still seems to tell whether blending is enabled or not (?).
+ State.Blends[Index].Enabled = ReadRegisterBool(NvGpuEngine3dReg.IBlendNEnable);
+
+ if (!State.Blends[Index].Enabled)
+ {
+ continue;
+ }
+
+ State.Blends[Index].SeparateAlpha = ReadRegisterBool(NvGpuEngine3dReg.BlendSeparateAlpha);
+
+ State.Blends[Index].EquationRgb = ReadBlendEquation(NvGpuEngine3dReg.BlendEquationRgb);
+ State.Blends[Index].FuncSrcRgb = ReadBlendFactor(NvGpuEngine3dReg.BlendFuncSrcRgb);
+ State.Blends[Index].FuncDstRgb = ReadBlendFactor(NvGpuEngine3dReg.BlendFuncDstRgb);
+ State.Blends[Index].EquationAlpha = ReadBlendEquation(NvGpuEngine3dReg.BlendEquationAlpha);
+ State.Blends[Index].FuncSrcAlpha = ReadBlendFactor(NvGpuEngine3dReg.BlendFuncSrcAlpha);
+ State.Blends[Index].FuncDstAlpha = ReadBlendFactor(NvGpuEngine3dReg.BlendFuncDstAlpha);
+ }
+ }
+ }
+
+ //Reads a register and reinterprets its value as a blend equation.
+ private GalBlendEquation ReadBlendEquation(NvGpuEngine3dReg Register) => (GalBlendEquation)ReadRegister(Register);
+
+ //Reads a register and reinterprets its value as a blend factor.
+ private GalBlendFactor ReadBlendFactor(NvGpuEngine3dReg Register) => (GalBlendFactor)ReadRegister(Register);
+
+ //Sets the color write mask of every render target. With a common mask all
+ //targets use the first mask register, otherwise each one uses its own.
+ private void SetColorMask(GalPipelineState State)
+ {
+ bool ColorMaskCommon = ReadRegisterBool(NvGpuEngine3dReg.ColorMaskCommon);
+
+ State.ColorMaskCommon = ColorMaskCommon;
+
+ for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++)
+ {
+ int MaskIndex = ColorMaskCommon ? 0 : Index;
+
+ int ColorMask = ReadRegister(NvGpuEngine3dReg.ColorMaskN + MaskIndex);
+
+ //Each channel has a nibble, the channel is enabled when any of its bits is set.
+ State.ColorMasks[Index].Red = (ColorMask & 0x000f) != 0;
+ State.ColorMasks[Index].Green = (ColorMask & 0x00f0) != 0;
+ State.ColorMasks[Index].Blue = (ColorMask & 0x0f00) != 0;
+ State.ColorMasks[Index].Alpha = (ColorMask & 0xf000) != 0;
+ }
+ }
+
+ //Sets the primitive restart state. The restart index is only read
+ //when primitive restart is enabled.
+ private void SetPrimitiveRestart(GalPipelineState State)
+ {
+ State.PrimitiveRestartEnabled = ReadRegisterBool(NvGpuEngine3dReg.PrimRestartEnable);
+
+ if (!State.PrimitiveRestartEnabled)
+ {
+ return;
+ }
+
+ State.PrimitiveRestartIndex = (uint)ReadRegister(NvGpuEngine3dReg.PrimRestartIndex);
+ }
+
+ //Decodes the RTControl register into a render target map and passes it to the renderer.
+ //Bits 0-3 hold the number of targets; starting at bit 4, each 3-bit field holds one map entry.
+ //A count of zero clears the map (null is passed).
+ private void SetRenderTargets()
+ {
+     //The separate frag data flag is currently not read:
+     //bool SeparateFragData = ReadRegisterBool(NvGpuEngine3dReg.RTSeparateFragData);
+
+     uint Control = (uint)ReadRegister(NvGpuEngine3dReg.RTControl);
+
+     uint Count = Control & 0xf;
+
+     if (Count == 0)
+     {
+         Gpu.Renderer.RenderTarget.SetMap(null);
+
+         return;
+     }
+
+     int[] Map = new int[Count];
+
+     for (int Slot = 0, Shift = 4; Slot < Count; Slot++, Shift += 3)
+     {
+         Map[Slot] = (int)((Control >> Shift) & 7);
+     }
+
+     Gpu.Renderer.RenderTarget.SetMap(Map);
+ }
+
+ //Uploads every texture used by the given shader stages and binds them to the renderer.
+ //Texture handles are read from the const buffer named by the shader declaration, or from
+ //the const buffer selected by TextureCbIndex otherwise. Textures are bound to consecutive
+ //units in discovery order; entries that failed to upload (key 0) keep their unit number
+ //but are not bound.
+ private void UploadTextures(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
+ {
+     long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
+
+     int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);
+
+     var Pending = new List<(long, GalImage, GalTextureSampler)>();
+
+     for (int Stage = 0; Stage < Keys.Length; Stage++)
+     {
+         foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetTextureUsage(Keys[Stage]))
+         {
+             int CbIndex = DeclInfo.IsCb ? DeclInfo.Cbuf : TextureCbIndex;
+
+             long HandlePosition = ConstBuffers[Stage][CbIndex].Position + DeclInfo.Index * 4;
+
+             int TextureHandle = Vmm.ReadInt32(HandlePosition);
+
+             Pending.Add(UploadTexture(Vmm, TextureHandle));
+         }
+     }
+
+     int Unit = 0;
+
+     foreach ((long Key, GalImage Image, GalTextureSampler Sampler) in Pending)
+     {
+         if (Key != 0)
+         {
+             Gpu.Renderer.Texture.Bind(Key, Unit, Image);
+             Gpu.Renderer.Texture.SetSampler(Sampler);
+         }
+
+         Unit++;
+     }
+ }
+
+ //Builds the image and sampler descriptions for a texture handle and sends the texture
+ //to the resource manager. The low 20 bits of the handle index the texture header pool;
+ //the sampler index is the next 12 bits, or the header index when LinkedTsc is set.
+ //Returns a key of 0 (with default image and sampler) when the handle is 0 or the
+ //texture address has no physical mapping.
+ private (long, GalImage, GalTextureSampler) UploadTexture(NvGpuVmm Vmm, int TextureHandle)
+ {
+     if (TextureHandle == 0)
+     {
+         //FIXME: Some games like puyo puyo will use handles with the value 0.
+         //This is a bug, most likely caused by sync issues.
+         return (0, default(GalImage), default(GalTextureSampler));
+     }
+
+     bool LinkedTsc = ReadRegisterBool(NvGpuEngine3dReg.LinkedTsc);
+
+     int TicIndex = TextureHandle & 0xfffff;
+
+     int TscIndex = TicIndex;
+
+     if (!LinkedTsc)
+     {
+         TscIndex = (TextureHandle >> 20) & 0xfff;
+     }
+
+     //Both pool entries are 0x20 bytes long.
+     long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset) + TicIndex * 0x20;
+     long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset) + TscIndex * 0x20;
+
+     GalImage Image = TextureFactory.MakeTexture(Vmm, TicPosition);
+
+     GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition);
+
+     //The texture address is the low 48 bits of the quad word at offset 4 of the header.
+     long Address = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;
+
+     //Drop the low address bits according to the memory layout.
+     switch (Image.Layout)
+     {
+         case GalMemoryLayout.BlockLinear: Address &= ~0x1ffL; break;
+         case GalMemoryLayout.Pitch:       Address &= ~0x1fL;  break;
+     }
+
+     long Key = Vmm.GetPhysicalAddress(Address);
+
+     if (Key == -1)
+     {
+         //FIXME: Shouldn't ignore invalid addresses.
+         return (0, default(GalImage), default(GalTextureSampler));
+     }
+
+     Gpu.ResourceManager.SendTexture(Vmm, Key, Image);
+
+     return (Key, Image, Sampler);
+ }
+
+ //Uploads the enabled const buffers used by each shader stage and records their keys on
+ //the pipeline state. Buffer data is only sent again when the resource manager reports the
+ //memory region as modified; the host pointer is used when available, a copy otherwise.
+ private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
+ {
+     for (int Stage = 0; Stage < Keys.Length; Stage++)
+     {
+         foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetConstBufferUsage(Keys[Stage]))
+         {
+             ConstBuffer Cb = ConstBuffers[Stage][DeclInfo.Cbuf];
+
+             if (Cb.Enabled)
+             {
+                 long Key = Vmm.GetPhysicalAddress(Cb.Position);
+
+                 bool Modified = Gpu.ResourceManager.MemoryRegionModified(Vmm, Key, Cb.Size, NvGpuBufferType.ConstBuffer);
+
+                 if (Modified)
+                 {
+                     if (Vmm.TryGetHostAddress(Cb.Position, Cb.Size, out IntPtr HostPtr))
+                     {
+                         Gpu.Renderer.Buffer.SetData(Key, Cb.Size, HostPtr);
+                     }
+                     else
+                     {
+                         byte[] Data = Vmm.ReadBytes(Cb.Position, Cb.Size);
+
+                         Gpu.Renderer.Buffer.SetData(Key, Data);
+                     }
+                 }
+
+                 State.ConstBufferKeys[Stage][DeclInfo.Cbuf] = Key;
+             }
+         }
+     }
+ }
+
+ //Uploads the index buffer (when IndexBatchCount is non-zero) and every enabled vertex
+ //array referenced by a vertex attribute, then fills State.VertexBindings.
+ //Throws InvalidOperationException when the index format yields an entry size above 4 bytes.
+ private void UploadVertexArrays(NvGpuVmm Vmm, GalPipelineState State)
+ {
+ long IbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
+
+ long IboKey = Vmm.GetPhysicalAddress(IbPosition);
+
+ int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
+ int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
+ int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);
+
+ GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
+
+ GalIndexFormat IndexFormat = (GalIndexFormat)IndexEntryFmt;
+
+ //The format register is used as log2 of the index entry size in bytes.
+ int IndexEntrySize = 1 << IndexEntryFmt;
+
+ if (IndexEntrySize > 4)
+ {
+ throw new InvalidOperationException("Invalid index entry size \"" + IndexEntrySize + "\"!");
+ }
+
+ if (IndexCount != 0)
+ {
+ int IbSize = IndexCount * IndexEntrySize;
+
+ bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize);
+
+ //Quads and quad strips get their index data converted to triangles before upload.
+ bool UsesLegacyQuads =
+ PrimType == GalPrimitiveType.Quads ||
+ PrimType == GalPrimitiveType.QuadStrip;
+
+ if (!IboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
+ {
+ if (!UsesLegacyQuads)
+ {
+ if (Vmm.TryGetHostAddress(IbPosition, IbSize, out IntPtr IbPtr))
+ {
+ Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, IbPtr);
+ }
+ else
+ {
+ Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, Vmm.ReadBytes(IbPosition, IbSize));
+ }
+ }
+ else
+ {
+ byte[] Buffer = Vmm.ReadBytes(IbPosition, IbSize);
+
+ if (PrimType == GalPrimitiveType.Quads)
+ {
+ Buffer = QuadHelper.ConvertIbQuadsToTris(Buffer, IndexEntrySize, IndexCount);
+ }
+ else /* if (PrimType == GalPrimitiveType.QuadStrip) */
+ {
+ Buffer = QuadHelper.ConvertIbQuadStripToTris(Buffer, IndexEntrySize, IndexCount);
+ }
+
+ //NOTE(review): IbSize is still the size of the unconverted buffer here - confirm this is what CreateIbo expects.
+ Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, Buffer);
+ }
+ }
+
+ if (!UsesLegacyQuads)
+ {
+ Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat);
+ }
+ else
+ {
+ if (PrimType == GalPrimitiveType.Quads)
+ {
+ Gpu.Renderer.Rasterizer.SetIndexArray(QuadHelper.ConvertIbSizeQuadsToTris(IbSize), IndexFormat);
+ }
+ else /* if (PrimType == GalPrimitiveType.QuadStrip) */
+ {
+ Gpu.Renderer.Rasterizer.SetIndexArray(QuadHelper.ConvertIbSizeQuadStripToTris(IbSize), IndexFormat);
+ }
+ }
+ }
+
+ //Group the 16 vertex attributes by the vertex array they read from
+ //(the low 5 bits of the packed attribute format).
+ List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];
+
+ for (int Attr = 0; Attr < 16; Attr++)
+ {
+ int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr);
+
+ int ArrayIndex = Packed & 0x1f;
+
+ if (Attribs[ArrayIndex] == null)
+ {
+ Attribs[ArrayIndex] = new List<GalVertexAttrib>();
+ }
+
+ long VbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + ArrayIndex * 4);
+
+ bool IsConst = ((Packed >> 6) & 1) != 0;
+
+ int Offset = (Packed >> 7) & 0x3fff;
+
+ GalVertexAttribSize Size = (GalVertexAttribSize)((Packed >> 21) & 0x3f);
+ GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7);
+
+ bool IsRgba = ((Packed >> 31) & 1) != 0;
+
+ //Note: 16 is the maximum size of an attribute,
+ //having a component size of 32-bits with 4 elements (a vec4).
+ byte[] Data = Vmm.ReadBytes(VbPosition + Offset, 16);
+
+ Attribs[ArrayIndex].Add(new GalVertexAttrib(Attr, IsConst, Offset, Data, Size, Type, IsRgba));
+ }
+
+ State.VertexBindings = new GalVertexBinding[32];
+
+ //Upload every vertex array that has at least one attribute and is enabled.
+ for (int Index = 0; Index < 32; Index++)
+ {
+ if (Attribs[Index] == null)
+ {
+ continue;
+ }
+
+ int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4);
+
+ //Bit 12 of the control register is the enable flag, the low 12 bits are the stride.
+ bool Enable = (Control & 0x1000) != 0;
+
+ if (!Enable)
+ {
+ continue;
+ }
+
+ long VbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
+ long VbEndPos = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2);
+
+ int VertexDivisor = ReadRegister(NvGpuEngine3dReg.VertexArrayNDivisor + Index * 4);
+
+ bool Instanced = ReadRegisterBool(NvGpuEngine3dReg.VertexArrayNInstance + Index);
+
+ int Stride = Control & 0xfff;
+
+ //For instanced arrays, advance the base address to the element of the current instance.
+ if (Instanced && VertexDivisor != 0)
+ {
+ VbPosition += Stride * (CurrentInstance / VertexDivisor);
+ }
+
+ if (VbPosition > VbEndPos)
+ {
+ //Instance is invalid, ignore the draw call
+ continue;
+ }
+
+ long VboKey = Vmm.GetPhysicalAddress(VbPosition);
+
+ //Presumably the end address is inclusive, hence the + 1.
+ long VbSize = (VbEndPos - VbPosition) + 1;
+
+ bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
+
+ if (!VboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
+ {
+ if (Vmm.TryGetHostAddress(VbPosition, VbSize, out IntPtr VbPtr))
+ {
+ Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, VbPtr);
+ }
+ else
+ {
+ Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Vmm.ReadBytes(VbPosition, VbSize));
+ }
+ }
+
+ State.VertexBindings[Index].Enabled = true;
+ State.VertexBindings[Index].Stride = Stride;
+ State.VertexBindings[Index].VboKey = VboKey;
+ State.VertexBindings[Index].Instanced = Instanced;
+ State.VertexBindings[Index].Divisor = VertexDivisor;
+ State.VertexBindings[Index].Attribs = Attribs[Index].ToArray();
+ }
+ }
+
+ //Binds the pipeline state and render targets, then issues the draw described by the
+ //current registers: an indexed draw when IndexBatchCount is non-zero, an array draw otherwise.
+ //CurrentInstance is updated first from the instance flags of VertexBeginGl
+ //(bit 26 increments it, bit 27 keeps it, neither resets it to 0).
+ //Throws InvalidOperationException when both instance flags are set at the same time.
+ private void DispatchRender(NvGpuVmm Vmm, GalPipelineState State)
+ {
+     int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
+     int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);
+
+     GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
+
+     bool InstanceNext = ((PrimCtrl >> 26) & 1) != 0;
+     bool InstanceCont = ((PrimCtrl >> 27) & 1) != 0;
+
+     if (InstanceNext && InstanceCont)
+     {
+         throw new InvalidOperationException("GPU tried to increase and reset instance count at the same time");
+     }
+
+     if (InstanceNext)
+     {
+         CurrentInstance++;
+     }
+     else if (!InstanceCont)
+     {
+         CurrentInstance = 0;
+     }
+
+     State.Instance = CurrentInstance;
+
+     Gpu.Renderer.Pipeline.Bind(State);
+
+     Gpu.Renderer.RenderTarget.Bind();
+
+     if (IndexCount != 0)
+     {
+         int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
+         int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase);
+
+         long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
+
+         long IboKey = Vmm.GetPhysicalAddress(IndexPosition);
+
+         //Quad primitive types were deprecated on OpenGL 3.x,
+         //they are converted to a triangles index buffer on IB creation,
+         //so we should use the triangles type here too.
+         //Note: We assume that index first points to the first
+         //vertex of a quad, if it points to the middle of a
+         //quad (First % 4 != 0 for Quads) then it will not work properly.
+         //The first index must be converted while PrimType still holds the original
+         //quad type; only afterwards is the type replaced with Triangles.
+         if (PrimType == GalPrimitiveType.Quads)
+         {
+             IndexFirst = QuadHelper.ConvertIbSizeQuadsToTris(IndexFirst);
+
+             PrimType = GalPrimitiveType.Triangles;
+         }
+         else if (PrimType == GalPrimitiveType.QuadStrip)
+         {
+             IndexFirst = QuadHelper.ConvertIbSizeQuadStripToTris(IndexFirst);
+
+             PrimType = GalPrimitiveType.Triangles;
+         }
+
+         Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType);
+     }
+     else
+     {
+         int VertexFirst = ReadRegister(NvGpuEngine3dReg.VertexArrayFirst);
+         int VertexCount = ReadRegister(NvGpuEngine3dReg.VertexArrayCount);
+
+         Gpu.Renderer.Rasterizer.DrawArrays(VertexFirst, VertexCount, PrimType);
+     }
+
+     //Is the GPU really clearing those registers after draw?
+     WriteRegister(NvGpuEngine3dReg.IndexBatchFirst, 0);
+     WriteRegister(NvGpuEngine3dReg.IndexBatchCount, 0);
+ }
+
+ //Query operation selected by the low two bits of the QueryControl register.
+ private enum QueryMode
+ {
+ //Writes the QuerySequence register value to the query address.
+ WriteSeq,
+ //Not handled by QueryControl.
+ Sync,
+ //Writes a counter followed by a timestamp (two 64-bit values) to the query address.
+ WriteCounterAndTimestamp
+ }
+
+ //Stores the method argument on its register, then performs the query selected by the
+ //low two bits of the QueryControl register, writing the result to the query address.
+ //Modes other than WriteSeq and WriteCounterAndTimestamp write nothing.
+ private void QueryControl(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     WriteRegister(MethCall);
+
+     long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress);
+
+     int Seq = Registers[(int)NvGpuEngine3dReg.QuerySequence];
+     int Ctrl = Registers[(int)NvGpuEngine3dReg.QueryControl];
+
+     QueryMode Mode = (QueryMode)(Ctrl & 3);
+
+     if (Mode == QueryMode.WriteSeq)
+     {
+         Vmm.WriteInt32(Position, Seq);
+     }
+     else if (Mode == QueryMode.WriteCounterAndTimestamp)
+     {
+         //TODO: Implement counters.
+         long Counter = 1;
+
+         //Elapsed milliseconds scaled by a fixed factor.
+         long Timestamp = (long)(PerformanceCounter.ElapsedMilliseconds * 615384.615385);
+
+         Vmm.WriteInt64(Position + 0, Counter);
+         Vmm.WriteInt64(Position + 8, Timestamp);
+     }
+ }
+
+ //Writes one 32-bit word (the method argument) into the current const buffer at the
+ //current offset, advances the offset register by 4 and clears the const buffer cache.
+ private void CbData(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     long BufferBase = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
+
+     int WriteOffset = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset);
+
+     long Target = BufferBase + WriteOffset;
+
+     Vmm.WriteInt32(Target, MethCall.Argument);
+
+     WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, WriteOffset + 4);
+
+     Gpu.ResourceManager.ClearPbCache(NvGpuBufferType.ConstBuffer);
+ }
+
+ //Binds the const buffer described by the ConstBufferAddress/ConstBufferSize registers
+ //to a slot of a shader stage. The stage comes from the method number (8 methods per
+ //stage, starting at 0x904); the argument holds the enable flag (bit 0) and the slot (bits 4-8).
+ //The renderer buffer is created when it is not cached yet.
+ private void CbBind(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     int Argument = MethCall.Argument;
+
+     int Stage = (MethCall.Method - 0x904) >> 3;
+     int Slot = (Argument >> 4) & 0x1f;
+
+     bool Enabled = (Argument & 1) != 0;
+
+     long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
+
+     long CbKey = Vmm.GetPhysicalAddress(Position);
+
+     int Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);
+
+     bool Cached = Gpu.Renderer.Buffer.IsCached(CbKey, Size);
+
+     if (!Cached)
+     {
+         Gpu.Renderer.Buffer.Create(CbKey, Size);
+     }
+
+     ConstBuffer Current = ConstBuffers[Stage][Slot];
+
+     bool Unchanged =
+         Current.Position == Position &&
+         Current.Enabled == Enabled &&
+         Current.Size == Size;
+
+     if (Unchanged)
+     {
+         return;
+     }
+
+     ConstBuffers[Stage][Slot].Position = Position;
+     ConstBuffers[Stage][Slot].Enabled = Enabled;
+     ConstBuffers[Stage][Slot].Size = Size;
+ }
+
+ //Returns the sign of the float value stored on a register, as given by MathF.Sign.
+ private float GetFlipSign(NvGpuEngine3dReg Reg)
+ {
+     float Value = ReadRegisterFloat(Reg);
+
+     return MathF.Sign(Value);
+ }
+
+ //Combines two consecutive registers into a 64-bit value:
+ //the first register holds the high 32 bits, the second the low 32 bits.
+ private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg)
+ {
+     int RegIndex = (int)Reg;
+
+     long High = Registers[RegIndex + 0];
+     uint Low = (uint)Registers[RegIndex + 1];
+
+     return (High << 32) | Low;
+ }
+
+ private void WriteRegister(GpuMethodCall MethCall)
+ {
+ Registers[MethCall.Method] = MethCall.Argument;
+ }
+
+ private int ReadRegister(NvGpuEngine3dReg Reg)
+ {
+ return Registers[(int)Reg];
+ }
+
+ private float ReadRegisterFloat(NvGpuEngine3dReg Reg)
+ {
+ return BitConverter.Int32BitsToSingle(ReadRegister(Reg));
+ }
+
+ //Reads a register as a boolean; only bit 0 of the register is considered.
+ private bool ReadRegisterBool(NvGpuEngine3dReg Reg)
+ {
+     int LowBit = ReadRegister(Reg) & 1;
+
+     return LowBit != 0;
+ }
+
+ private void WriteRegister(NvGpuEngine3dReg Reg, int Value)
+ {
+ Registers[(int)Reg] = Value;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs
new file mode 100644
index 00000000..30243c02
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs
@@ -0,0 +1,110 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ enum NvGpuEngine3dReg
+ {
+ FrameBufferNAddress = 0x200,
+ FrameBufferNWidth = 0x202,
+ FrameBufferNHeight = 0x203,
+ FrameBufferNFormat = 0x204,
+ FrameBufferNBlockDim = 0x205,
+ ViewportNScaleX = 0x280,
+ ViewportNScaleY = 0x281,
+ ViewportNScaleZ = 0x282,
+ ViewportNTranslateX = 0x283,
+ ViewportNTranslateY = 0x284,
+ ViewportNTranslateZ = 0x285,
+ ViewportNHoriz = 0x300,
+ ViewportNVert = 0x301,
+ DepthRangeNNear = 0x302,
+ DepthRangeNFar = 0x303,
+ VertexArrayFirst = 0x35d,
+ VertexArrayCount = 0x35e,
+ ClearNColor = 0x360,
+ ClearDepth = 0x364,
+ ClearStencil = 0x368,
+ StencilBackFuncRef = 0x3d5,
+ StencilBackMask = 0x3d6,
+ StencilBackFuncMask = 0x3d7,
+ ColorMaskCommon = 0x3e4,
+ RTSeparateFragData = 0x3eb,
+ ZetaAddress = 0x3f8,
+ ZetaFormat = 0x3fa,
+ ZetaBlockDimensions = 0x3fb,
+ ZetaLayerStride = 0x3fc,
+ VertexAttribNFormat = 0x458,
+ RTControl = 0x487,
+ ZetaHoriz = 0x48a,
+ ZetaVert = 0x48b,
+ ZetaArrayMode = 0x48c,
+ LinkedTsc = 0x48d,
+ DepthTestEnable = 0x4b3,
+ BlendIndependent = 0x4b9,
+ DepthWriteEnable = 0x4ba,
+ DepthTestFunction = 0x4c3,
+ BlendSeparateAlpha = 0x4cf,
+ BlendEquationRgb = 0x4d0,
+ BlendFuncSrcRgb = 0x4d1,
+ BlendFuncDstRgb = 0x4d2,
+ BlendEquationAlpha = 0x4d3,
+ BlendFuncSrcAlpha = 0x4d4,
+ BlendFuncDstAlpha = 0x4d6,
+ BlendEnable = 0x4d7,
+ IBlendNEnable = 0x4d8,
+ StencilEnable = 0x4e0,
+ StencilFrontOpFail = 0x4e1,
+ StencilFrontOpZFail = 0x4e2,
+ StencilFrontOpZPass = 0x4e3,
+ StencilFrontFuncFunc = 0x4e4,
+ StencilFrontFuncRef = 0x4e5,
+ StencilFrontFuncMask = 0x4e6,
+ StencilFrontMask = 0x4e7,
+ ScreenYControl = 0x4eb,
+ VertexArrayElemBase = 0x50d,
+ VertexArrayInstBase = 0x50e,
+ ZetaEnable = 0x54e,
+ TexHeaderPoolOffset = 0x55d,
+ TexSamplerPoolOffset = 0x557,
+ StencilTwoSideEnable = 0x565,
+ StencilBackOpFail = 0x566,
+ StencilBackOpZFail = 0x567,
+ StencilBackOpZPass = 0x568,
+ StencilBackFuncFunc = 0x569,
+ FrameBufferSrgb = 0x56e,
+ ShaderAddress = 0x582,
+ VertexBeginGl = 0x586,
+ PrimRestartEnable = 0x591,
+ PrimRestartIndex = 0x592,
+ IndexArrayAddress = 0x5f2,
+ IndexArrayEndAddr = 0x5f4,
+ IndexArrayFormat = 0x5f6,
+ IndexBatchFirst = 0x5f7,
+ IndexBatchCount = 0x5f8,
+ VertexArrayNInstance = 0x620,
+ CullFaceEnable = 0x646,
+ FrontFace = 0x647,
+ CullFace = 0x648,
+ ColorMaskN = 0x680,
+ QueryAddress = 0x6c0,
+ QuerySequence = 0x6c2,
+ QueryControl = 0x6c3,
+ VertexArrayNControl = 0x700,
+ VertexArrayNAddress = 0x701,
+ VertexArrayNDivisor = 0x703,
+ IBlendNSeparateAlpha = 0x780,
+ IBlendNEquationRgb = 0x781,
+ IBlendNFuncSrcRgb = 0x782,
+ IBlendNFuncDstRgb = 0x783,
+ IBlendNEquationAlpha = 0x784,
+ IBlendNFuncSrcAlpha = 0x785,
+ IBlendNFuncDstAlpha = 0x786,
+ VertexArrayNEndAddr = 0x7c0,
+ ShaderNControl = 0x800,
+ ShaderNOffset = 0x801,
+ ShaderNMaxGprs = 0x803,
+ ShaderNType = 0x804,
+ ConstBufferSize = 0x8e0,
+ ConstBufferAddress = 0x8e1,
+ ConstBufferOffset = 0x8e3,
+ TextureCbIndex = 0x982
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs
new file mode 100644
index 00000000..d89059c0
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs
@@ -0,0 +1,187 @@
+using Ryujinx.Graphics.Memory;
+using Ryujinx.Graphics.Texture;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class NvGpuEngineM2mf : INvGpuEngine
+ {
+ public int[] Registers { get; private set; }
+
+ private NvGpu Gpu;
+
+ private Dictionary<int, NvGpuMethod> Methods;
+
+ public NvGpuEngineM2mf(NvGpu Gpu)
+ {
+ this.Gpu = Gpu;
+
+ Registers = new int[0x1d6];
+
+ Methods = new Dictionary<int, NvGpuMethod>();
+
+ void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
+ {
+ while (Count-- > 0)
+ {
+ Methods.Add(Meth, Method);
+
+ Meth += Stride;
+ }
+ }
+
+ AddMethod(0xc0, 1, 1, Execute);
+ }
+
+ //Runs the handler registered for the method, if any;
+ //otherwise the argument is simply stored on the register with the method's number.
+ public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     bool HasHandler = Methods.TryGetValue(MethCall.Method, out NvGpuMethod Handler);
+
+     if (!HasHandler)
+     {
+         WriteRegister(MethCall);
+
+         return;
+     }
+
+     Handler(Vmm, MethCall);
+ }
+
+ //Performs the memory to memory copy described by the current registers.
+ //The method argument is the control word: bit 7 = source is linear, bit 8 = destination
+ //is linear, bit 9 = 2D copy. A non-2D copy is a plain copy of XCount bytes.
+ private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ //TODO: Some registers and copy modes are still not implemented.
+ int Control = MethCall.Argument;
+
+ bool SrcLinear = ((Control >> 7) & 1) != 0;
+ bool DstLinear = ((Control >> 8) & 1) != 0;
+ bool Copy2d = ((Control >> 9) & 1) != 0;
+
+ long SrcAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.SrcAddress);
+ long DstAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.DstAddress);
+
+ int SrcPitch = ReadRegister(NvGpuEngineM2mfReg.SrcPitch);
+ int DstPitch = ReadRegister(NvGpuEngineM2mfReg.DstPitch);
+
+ int XCount = ReadRegister(NvGpuEngineM2mfReg.XCount);
+ int YCount = ReadRegister(NvGpuEngineM2mfReg.YCount);
+
+ int Swizzle = ReadRegister(NvGpuEngineM2mfReg.Swizzle);
+
+ //Several of the values read below (sizes Y/Z, Z positions) are not used by the copy yet.
+ int DstBlkDim = ReadRegister(NvGpuEngineM2mfReg.DstBlkDim);
+ int DstSizeX = ReadRegister(NvGpuEngineM2mfReg.DstSizeX);
+ int DstSizeY = ReadRegister(NvGpuEngineM2mfReg.DstSizeY);
+ int DstSizeZ = ReadRegister(NvGpuEngineM2mfReg.DstSizeZ);
+ int DstPosXY = ReadRegister(NvGpuEngineM2mfReg.DstPosXY);
+ int DstPosZ = ReadRegister(NvGpuEngineM2mfReg.DstPosZ);
+
+ int SrcBlkDim = ReadRegister(NvGpuEngineM2mfReg.SrcBlkDim);
+ int SrcSizeX = ReadRegister(NvGpuEngineM2mfReg.SrcSizeX);
+ int SrcSizeY = ReadRegister(NvGpuEngineM2mfReg.SrcSizeY);
+ int SrcSizeZ = ReadRegister(NvGpuEngineM2mfReg.SrcSizeZ);
+ int SrcPosXY = ReadRegister(NvGpuEngineM2mfReg.SrcPosXY);
+ int SrcPosZ = ReadRegister(NvGpuEngineM2mfReg.SrcPosZ);
+
+ //Bytes per element, stored minus one on the swizzle register.
+ int SrcCpp = ((Swizzle >> 20) & 7) + 1;
+ int DstCpp = ((Swizzle >> 24) & 7) + 1;
+
+ //X is on the low 16 bits and Y on the high 16 bits of the packed position.
+ int DstPosX = (DstPosXY >> 0) & 0xffff;
+ int DstPosY = (DstPosXY >> 16) & 0xffff;
+
+ int SrcPosX = (SrcPosXY >> 0) & 0xffff;
+ int SrcPosY = (SrcPosXY >> 16) & 0xffff;
+
+ //Block height is stored as log2 on bits 4-7 of the block dimensions register.
+ int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf);
+ int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);
+
+ long SrcPA = Vmm.GetPhysicalAddress(SrcAddress);
+ long DstPA = Vmm.GetPhysicalAddress(DstAddress);
+
+ if (Copy2d)
+ {
+ //Positions are ignored (treated as zero) for linear surfaces.
+ if (SrcLinear)
+ {
+ SrcPosX = SrcPosY = SrcPosZ = 0;
+ }
+
+ if (DstLinear)
+ {
+ DstPosX = DstPosY = DstPosZ = 0;
+ }
+
+ if (SrcLinear && DstLinear)
+ {
+ //Linear to linear: copy one whole line at a time.
+ for (int Y = 0; Y < YCount; Y++)
+ {
+ int SrcOffset = (SrcPosY + Y) * SrcPitch + SrcPosX * SrcCpp;
+ int DstOffset = (DstPosY + Y) * DstPitch + DstPosX * DstCpp;
+
+ long Src = SrcPA + (uint)SrcOffset;
+ long Dst = DstPA + (uint)DstOffset;
+
+ Vmm.Memory.CopyBytes(Src, Dst, XCount * SrcCpp);
+ }
+ }
+ else
+ {
+ //At least one side is block linear: copy element by element through the swizzles.
+ ISwizzle SrcSwizzle;
+
+ if (SrcLinear)
+ {
+ SrcSwizzle = new LinearSwizzle(SrcPitch, SrcCpp);
+ }
+ else
+ {
+ SrcSwizzle = new BlockLinearSwizzle(SrcSizeX, SrcCpp, SrcBlockHeight);
+ }
+
+ ISwizzle DstSwizzle;
+
+ if (DstLinear)
+ {
+ DstSwizzle = new LinearSwizzle(DstPitch, DstCpp);
+ }
+ else
+ {
+ DstSwizzle = new BlockLinearSwizzle(DstSizeX, DstCpp, DstBlockHeight);
+ }
+
+ for (int Y = 0; Y < YCount; Y++)
+ for (int X = 0; X < XCount; X++)
+ {
+ int SrcOffset = SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y);
+ int DstOffset = DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y);
+
+ long Src = SrcPA + (uint)SrcOffset;
+ long Dst = DstPA + (uint)DstOffset;
+
+ Vmm.Memory.CopyBytes(Src, Dst, SrcCpp);
+ }
+ }
+ }
+ else
+ {
+ Vmm.Memory.CopyBytes(SrcPA, DstPA, XCount);
+ }
+ }
+
+ private long MakeInt64From2xInt32(NvGpuEngineM2mfReg Reg)
+ {
+ return
+ (long)Registers[(int)Reg + 0] << 32 |
+ (uint)Registers[(int)Reg + 1];
+ }
+
+ private void WriteRegister(GpuMethodCall MethCall)
+ {
+ Registers[MethCall.Method] = MethCall.Argument;
+ }
+
+ private int ReadRegister(NvGpuEngineM2mfReg Reg)
+ {
+ return Registers[(int)Reg];
+ }
+
+ private void WriteRegister(NvGpuEngineM2mfReg Reg, int Value)
+ {
+ Registers[(int)Reg] = Value;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mfReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mfReg.cs
new file mode 100644
index 00000000..4bef8d9e
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mfReg.cs
@@ -0,0 +1,25 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ enum NvGpuEngineM2mfReg
+ {
+ SrcAddress = 0x100,
+ DstAddress = 0x102,
+ SrcPitch = 0x104,
+ DstPitch = 0x105,
+ XCount = 0x106,
+ YCount = 0x107,
+ Swizzle = 0x1c2,
+ DstBlkDim = 0x1c3,
+ DstSizeX = 0x1c4,
+ DstSizeY = 0x1c5,
+ DstSizeZ = 0x1c6,
+ DstPosZ = 0x1c7,
+ DstPosXY = 0x1c8,
+ SrcBlkDim = 0x1ca,
+ SrcSizeX = 0x1cb,
+ SrcSizeY = 0x1cc,
+ SrcSizeZ = 0x1cd,
+ SrcPosZ = 0x1ce,
+ SrcPosXY = 0x1cf
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs
new file mode 100644
index 00000000..68155255
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs
@@ -0,0 +1,161 @@
+using Ryujinx.Graphics.Memory;
+using Ryujinx.Graphics.Texture;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class NvGpuEngineP2mf : INvGpuEngine
+ {
+ public int[] Registers { get; private set; }
+
+ private NvGpu Gpu;
+
+ private Dictionary<int, NvGpuMethod> Methods;
+
+ private int CopyStartX;
+ private int CopyStartY;
+
+ private int CopyWidth;
+ private int CopyHeight;
+ private int CopyGobBlockHeight;
+
+ private long CopyAddress;
+
+ private int CopyOffset;
+ private int CopySize;
+
+ private bool CopyLinear;
+
+ private byte[] Buffer;
+
+ public NvGpuEngineP2mf(NvGpu Gpu)
+ {
+ this.Gpu = Gpu;
+
+ Registers = new int[0x80];
+
+ Methods = new Dictionary<int, NvGpuMethod>();
+
+ void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
+ {
+ while (Count-- > 0)
+ {
+ Methods.Add(Meth, Method);
+
+ Meth += Stride;
+ }
+ }
+
+ AddMethod(0x6c, 1, 1, Execute);
+ AddMethod(0x6d, 1, 1, PushData);
+ }
+
+ public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ if (Methods.TryGetValue(MethCall.Method, out NvGpuMethod Method))
+ {
+ Method(Vmm, MethCall);
+ }
+ else
+ {
+ WriteRegister(MethCall);
+ }
+ }
+
+ //Starts a new inline data upload: captures the destination parameters from the
+ //registers and allocates a buffer of LineLengthIn * LineCount bytes, which is then
+ //filled by the following PushData calls. Bit 0 of the argument selects a linear destination.
+ private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     //TODO: Some registers and copy modes are still not implemented.
+     int DstPitch = ReadRegister(NvGpuEngineP2mfReg.DstPitch);
+     int BlockDim = ReadRegister(NvGpuEngineP2mfReg.DstBlockDim);
+
+     CopyLinear = (MethCall.Argument & 1) != 0;
+
+     //Block height is stored as log2 on bits 4-7 of the block dimensions register.
+     CopyGobBlockHeight = 1 << ((BlockDim >> 4) & 0xf);
+
+     CopyStartX = ReadRegister(NvGpuEngineP2mfReg.DstX);
+     CopyStartY = ReadRegister(NvGpuEngineP2mfReg.DstY);
+
+     CopyWidth = ReadRegister(NvGpuEngineP2mfReg.DstWidth);
+     CopyHeight = ReadRegister(NvGpuEngineP2mfReg.DstHeight);
+
+     CopyAddress = MakeInt64From2xInt32(NvGpuEngineP2mfReg.DstAddress);
+
+     int LineLength = ReadRegister(NvGpuEngineP2mfReg.LineLengthIn);
+     int Lines = ReadRegister(NvGpuEngineP2mfReg.LineCount);
+
+     CopyOffset = 0;
+     CopySize = LineLength * Lines;
+
+     Buffer = new byte[CopySize];
+ }
+
+ //Appends the bytes of the method argument (lowest byte first) to the upload buffer,
+ //never going past the copy size. On the last call of the batch, the buffer is written
+ //to guest memory, either linearly or through a block linear swizzle, and then released.
+ //Calls made without a preceding Execute (no buffer) are ignored.
+ private void PushData(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     if (Buffer == null)
+     {
+         return;
+     }
+
+     int Word = MethCall.Argument;
+
+     int Shift = 0;
+
+     while (Shift < 32 && CopyOffset < CopySize)
+     {
+         Buffer[CopyOffset++] = (byte)(Word >> Shift);
+
+         Shift += 8;
+     }
+
+     if (!MethCall.IsLastCall)
+     {
+         return;
+     }
+
+     if (CopyLinear)
+     {
+         Vmm.WriteBytes(CopyAddress, Buffer);
+     }
+     else
+     {
+         BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight);
+
+         int Consumed = 0;
+
+         for (int Row = CopyStartY; Row < CopyHeight && Consumed < CopySize; Row++)
+         {
+             for (int Col = CopyStartX; Col < CopyWidth && Consumed < CopySize; Col++)
+             {
+                 int DstOffset = Swizzle.GetSwizzleOffset(Col, Row);
+
+                 byte Value = Buffer[Consumed++];
+
+                 Vmm.WriteByte(CopyAddress + DstOffset, Value);
+             }
+         }
+     }
+
+     Buffer = null;
+ }
+
+ private long MakeInt64From2xInt32(NvGpuEngineP2mfReg Reg)
+ {
+ return
+ (long)Registers[(int)Reg + 0] << 32 |
+ (uint)Registers[(int)Reg + 1];
+ }
+
+ private void WriteRegister(GpuMethodCall MethCall)
+ {
+ Registers[MethCall.Method] = MethCall.Argument;
+ }
+
+ private int ReadRegister(NvGpuEngineP2mfReg Reg)
+ {
+ return Registers[(int)Reg];
+ }
+
+ private void WriteRegister(NvGpuEngineP2mfReg Reg, int Value)
+ {
+ Registers[(int)Reg] = Value;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mfReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mfReg.cs
new file mode 100644
index 00000000..ab3a304d
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mfReg.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Register indices of the P2MF engine, used to index NvGpuEngineP2mf.Registers.
+ enum NvGpuEngineP2mfReg
+ {
+ LineLengthIn = 0x60,
+ LineCount = 0x61,
+ //Spans two consecutive registers (high word first, then low word).
+ DstAddress = 0x62,
+ DstPitch = 0x64,
+ DstBlockDim = 0x65,
+ DstWidth = 0x66,
+ DstHeight = 0x67,
+ DstDepth = 0x68,
+ DstZ = 0x69,
+ DstX = 0x6a,
+ DstY = 0x6b
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuFifo.cs b/Ryujinx.Graphics/Graphics3d/NvGpuFifo.cs
new file mode 100644
index 00000000..f834ade7
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuFifo.cs
@@ -0,0 +1,176 @@
+using Ryujinx.Graphics.Memory;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ class NvGpuFifo
+ {
+ private const int MacrosCount = 0x80;
+ private const int MacroIndexMask = MacrosCount - 1;
+
+ //Note: The size of the macro memory is unknown, we just make
+ //a guess here and use 256kb as the size. Increase if needed.
+ private const int MmeWords = 256 * 256;
+
+ private NvGpu Gpu;
+
+ private NvGpuEngine[] SubChannels;
+
+ //A macro bound to a macro slot: the position of its code on the macro memory plus the
+ //interpreter that runs it. Entries that were never bound are default structs with a
+ //null interpreter, for which Execute and PushArgument do nothing.
+ private struct CachedMacro
+ {
+ //Position of the macro code, passed to the interpreter on execution.
+ public int Position { get; private set; }
+
+ private bool ExecutionPending;
+ private int Argument;
+
+ private MacroInterpreter Interpreter;
+
+ public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
+ {
+ this.Position = Position;
+
+ ExecutionPending = false;
+ Argument = 0;
+
+ Interpreter = new MacroInterpreter(PFifo, Engine);
+ }
+
+ //Records the first argument and marks the macro as waiting to be executed.
+ public void StartExecution(int Argument)
+ {
+ this.Argument = Argument;
+
+ ExecutionPending = true;
+ }
+
+ //Runs the macro if an execution was started, and clears the pending flag.
+ public void Execute(NvGpuVmm Vmm, int[] Mme)
+ {
+ if (ExecutionPending)
+ {
+ ExecutionPending = false;
+
+ Interpreter?.Execute(Vmm, Mme, Position, Argument);
+ }
+ }
+
+ //Queues an additional argument on the interpreter FIFO.
+ public void PushArgument(int Argument)
+ {
+ Interpreter?.Fifo.Enqueue(Argument);
+ }
+ }
+
+ private int CurrMacroPosition;
+ private int CurrMacroBindIndex;
+
+ private CachedMacro[] Macros;
+
+ private int[] Mme;
+
+ public NvGpuFifo(NvGpu Gpu)
+ {
+ this.Gpu = Gpu;
+
+ SubChannels = new NvGpuEngine[8];
+
+ Macros = new CachedMacro[MacrosCount];
+
+ Mme = new int[MmeWords];
+ }
+
+ //Entry point for every method call. BindChannel binds an engine to the sub channel
+ //of the call; any other method is routed to the engine bound to that sub channel
+ //(calls on sub channels without a known engine are ignored).
+ public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     if ((NvGpuFifoMeth)MethCall.Method == NvGpuFifoMeth.BindChannel)
+     {
+         SubChannels[MethCall.SubChannel] = (NvGpuEngine)MethCall.Argument;
+
+         return;
+     }
+
+     NvGpuEngine BoundEngine = SubChannels[MethCall.SubChannel];
+
+     switch (BoundEngine)
+     {
+         case NvGpuEngine._2d:
+             Call2dMethod(Vmm, MethCall);
+             break;
+
+         case NvGpuEngine._3d:
+             Call3dMethod(Vmm, MethCall);
+             break;
+
+         case NvGpuEngine.P2mf:
+             CallP2mfMethod(Vmm, MethCall);
+             break;
+
+         case NvGpuEngine.M2mf:
+             CallM2mfMethod(Vmm, MethCall);
+             break;
+     }
+ }
+
+ private void Call2dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ Gpu.Engine2d.CallMethod(Vmm, MethCall);
+ }
+
+ //Handles a method call on a sub channel bound to the 3D engine.
+ //Methods below 0x80 are the macro upload/bind methods (anything else on that range is
+ //forwarded to the P2MF engine), methods below 0xe00 go to the 3D engine itself, and
+ //methods from 0xe00 upwards address macros: even methods start a macro with its first
+ //argument, odd methods push further arguments. The macro runs on the last call of a batch.
+ private void Call3dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+     if (MethCall.Method >= 0xe00)
+     {
+         int MacroIndex = (MethCall.Method >> 1) & MacroIndexMask;
+
+         bool IsArgument = (MethCall.Method & 1) != 0;
+
+         //Note: Macros holds structs, so it must always be accessed through the array.
+         if (IsArgument)
+         {
+             Macros[MacroIndex].PushArgument(MethCall.Argument);
+         }
+         else
+         {
+             Macros[MacroIndex].StartExecution(MethCall.Argument);
+         }
+
+         if (MethCall.IsLastCall)
+         {
+             Macros[MacroIndex].Execute(Vmm, Mme);
+         }
+
+         return;
+     }
+
+     if (MethCall.Method >= 0x80)
+     {
+         Gpu.Engine3d.CallMethod(Vmm, MethCall);
+
+         return;
+     }
+
+     switch ((NvGpuFifoMeth)MethCall.Method)
+     {
+         case NvGpuFifoMeth.SetMacroUploadAddress:
+             CurrMacroPosition = MethCall.Argument;
+             break;
+
+         case NvGpuFifoMeth.SendMacroCodeData:
+             Mme[CurrMacroPosition++] = MethCall.Argument;
+             break;
+
+         case NvGpuFifoMeth.SetMacroBindingIndex:
+             CurrMacroBindIndex = MethCall.Argument;
+             break;
+
+         case NvGpuFifoMeth.BindMacro:
+             Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, MethCall.Argument);
+             break;
+
+         default:
+             CallP2mfMethod(Vmm, MethCall);
+             break;
+     }
+ }
+
+ private void CallP2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ Gpu.EngineP2mf.CallMethod(Vmm, MethCall);
+ }
+
+ private void CallM2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
+ {
+ Gpu.EngineM2mf.CallMethod(Vmm, MethCall);
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuFifoMeth.cs b/Ryujinx.Graphics/Graphics3d/NvGpuFifoMeth.cs
new file mode 100644
index 00000000..9bf528b3
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuFifoMeth.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Method numbers handled by NvGpuFifo itself rather than by an engine.
+ enum NvGpuFifoMeth
+ {
+ //Binds the engine given by the argument to the sub channel of the call.
+ BindChannel = 0,
+ //Sets the position on the macro memory where uploaded code words are written.
+ SetMacroUploadAddress = 0x45,
+ //Writes one code word at the upload position, then advances the position.
+ SendMacroCodeData = 0x46,
+ //Selects the macro slot used by the next BindMacro.
+ SetMacroBindingIndex = 0x47,
+ //Binds the macro at the position given by the argument to the selected slot.
+ BindMacro = 0x48
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuMethod.cs b/Ryujinx.Graphics/Graphics3d/NvGpuMethod.cs
new file mode 100644
index 00000000..8730d144
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/NvGpuMethod.cs
@@ -0,0 +1,6 @@
+using Ryujinx.Graphics.Memory;
+
+namespace Ryujinx.Graphics.Graphics3d
+{
+ //Handler for a GPU engine method call; engines map method numbers to handlers of this type.
+ delegate void NvGpuMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs
new file mode 100644
index 00000000..1efa0255
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs
@@ -0,0 +1,1384 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace Ryujinx.Graphics.Texture
+{
+ // Exception type thrown by ASTCDecoder for unsupported or invalid input.
+ public class ASTCDecoderException : Exception
+ {
+     // ExMsg becomes the exception's Message.
+     public ASTCDecoderException(string ExMsg)
+         : base(ExMsg)
+     {
+     }
+ }
+
+ //https://github.com/GammaUNC/FasTC/blob/master/ASTCEncoder/src/Decompressor.cpp
+ public static class ASTCDecoder
+ {
+ // Weight-grid description produced by DecodeBlockInfo for a single block.
+ struct TexelWeightParams
+ {
+     public int Width;          // weight grid width
+     public int Height;         // weight grid height
+     public bool DualPlane;     // true when two weights are stored per grid point
+     public int MaxWeight;      // largest encodable weight value
+     public bool Error;         // set when the block mode is reserved/invalid
+     public bool VoidExtentLDR;
+     public bool VoidExtentHDR;
+
+     // Number of bits the integer-sequence-encoded weights take up.
+     public int GetPackedBitSize()
+     {
+         return IntegerEncoded.CreateEncoding(MaxWeight).GetBitLength(GetNumWeightValues());
+     }
+
+     // Total number of stored weights: Width * Height, doubled in dual-plane mode.
+     public int GetNumWeightValues()
+     {
+         int GridPoints = Width * Height;
+
+         return DualPlane ? GridPoints * 2 : GridPoints;
+     }
+ }
+
+ // Decodes a 2D ASTC-compressed image of X by Y texels into 4 bytes per texel.
+ //
+ // InputBuffer is read as consecutive 16-byte blocks, left to right and top to
+ // bottom; each block covers BlockX by BlockY texels and blocks on the right or
+ // bottom edge are cropped to the image. For positive X and Y the result holds
+ // X * Y * 4 bytes laid out as rows of X texels.
+ //
+ // Throws ASTCDecoderException when the block footprint is outside 1..12, when
+ // BlockZ or Z is not 1 (3D textures), or when DecompressBlock rejects a block.
+ public static byte[] DecodeToRGBA8888(
+     byte[] InputBuffer,
+     int BlockX,
+     int BlockY,
+     int BlockZ,
+     int X,
+     int Y,
+     int Z)
+ {
+     using (MemoryStream InputStream = new MemoryStream(InputBuffer))
+     {
+         BinaryReader BinReader = new BinaryReader(InputStream);
+
+         // A zero or negative footprint would never advance the block loops below.
+         if (BlockX > 12 || BlockY > 12 || BlockX <= 0 || BlockY <= 0)
+         {
+             throw new ASTCDecoderException("Block size unsupported!");
+         }
+
+         if (BlockZ != 1 || Z != 1)
+         {
+             throw new ASTCDecoderException("3D compressed textures unsupported!");
+         }
+
+         using (MemoryStream OutputStream = new MemoryStream())
+         {
+             // Scratch buffers are shared by all blocks: DecompressBlock overwrites
+             // every texel of the footprint, and only those texels are read back.
+             int[]  DecompressedData  = new int[144];
+             byte[] DecompressedBytes = new byte[DecompressedData.Length * sizeof(int)];
+
+             int BlockBytes = BlockX * BlockY * sizeof(int);
+
+             for (int j = 0; j < Y; j += BlockY)
+             {
+                 for (int i = 0; i < X; i += BlockX)
+                 {
+                     DecompressBlock(BinReader.ReadBytes(0x10), DecompressedData, BlockX, BlockY);
+
+                     // Convert the packed pixels to bytes once per block, not once per row.
+                     Buffer.BlockCopy(DecompressedData, 0, DecompressedBytes, 0, BlockBytes);
+
+                     int DecompressedWidth  = Math.Min(BlockX, X - i);
+                     int DecompressedHeight = Math.Min(BlockY, Y - j);
+                     int BaseOffsets        = (j * X + i) * 4;
+
+                     for (int jj = 0; jj < DecompressedHeight; jj++)
+                     {
+                         OutputStream.Seek(BaseOffsets + jj * X * 4, SeekOrigin.Begin);
+
+                         OutputStream.Write(DecompressedBytes, jj * BlockX * 4, DecompressedWidth * 4);
+                     }
+                 }
+             }
+
+             return OutputStream.ToArray();
+         }
+     }
+ }
+
+ // Decodes one 16-byte ASTC block into OutputBuffer.
+ //
+ // One packed pixel (ASTCPixel.Pack) is written per texel at index
+ // (row * BlockWidth + column), for BlockWidth * BlockHeight texels.
+ // Always returns true; every failure is reported by throwing
+ // ASTCDecoderException (reserved block mode, HDR void extent, weight grid
+ // larger than the block, illegal weight count/size, dual plane with four
+ // partitions, unsupported endpoint mode).
+ public static bool DecompressBlock(
+     byte[] InputBuffer,
+     int[] OutputBuffer,
+     int BlockWidth,
+     int BlockHeight)
+ {
+     BitArrayStream BitStream = new BitArrayStream(new BitArray(InputBuffer));
+     TexelWeightParams TexelParams = DecodeBlockInfo(BitStream);
+
+     if (TexelParams.Error)
+     {
+         throw new ASTCDecoderException("Invalid block mode");
+     }
+
+     if (TexelParams.VoidExtentLDR)
+     {
+         FillVoidExtentLDR(BitStream, OutputBuffer, BlockWidth, BlockHeight);
+
+         return true;
+     }
+
+     if (TexelParams.VoidExtentHDR)
+     {
+         throw new ASTCDecoderException("HDR void extent blocks are unsupported!");
+     }
+
+     if (TexelParams.Width > BlockWidth)
+     {
+         throw new ASTCDecoderException("Texel weight grid width should be smaller than block width");
+     }
+
+     if (TexelParams.Height > BlockHeight)
+     {
+         throw new ASTCDecoderException("Texel weight grid height should be smaller than block height");
+     }
+
+     // The ASTC format declares these weight grids illegal. Rejecting them here
+     // also keeps the byte indexing of the weight data below inside 16 bytes.
+     int NumberWeightBits = TexelParams.GetPackedBitSize();
+
+     if (TexelParams.GetNumWeightValues() > 64)
+     {
+         throw new ASTCDecoderException("Texel weight grid requires more than 64 weights");
+     }
+
+     if (NumberWeightBits < 24 || NumberWeightBits > 96)
+     {
+         throw new ASTCDecoderException("Texel weight data must take between 24 and 96 bits");
+     }
+
+     // Read num partitions
+     int NumberPartitions = BitStream.ReadBits(2) + 1;
+     Debug.Assert(NumberPartitions <= 4);
+
+     if (NumberPartitions == 4 && TexelParams.DualPlane)
+     {
+         throw new ASTCDecoderException("Dual plane mode is incompatible with four partition blocks");
+     }
+
+     // Based on the number of partitions, read the color endpoint mode for
+     // each partition.
+
+     // Determine partitions, partition index, and color endpoint modes
+     int PlaneIndices = -1;
+     int PartitionIndex;
+     uint[] ColorEndpointMode = { 0, 0, 0, 0 };
+
+     BitArrayStream ColorEndpointStream = new BitArrayStream(new BitArray(16 * 8));
+
+     // Read extra config data...
+     uint BaseColorEndpointMode = 0;
+
+     if (NumberPartitions == 1)
+     {
+         ColorEndpointMode[0] = (uint)BitStream.ReadBits(4);
+         PartitionIndex = 0;
+     }
+     else
+     {
+         PartitionIndex = BitStream.ReadBits(10);
+         BaseColorEndpointMode = (uint)BitStream.ReadBits(6);
+     }
+
+     uint BaseMode = (BaseColorEndpointMode & 3);
+
+     // Remaining bits are color endpoint data...
+     int RemainingBits = 128 - NumberWeightBits - BitStream.Position;
+
+     // Consider extra bits prior to texel data...
+     uint ExtraColorEndpointModeBits = 0;
+
+     if (BaseMode != 0)
+     {
+         switch (NumberPartitions)
+         {
+             case 2: ExtraColorEndpointModeBits += 2; break;
+             case 3: ExtraColorEndpointModeBits += 5; break;
+             case 4: ExtraColorEndpointModeBits += 8; break;
+             default: Debug.Assert(false); break;
+         }
+     }
+
+     RemainingBits -= (int)ExtraColorEndpointModeBits;
+
+     // Do we have a dual plane situation?
+     int PlaneSelectorBits = 0;
+
+     if (TexelParams.DualPlane)
+     {
+         PlaneSelectorBits = 2;
+     }
+
+     RemainingBits -= PlaneSelectorBits;
+
+     // Read color data...
+     int ColorDataBits = RemainingBits;
+
+     while (RemainingBits > 0)
+     {
+         int NumberBits = Math.Min(RemainingBits, 8);
+         int Bits = BitStream.ReadBits(NumberBits);
+         ColorEndpointStream.WriteBits(Bits, NumberBits);
+         RemainingBits -= 8;
+     }
+
+     // Read the plane selection bits
+     PlaneIndices = BitStream.ReadBits(PlaneSelectorBits);
+
+     // Read the rest of the CEM
+     if (BaseMode != 0)
+     {
+         uint ExtraColorEndpointMode = (uint)BitStream.ReadBits((int)ExtraColorEndpointModeBits);
+         uint TempColorEndpointMode = (ExtraColorEndpointMode << 6) | BaseColorEndpointMode;
+         TempColorEndpointMode >>= 2;
+
+         // One class-select bit per partition...
+         bool[] C = new bool[4];
+
+         for (int i = 0; i < NumberPartitions; i++)
+         {
+             C[i] = (TempColorEndpointMode & 1) != 0;
+             TempColorEndpointMode >>= 1;
+         }
+
+         // ...followed by two mode bits per partition.
+         byte[] M = new byte[4];
+
+         for (int i = 0; i < NumberPartitions; i++)
+         {
+             M[i] = (byte)(TempColorEndpointMode & 3);
+             TempColorEndpointMode >>= 2;
+             Debug.Assert(M[i] <= 3);
+         }
+
+         for (int i = 0; i < NumberPartitions; i++)
+         {
+             ColorEndpointMode[i] = BaseMode;
+             if (!(C[i])) ColorEndpointMode[i] -= 1;
+             ColorEndpointMode[i] <<= 2;
+             ColorEndpointMode[i] |= M[i];
+         }
+     }
+     else if (NumberPartitions > 1)
+     {
+         // All partitions share the same endpoint mode.
+         uint TempColorEndpointMode = BaseColorEndpointMode >> 2;
+
+         for (uint i = 0; i < NumberPartitions; i++)
+         {
+             ColorEndpointMode[i] = TempColorEndpointMode;
+         }
+     }
+
+     // Make sure everything up till here is sane.
+     for (int i = 0; i < NumberPartitions; i++)
+     {
+         Debug.Assert(ColorEndpointMode[i] < 16);
+     }
+     Debug.Assert(BitStream.Position + NumberWeightBits == 128);
+
+     // Decode both color data and texel weight data
+     int[] ColorValues = new int[32]; // Four values * two endpoints * four maximum partitions
+     DecodeColorValues(ColorValues, ColorEndpointStream.ToByteArray(), ColorEndpointMode, NumberPartitions, ColorDataBits);
+
+     ASTCPixel[][] EndPoints = new ASTCPixel[4][];
+     EndPoints[0] = new ASTCPixel[2];
+     EndPoints[1] = new ASTCPixel[2];
+     EndPoints[2] = new ASTCPixel[2];
+     EndPoints[3] = new ASTCPixel[2];
+
+     int ColorValuesPosition = 0;
+
+     for (int i = 0; i < NumberPartitions; i++)
+     {
+         ComputeEndpoints(EndPoints[i], ColorValues, ColorEndpointMode[i], ref ColorValuesPosition);
+     }
+
+     // Read the texel weight data.
+     byte[] TexelWeightData = (byte[])InputBuffer.Clone();
+
+     // Weights are stored from the top of the block downwards, so reverse the
+     // byte order and the bit order within each byte to read them forwards.
+     for (int i = 0; i < 8; i++)
+     {
+         byte a = ReverseByte(TexelWeightData[i]);
+         byte b = ReverseByte(TexelWeightData[15 - i]);
+
+         TexelWeightData[i] = b;
+         TexelWeightData[15 - i] = a;
+     }
+
+     // Make sure that higher non-texel bits are set to zero
+     int ClearByteStart = (NumberWeightBits >> 3) + 1;
+     TexelWeightData[ClearByteStart - 1] &= (byte)((1 << (NumberWeightBits % 8)) - 1);
+
+     for (int i = ClearByteStart; i < 16; i++)
+     {
+         TexelWeightData[i] = 0;
+     }
+
+     List<IntegerEncoded> TexelWeightValues = new List<IntegerEncoded>();
+     BitArrayStream WeightBitStream = new BitArrayStream(new BitArray(TexelWeightData));
+
+     IntegerEncoded.DecodeIntegerSequence(TexelWeightValues, WeightBitStream, TexelParams.MaxWeight, TexelParams.GetNumWeightValues());
+
+     // Blocks can be at most 12x12, so we can have as many as 144 weights
+     int[][] Weights = new int[2][];
+     Weights[0] = new int[144];
+     Weights[1] = new int[144];
+
+     UnquantizeTexelWeights(Weights, TexelWeightValues, TexelParams, BlockWidth, BlockHeight);
+
+     // Now that we have endpoints and weights, we can interpolate and generate
+     // the proper decoding...
+     for (int j = 0; j < BlockHeight; j++)
+     {
+         for (int i = 0; i < BlockWidth; i++)
+         {
+             int Partition = Select2DPartition(PartitionIndex, i, j, NumberPartitions, ((BlockHeight * BlockWidth) < 32));
+             Debug.Assert(Partition < NumberPartitions);
+
+             ASTCPixel Pixel = new ASTCPixel(0, 0, 0, 0);
+             for (int Component = 0; Component < 4; Component++)
+             {
+                 // Expand both 8-bit endpoints to 16 bits before interpolating.
+                 int Component0 = EndPoints[Partition][0].GetComponent(Component);
+                 Component0 = BitArrayStream.Replicate(Component0, 8, 16);
+                 int Component1 = EndPoints[Partition][1].GetComponent(Component);
+                 Component1 = BitArrayStream.Replicate(Component1, 8, 16);
+
+                 int Plane = 0;
+
+                 // Component index 0 is alpha, so the selector is offset by one.
+                 if (TexelParams.DualPlane && (((PlaneIndices + 1) & 3) == Component))
+                 {
+                     Plane = 1;
+                 }
+
+                 int Weight = Weights[Plane][j * BlockWidth + i];
+                 int FinalComponent = (Component0 * (64 - Weight) + Component1 * Weight + 32) / 64;
+
+                 if (FinalComponent == 65535)
+                 {
+                     Pixel.SetComponent(Component, 255);
+                 }
+                 else
+                 {
+                     double FinalComponentFloat = FinalComponent;
+                     Pixel.SetComponent(Component, (int)(255.0 * (FinalComponentFloat / 65536.0) + 0.5));
+                 }
+             }
+
+             OutputBuffer[j * BlockWidth + i] = Pixel.Pack();
+         }
+     }
+
+     return true;
+ }
+
+ // 2D convenience wrapper: selects the partition for texel (X, Y) with Z fixed to 0.
+ private static int Select2DPartition(int Seed, int X, int Y, int PartitionCount, bool IsSmallBlock)
+     => SelectPartition(Seed, X, Y, 0, PartitionCount, IsSmallBlock);
+
+ // Returns the partition index (0 .. PartitionCount - 1) that the texel at
+ // (X, Y, Z) belongs to, for the partition pattern identified by Seed.
+ // With a single partition the answer is always 0. IsSmallBlock doubles the
+ // texel coordinates before they are fed into the pattern function.
+ private static int SelectPartition(int Seed, int X, int Y, int Z, int PartitionCount, bool IsSmallBlock)
+ {
+     if (PartitionCount == 1)
+     {
+         return 0;
+     }
+
+     if (IsSmallBlock)
+     {
+         X <<= 1;
+         Y <<= 1;
+         Z <<= 1;
+     }
+
+     Seed += (PartitionCount - 1) * 1024;
+
+     // The hash is kept unsigned so that right shifts are logical. With a signed
+     // value, ">> 30" sign-extends and sets stray bits in Seed12 whenever the
+     // top bit of the hash is set.
+     uint RightNum = (uint)Hash52((uint)Seed);
+
+     byte Seed01 = (byte)(RightNum & 0xF);
+     byte Seed02 = (byte)((RightNum >> 4) & 0xF);
+     byte Seed03 = (byte)((RightNum >> 8) & 0xF);
+     byte Seed04 = (byte)((RightNum >> 12) & 0xF);
+     byte Seed05 = (byte)((RightNum >> 16) & 0xF);
+     byte Seed06 = (byte)((RightNum >> 20) & 0xF);
+     byte Seed07 = (byte)((RightNum >> 24) & 0xF);
+     byte Seed08 = (byte)((RightNum >> 28) & 0xF);
+     byte Seed09 = (byte)((RightNum >> 18) & 0xF);
+     byte Seed10 = (byte)((RightNum >> 22) & 0xF);
+     byte Seed11 = (byte)((RightNum >> 26) & 0xF);
+     byte Seed12 = (byte)(((RightNum >> 30) | (RightNum << 2)) & 0xF);
+
+     // Squaring a 4-bit value gives at most 225, so it still fits in a byte.
+     Seed01 *= Seed01; Seed02 *= Seed02;
+     Seed03 *= Seed03; Seed04 *= Seed04;
+     Seed05 *= Seed05; Seed06 *= Seed06;
+     Seed07 *= Seed07; Seed08 *= Seed08;
+     Seed09 *= Seed09; Seed10 *= Seed10;
+     Seed11 *= Seed11; Seed12 *= Seed12;
+
+     int SeedHash1, SeedHash2, SeedHash3;
+
+     if ((Seed & 1) != 0)
+     {
+         SeedHash1 = (Seed & 2) != 0 ? 4 : 5;
+         SeedHash2 = (PartitionCount == 3) ? 6 : 5;
+     }
+     else
+     {
+         SeedHash1 = (PartitionCount == 3) ? 6 : 5;
+         SeedHash2 = (Seed & 2) != 0 ? 4 : 5;
+     }
+
+     SeedHash3 = (Seed & 0x10) != 0 ? SeedHash1 : SeedHash2;
+
+     Seed01 >>= SeedHash1; Seed02 >>= SeedHash2; Seed03 >>= SeedHash1; Seed04 >>= SeedHash2;
+     Seed05 >>= SeedHash1; Seed06 >>= SeedHash2; Seed07 >>= SeedHash1; Seed08 >>= SeedHash2;
+     Seed09 >>= SeedHash3; Seed10 >>= SeedHash3; Seed11 >>= SeedHash3; Seed12 >>= SeedHash3;
+
+     // Only the low six bits of each sum are kept.
+     int a = Seed01 * X + Seed02 * Y + Seed11 * Z + (int)(RightNum >> 14);
+     int b = Seed03 * X + Seed04 * Y + Seed12 * Z + (int)(RightNum >> 10);
+     int c = Seed05 * X + Seed06 * Y + Seed09 * Z + (int)(RightNum >> 6);
+     int d = Seed07 * X + Seed08 * Y + Seed10 * Z + (int)(RightNum >> 2);
+
+     a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
+
+     // Partitions that do not exist can never win the comparison below.
+     if (PartitionCount < 4) d = 0;
+     if (PartitionCount < 3) c = 0;
+
+     if (a >= b && a >= c && a >= d) return 0;
+     else if (b >= c && b >= d) return 1;
+     else if (c >= d) return 2;
+     return 3;
+ }
+
+ // Scrambles a 32-bit value with a fixed sequence of shift/add/xor steps
+ // (all arithmetic wraps at 32 bits) and returns the result as an int.
+ static int Hash52(uint Val)
+ {
+     uint Mixed = Val;
+
+     Mixed ^= Mixed >> 15;
+     Mixed -= Mixed << 17;
+     Mixed += Mixed << 7;
+     Mixed += Mixed << 4;
+     Mixed ^= Mixed >> 5;
+     Mixed += Mixed << 16;
+     Mixed ^= Mixed >> 7;
+     Mixed ^= Mixed >> 3;
+     Mixed ^= Mixed << 6;
+     Mixed ^= Mixed >> 17;
+
+     return (int)Mixed;
+ }
+
+ // Unquantizes the decoded weights and expands the weight grid to one weight
+ // per texel of the block.
+ //
+ // Weights holds the decoded values; in dual-plane mode the two planes are
+ // interleaved (plane 0, plane 1, plane 0, ...). OutputBuffer[plane] receives
+ // BlockWidth * BlockHeight weights at index (t * BlockWidth + s).
+ static void UnquantizeTexelWeights(
+     int[][] OutputBuffer,
+     List<IntegerEncoded> Weights,
+     TexelWeightParams TexelParams,
+     int BlockWidth,
+     int BlockHeight)
+ {
+     int WeightIndices = 0;
+     int[][] Unquantized = new int[2][];
+     Unquantized[0] = new int[144];
+     Unquantized[1] = new int[144];
+
+     for (int i = 0; i < Weights.Count; i++)
+     {
+         Unquantized[0][WeightIndices] = UnquantizeTexelWeight(Weights[i]);
+
+         if (TexelParams.DualPlane)
+         {
+             i++;
+
+             // Check before reading: an odd number of values has no plane 1
+             // partner for the last entry.
+             if (i == Weights.Count)
+             {
+                 break;
+             }
+
+             Unquantized[1][WeightIndices] = UnquantizeTexelWeight(Weights[i]);
+         }
+
+         if (++WeightIndices >= (TexelParams.Width * TexelParams.Height)) break;
+     }
+
+     // Do infill if necessary (Section C.2.18) ...
+     int Ds = (1024 + (BlockWidth / 2)) / (BlockWidth - 1);
+     int Dt = (1024 + (BlockHeight / 2)) / (BlockHeight - 1);
+
+     int PlaneScale = TexelParams.DualPlane ? 2 : 1;
+     int GridSize   = TexelParams.Width * TexelParams.Height;
+
+     for (int Plane = 0; Plane < PlaneScale; Plane++)
+     {
+         for (int t = 0; t < BlockHeight; t++)
+         {
+             for (int s = 0; s < BlockWidth; s++)
+             {
+                 // Texel position scaled onto the weight grid, in 1/16 steps.
+                 int cs = Ds * s;
+                 int ct = Dt * t;
+
+                 int gs = (cs * (TexelParams.Width - 1) + 32) >> 6;
+                 int gt = (ct * (TexelParams.Height - 1) + 32) >> 6;
+
+                 // Integer grid coordinate (j*) and 4-bit fraction (f*).
+                 int js = gs >> 4;
+                 int fs = gs & 0xF;
+
+                 int jt = gt >> 4;
+                 int ft = gt & 0x0F;
+
+                 // Bilinear blend factors; the four always sum to 16.
+                 int w11 = (fs * ft + 8) >> 4;
+                 int w10 = ft - w11;
+                 int w01 = fs - w11;
+                 int w00 = 16 - fs - ft + w11;
+
+                 int v0 = js + jt * TexelParams.Width;
+
+                 // Neighbours that fall outside the grid contribute zero.
+                 int p00 = 0;
+                 int p01 = 0;
+                 int p10 = 0;
+                 int p11 = 0;
+
+                 if (v0 < GridSize)
+                 {
+                     p00 = Unquantized[Plane][v0];
+                 }
+
+                 if (v0 + 1 < GridSize)
+                 {
+                     p01 = Unquantized[Plane][v0 + 1];
+                 }
+
+                 if (v0 + TexelParams.Width < GridSize)
+                 {
+                     p10 = Unquantized[Plane][v0 + TexelParams.Width];
+                 }
+
+                 if (v0 + TexelParams.Width + 1 < GridSize)
+                 {
+                     p11 = Unquantized[Plane][v0 + TexelParams.Width + 1];
+                 }
+
+                 OutputBuffer[Plane][t * BlockWidth + s] = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
+             }
+         }
+     }
+ }
+
+ // Converts one decoded weight (plain bits, or a trit/quint plus bits) into
+ // the 0..64 weight range used for interpolation.
+ // Throws ASTCDecoderException for a trit with more than 3 bits or a quint
+ // with more than 2 bits.
+ static int UnquantizeTexelWeight(IntegerEncoded IntEncoded)
+ {
+     IntegerEncoded.EIntegerEncoding Encoding = IntEncoded.GetEncoding();
+
+     int Bits    = IntEncoded.BitValue;
+     int NumBits = IntEncoded.NumberBits;
+
+     // Lowest bit replicated across 7 bits.
+     int A = BitArrayStream.Replicate(Bits & 1, 1, 7);
+     int B = 0;
+     int C = 0;
+     int D = 0;
+
+     int Weight = 0;
+
+     if (Encoding == IntegerEncoded.EIntegerEncoding.JustBits)
+     {
+         Weight = BitArrayStream.Replicate(Bits, NumBits, 6);
+     }
+     else if (Encoding == IntegerEncoded.EIntegerEncoding.Trit)
+     {
+         D = IntEncoded.TritValue;
+         Debug.Assert(D < 3);
+
+         if (NumBits == 0)
+         {
+             int[] Results = { 0, 32, 63 };
+             Weight = Results[D];
+         }
+         else if (NumBits == 1)
+         {
+             C = 50;
+         }
+         else if (NumBits == 2)
+         {
+             int b = (Bits >> 1) & 1;
+
+             C = 23;
+             B = (b << 6) | (b << 2) | b;
+         }
+         else if (NumBits == 3)
+         {
+             int cb = (Bits >> 1) & 3;
+
+             C = 11;
+             B = (cb << 5) | cb;
+         }
+         else
+         {
+             throw new ASTCDecoderException("Invalid trit encoding for texel weight");
+         }
+     }
+     else if (Encoding == IntegerEncoded.EIntegerEncoding.Quint)
+     {
+         D = IntEncoded.QuintValue;
+         Debug.Assert(D < 5);
+
+         if (NumBits == 0)
+         {
+             int[] Results = { 0, 16, 32, 47, 63 };
+             Weight = Results[D];
+         }
+         else if (NumBits == 1)
+         {
+             C = 28;
+         }
+         else if (NumBits == 2)
+         {
+             int b = (Bits >> 1) & 1;
+
+             C = 13;
+             B = (b << 6) | (b << 1);
+         }
+         else
+         {
+             throw new ASTCDecoderException("Invalid quint encoding for texel weight");
+         }
+     }
+
+     bool UsesTritOrQuint = Encoding != IntegerEncoded.EIntegerEncoding.JustBits;
+
+     if (UsesTritOrQuint && NumBits > 0)
+     {
+         // Combine the trit/quint digit with the extra bits.
+         int T = (D * C + B) ^ A;
+
+         Weight = (A & 0x20) | (T >> 2);
+     }
+
+     Debug.Assert(Weight < 64);
+
+     // Change from [0,63] to [0,64]
+     return Weight > 32 ? Weight + 1 : Weight;
+ }
+
+ // Returns b with its eight bits in reverse order (bit 0 becomes bit 7, ...).
+ static byte ReverseByte(byte b)
+ {
+     int Value = b;
+
+     // Swap nibbles, then bit pairs, then neighbouring bits.
+     Value = ((Value & 0xF0) >> 4) | ((Value & 0x0F) << 4);
+     Value = ((Value & 0xCC) >> 2) | ((Value & 0x33) << 2);
+     Value = ((Value & 0xAA) >> 1) | ((Value & 0x55) << 1);
+
+     return (byte)Value;
+ }
+
+ // Copies the next Number entries of ColorValues, starting at
+ // ColorValuesPosition, into a new uint array and advances the position
+ // by one for every entry read.
+ static uint[] ReadUintColorValues(int Number, int[] ColorValues, ref int ColorValuesPosition)
+ {
+     uint[] Values = new uint[Number];
+
+     int Slot = 0;
+
+     while (Slot < Number)
+     {
+         Values[Slot] = (uint)ColorValues[ColorValuesPosition];
+
+         ColorValuesPosition++;
+         Slot++;
+     }
+
+     return Values;
+ }
+
+ // Copies the next Number entries of ColorValues, starting at
+ // ColorValuesPosition, into a new int array and advances the position
+ // by one for every entry read.
+ static int[] ReadIntColorValues(int Number, int[] ColorValues, ref int ColorValuesPosition)
+ {
+     int[] Values = new int[Number];
+
+     int Slot = 0;
+
+     while (Slot < Number)
+     {
+         Values[Slot] = ColorValues[ColorValuesPosition];
+
+         ColorValuesPosition++;
+         Slot++;
+     }
+
+     return Values;
+ }
+
+ // Builds the two endpoint colors of one partition from the decoded color
+ // values, according to the partition's color endpoint mode.
+ //
+ // Values are consumed from ColorValues starting at ColorValuesPosition, which
+ // is advanced past them (2, 4, 6 or 8 values depending on the mode). The
+ // results are stored in EndPoints[0] and EndPoints[1]; ASTCPixel arguments
+ // are in (A, R, G, B) order. Modes other than 0, 1, 4, 5, 6, 8, 9, 10, 12 and
+ // 13 throw ASTCDecoderException.
+ static void ComputeEndpoints(
+     ASTCPixel[] EndPoints,
+     int[] ColorValues,
+     uint ColorEndpointMode,
+     ref int ColorValuesPosition)
+ {
+     switch (ColorEndpointMode)
+     {
+         // Luminance, direct.
+         case 0:
+         {
+             uint[] Val = ReadUintColorValues(2, ColorValues, ref ColorValuesPosition);
+
+             EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[0], (short)Val[0]);
+             EndPoints[1] = new ASTCPixel(0xFF, (short)Val[1], (short)Val[1], (short)Val[1]);
+
+             break;
+         }
+
+         // Luminance, base + offset.
+         case 1:
+         {
+             uint[] Val = ReadUintColorValues(2, ColorValues, ref ColorValuesPosition);
+             int L0 = (int)((Val[0] >> 2) | (Val[1] & 0xC0));
+
+             // The second endpoint is the base plus a 6-bit offset, clamped to
+             // 255. Clamping needs Min; Max would pin it to at least 255.
+             int L1 = Math.Min(L0 + (int)(Val[1] & 0x3F), 0xFF);
+
+             EndPoints[0] = new ASTCPixel(0xFF, (short)L0, (short)L0, (short)L0);
+             EndPoints[1] = new ASTCPixel(0xFF, (short)L1, (short)L1, (short)L1);
+
+             break;
+         }
+
+         // Luminance + alpha, direct.
+         case 4:
+         {
+             uint[] Val = ReadUintColorValues(4, ColorValues, ref ColorValuesPosition);
+
+             EndPoints[0] = new ASTCPixel((short)Val[2], (short)Val[0], (short)Val[0], (short)Val[0]);
+             EndPoints[1] = new ASTCPixel((short)Val[3], (short)Val[1], (short)Val[1], (short)Val[1]);
+
+             break;
+         }
+
+         // Luminance + alpha, base + offset.
+         case 5:
+         {
+             int[] Val = ReadIntColorValues(4, ColorValues, ref ColorValuesPosition);
+
+             BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]);
+             BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]);
+
+             EndPoints[0] = new ASTCPixel((short)Val[2], (short)Val[0], (short)Val[0], (short)Val[0]);
+             EndPoints[1] = new ASTCPixel((short)(Val[2] + Val[3]), (short)(Val[0] + Val[1]), (short)(Val[0] + Val[1]), (short)(Val[0] + Val[1]));
+
+             EndPoints[0].ClampByte();
+             EndPoints[1].ClampByte();
+
+             break;
+         }
+
+         // RGB, base + scale.
+         case 6:
+         {
+             uint[] Val = ReadUintColorValues(4, ColorValues, ref ColorValuesPosition);
+
+             EndPoints[0] = new ASTCPixel(0xFF, (short)(Val[0] * Val[3] >> 8), (short)(Val[1] * Val[3] >> 8), (short)(Val[2] * Val[3] >> 8));
+             EndPoints[1] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[1], (short)Val[2]);
+
+             break;
+         }
+
+         // RGB, direct.
+         case 8:
+         {
+             uint[] Val = ReadUintColorValues(6, ColorValues, ref ColorValuesPosition);
+
+             if (Val[1] + Val[3] + Val[5] >= Val[0] + Val[2] + Val[4])
+             {
+                 EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]);
+                 EndPoints[1] = new ASTCPixel(0xFF, (short)Val[1], (short)Val[3], (short)Val[5]);
+             }
+             else
+             {
+                 // Endpoints are stored swapped and blue-contracted.
+                 EndPoints[0] = ASTCPixel.BlueContract(0xFF, (short)Val[1], (short)Val[3], (short)Val[5]);
+                 EndPoints[1] = ASTCPixel.BlueContract(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]);
+             }
+
+             break;
+         }
+
+         // RGB, base + offset.
+         case 9:
+         {
+             int[] Val = ReadIntColorValues(6, ColorValues, ref ColorValuesPosition);
+
+             BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]);
+             BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]);
+             BitArrayStream.BitTransferSigned(ref Val[5], ref Val[4]);
+
+             if (Val[1] + Val[3] + Val[5] >= 0)
+             {
+                 EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]);
+                 EndPoints[1] = new ASTCPixel(0xFF, (short)(Val[0] + Val[1]), (short)(Val[2] + Val[3]), (short)(Val[4] + Val[5]));
+             }
+             else
+             {
+                 EndPoints[0] = ASTCPixel.BlueContract(0xFF, Val[0] + Val[1], Val[2] + Val[3], Val[4] + Val[5]);
+                 EndPoints[1] = ASTCPixel.BlueContract(0xFF, Val[0], Val[2], Val[4]);
+             }
+
+             EndPoints[0].ClampByte();
+             EndPoints[1].ClampByte();
+
+             break;
+         }
+
+         // RGB base + scale, plus two alpha values.
+         case 10:
+         {
+             uint[] Val = ReadUintColorValues(6, ColorValues, ref ColorValuesPosition);
+
+             EndPoints[0] = new ASTCPixel((short)Val[4], (short)(Val[0] * Val[3] >> 8), (short)(Val[1] * Val[3] >> 8), (short)(Val[2] * Val[3] >> 8));
+             EndPoints[1] = new ASTCPixel((short)Val[5], (short)Val[0], (short)Val[1], (short)Val[2]);
+
+             break;
+         }
+
+         // RGBA, direct.
+         case 12:
+         {
+             uint[] Val = ReadUintColorValues(8, ColorValues, ref ColorValuesPosition);
+
+             if (Val[1] + Val[3] + Val[5] >= Val[0] + Val[2] + Val[4])
+             {
+                 EndPoints[0] = new ASTCPixel((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]);
+                 EndPoints[1] = new ASTCPixel((short)Val[7], (short)Val[1], (short)Val[3], (short)Val[5]);
+             }
+             else
+             {
+                 EndPoints[0] = ASTCPixel.BlueContract((short)Val[7], (short)Val[1], (short)Val[3], (short)Val[5]);
+                 EndPoints[1] = ASTCPixel.BlueContract((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]);
+             }
+
+             break;
+         }
+
+         // RGBA, base + offset.
+         case 13:
+         {
+             int[] Val = ReadIntColorValues(8, ColorValues, ref ColorValuesPosition);
+
+             BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]);
+             BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]);
+             BitArrayStream.BitTransferSigned(ref Val[5], ref Val[4]);
+             BitArrayStream.BitTransferSigned(ref Val[7], ref Val[6]);
+
+             if (Val[1] + Val[3] + Val[5] >= 0)
+             {
+                 EndPoints[0] = new ASTCPixel((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]);
+                 EndPoints[1] = new ASTCPixel((short)(Val[7] + Val[6]), (short)(Val[0] + Val[1]), (short)(Val[2] + Val[3]), (short)(Val[4] + Val[5]));
+             }
+             else
+             {
+                 EndPoints[0] = ASTCPixel.BlueContract(Val[6] + Val[7], Val[0] + Val[1], Val[2] + Val[3], Val[4] + Val[5]);
+                 EndPoints[1] = ASTCPixel.BlueContract(Val[6], Val[0], Val[2], Val[4]);
+             }
+
+             EndPoints[0].ClampByte();
+             EndPoints[1].ClampByte();
+
+             break;
+         }
+
+         default:
+             throw new ASTCDecoderException("Unsupported color endpoint mode (is it HDR?)");
+     }
+ }
+
+ // Decodes the color endpoint integers of a block and unquantizes each of
+ // them to the 0-255 range, writing them in order to OutputValues.
+ //
+ // Modes gives the endpoint mode of each of the NumberPartitions partitions
+ // (it determines how many values there are); NumberBitsForColorData is the
+ // number of bits of InputData available for them. Throws
+ // ASTCDecoderException for trit/quint bit counts that have no table entry.
+ static void DecodeColorValues(
+     int[] OutputValues,
+     byte[] InputData,
+     uint[] Modes,
+     int NumberPartitions,
+     int NumberBitsForColorData)
+ {
+     // First figure out how many color values we have
+     int NumberValues = 0;
+
+     for (int Part = 0; Part < NumberPartitions; Part++)
+     {
+         NumberValues += (int)((Modes[Part] >> 2) + 1) << 1;
+     }
+
+     // Walk the ranges downwards until the values fit in the available bits,
+     // then keep going to the smallest range sharing that same encoding.
+     int Range;
+
+     for (Range = 255; Range > 0; Range--)
+     {
+         IntegerEncoded Candidate = IntegerEncoded.CreateEncoding(Range);
+
+         if (Candidate.GetBitLength(NumberValues) > NumberBitsForColorData)
+         {
+             continue;
+         }
+
+         for (Range--; Range > 0; Range--)
+         {
+             if (!IntegerEncoded.CreateEncoding(Range).MatchesEncoding(Candidate))
+             {
+                 break;
+             }
+         }
+
+         // Return to last matching range.
+         Range++;
+         break;
+     }
+
+     // We now have enough to decode our integer sequence.
+     List<IntegerEncoded> Sequence = new List<IntegerEncoded>();
+     BitArrayStream ColorBitStream = new BitArrayStream(new BitArray(InputData));
+
+     IntegerEncoded.DecodeIntegerSequence(Sequence, ColorBitStream, Range, NumberValues);
+
+     // Once we have the decoded values, we need to dequantize them to the 0-255 range
+     // This procedure is outlined in ASTC spec C.2.13
+     int OutputIndex = 0;
+
+     foreach (IntegerEncoded Value in Sequence)
+     {
+         int NumBits = Value.NumberBits;
+         int Bits    = Value.BitValue;
+
+         Debug.Assert(NumBits >= 1);
+
+         // A is just the lsb replicated 9 times.
+         int A = BitArrayStream.Replicate(Bits & 1, 1, 9);
+         int B = 0;
+         int C = 0;
+         int D = 0;
+
+         IntegerEncoded.EIntegerEncoding Encoding = Value.GetEncoding();
+
+         if (Encoding == IntegerEncoded.EIntegerEncoding.JustBits)
+         {
+             OutputValues[OutputIndex++] = BitArrayStream.Replicate(Bits, NumBits, 8);
+
+             continue;
+         }
+
+         // Everything above the lsb; each case masks the bits it needs.
+         int Upper = Bits >> 1;
+
+         if (Encoding == IntegerEncoded.EIntegerEncoding.Trit)
+         {
+             D = Value.TritValue;
+
+             switch (NumBits)
+             {
+                 case 1: C = 204; break;
+
+                 // B = b000b0bb0
+                 case 2: { int b = Upper & 1; C = 93; B = (b << 8) | (b << 4) | (b << 2) | (b << 1); break; }
+
+                 // B = cb000cbcb
+                 case 3: { int cb = Upper & 3; C = 44; B = (cb << 7) | (cb << 2) | cb; break; }
+
+                 // B = dcb000dcb
+                 case 4: { int dcb = Upper & 7; C = 22; B = (dcb << 6) | dcb; break; }
+
+                 // B = edcb000ed
+                 case 5: { int edcb = Upper & 0xF; C = 11; B = (edcb << 5) | (edcb >> 2); break; }
+
+                 // B = fedcb000f
+                 case 6: { int fedcb = Upper & 0x1F; C = 5; B = (fedcb << 4) | (fedcb >> 4); break; }
+
+                 default:
+                     throw new ASTCDecoderException("Unsupported trit encoding for color values!");
+             }
+         }
+         else if (Encoding == IntegerEncoded.EIntegerEncoding.Quint)
+         {
+             D = Value.QuintValue;
+
+             switch (NumBits)
+             {
+                 case 1: C = 113; break;
+
+                 // B = b0000bb00
+                 case 2: { int b = Upper & 1; C = 54; B = (b << 8) | (b << 3) | (b << 2); break; }
+
+                 // B = cb0000cbc
+                 case 3: { int cb = Upper & 3; C = 26; B = (cb << 7) | (cb << 1) | (cb >> 1); break; }
+
+                 // B = dcb0000dc
+                 case 4: { int dcb = Upper & 7; C = 13; B = (dcb << 6) | (dcb >> 1); break; }
+
+                 // B = edcb0000e
+                 case 5: { int edcb = Upper & 0xF; C = 6; B = (edcb << 5) | (edcb >> 3); break; }
+
+                 default:
+                     throw new ASTCDecoderException("Unsupported quint encoding for color values!");
+             }
+         }
+
+         int T = (D * C + B) ^ A;
+
+         OutputValues[OutputIndex++] = (A & 0x80) | (T >> 2);
+     }
+
+     // Make sure that each of our values is in the proper range...
+     for (int Check = 0; Check < NumberValues; Check++)
+     {
+         Debug.Assert(OutputValues[Check] <= 255);
+     }
+ }
+
+ // Fills the whole block with the single color stored in an LDR void-extent
+ // block. The four 13-bit extent fields are read and discarded; the four
+ // 16-bit color channels that follow are reduced to their top 8 bits and
+ // packed into one int (first channel in the lowest byte).
+ static void FillVoidExtentLDR(BitArrayStream BitStream, int[] OutputBuffer, int BlockWidth, int BlockHeight)
+ {
+     // Don't actually care about the void extent, just read the bits...
+     int ExtentFields = 4;
+
+     while (ExtentFields-- > 0)
+     {
+         BitStream.ReadBits(13);
+     }
+
+     // Decode the RGBA components and renormalize them to the range [0, 255]
+     ushort Red   = (ushort)BitStream.ReadBits(16);
+     ushort Green = (ushort)BitStream.ReadBits(16);
+     ushort Blue  = (ushort)BitStream.ReadBits(16);
+     ushort Alpha = (ushort)BitStream.ReadBits(16);
+
+     int Packed = (Red >> 8)
+                | (Green & 0xFF00)
+                | ((Blue  & 0xFF00) << 8)
+                | ((Alpha & 0xFF00) << 16);
+
+     for (int Row = 0; Row < BlockHeight; Row++)
+     {
+         int RowStart = Row * BlockWidth;
+
+         for (int Column = 0; Column < BlockWidth; Column++)
+         {
+             OutputBuffer[RowStart + Column] = Packed;
+         }
+     }
+ }
+
+ // Reads the 11-bit block mode from the start of the block and decodes it
+ // into the weight grid parameters: grid size, dual-plane flag and maximum
+ // weight value. Void-extent blocks are flagged (one additional bit is read
+ // for them when bit 10 is set) and reserved modes set Error.
+ static TexelWeightParams DecodeBlockInfo(BitArrayStream BitStream)
+ {
+     TexelWeightParams TexelParams = new TexelWeightParams();
+
+     // Read the entire block mode all at once
+     ushort ModeBits = (ushort)BitStream.ReadBits(11);
+
+     // Does this match the void extent block mode?
+     if ((ModeBits & 0x01FF) == 0x1FC)
+     {
+         bool IsHdr = (ModeBits & 0x200) != 0;
+
+         TexelParams.VoidExtentHDR = IsHdr;
+         TexelParams.VoidExtentLDR = !IsHdr;
+
+         // Next two bits must be one. The second one is only consumed from the
+         // stream when the first one (bit 10) is set.
+         if ((ModeBits & 0x400) == 0 || BitStream.ReadBits(1) == 0)
+         {
+             TexelParams.Error = true;
+         }
+
+         return TexelParams;
+     }
+
+     // Reserved: the low four bits are all zero, or the low two bits are zero
+     // while bits [6-8] are all ones.
+     bool LowFourZero = (ModeBits & 0xF) == 0;
+     bool LowTwoZero  = (ModeBits & 0x3) == 0;
+
+     if (LowFourZero || (LowTwoZero && (ModeBits & 0x1C0) == 0x1C0))
+     {
+         TexelParams.Error = true;
+
+         return TexelParams;
+     }
+
+     // Otherwise, there is no error... Figure out the layout
+     // of the block mode. Layout is determined by a number
+     // between 0 and 9 corresponding to table C.2.8 of the
+     // ASTC spec.
+     bool Bit2 = (ModeBits & 0x004) != 0;
+     bool Bit3 = (ModeBits & 0x008) != 0;
+     bool Bit5 = (ModeBits & 0x020) != 0;
+     bool Bit7 = (ModeBits & 0x080) != 0;
+     bool Bit8 = (ModeBits & 0x100) != 0;
+
+     int Layout;
+
+     if (!LowTwoZero)
+     {
+         // layout is in [0-4]
+         if (!Bit3)
+         {
+             Layout = Bit2 ? 1 : 0;
+         }
+         else if (!Bit2)
+         {
+             Layout = 2;
+         }
+         else
+         {
+             Layout = Bit8 ? 4 : 3;
+         }
+     }
+     else if (!Bit8)
+     {
+         // layout is in [5-6]
+         Layout = Bit7 ? 6 : 5;
+     }
+     else if (!Bit7)
+     {
+         Layout = 9;
+     }
+     else
+     {
+         // layout is in [7-8]
+         Debug.Assert((ModeBits & 0x40) == 0);
+
+         Layout = Bit5 ? 8 : 7;
+     }
+
+     Debug.Assert(Layout < 10);
+
+     // Determine R: bit 4 is the low bit, the two high bits come from
+     // bits [0-1] (layouts 0-4) or bits [2-3] (layouts 5-9).
+     int R = (ModeBits >> 4) & 1;
+
+     R |= Layout < 5 ? (ModeBits & 0x3) << 1 : (ModeBits & 0xC) >> 1;
+
+     Debug.Assert(2 <= R && R <= 7);
+
+     // Determine width & height
+     int A         = (ModeBits >> 5) & 0x3;
+     int BTwoBits  = (ModeBits >> 7) & 0x3;
+     int BOneBit   = (ModeBits >> 7) & 0x1;
+     int BHighBits = (ModeBits >> 9) & 0x3;
+
+     switch (Layout)
+     {
+         case 0: TexelParams.Width = BTwoBits + 4; TexelParams.Height = A + 2;         break;
+         case 1: TexelParams.Width = BTwoBits + 8; TexelParams.Height = A + 2;         break;
+         case 2: TexelParams.Width = A + 2;        TexelParams.Height = BTwoBits + 8;  break;
+         case 3: TexelParams.Width = A + 2;        TexelParams.Height = BOneBit + 6;   break;
+         case 4: TexelParams.Width = BOneBit + 2;  TexelParams.Height = A + 2;         break;
+         case 5: TexelParams.Width = 12;           TexelParams.Height = A + 2;         break;
+         case 6: TexelParams.Width = A + 2;        TexelParams.Height = 12;            break;
+         case 7: TexelParams.Width = 6;            TexelParams.Height = 10;            break;
+         case 8: TexelParams.Width = 10;           TexelParams.Height = 6;             break;
+         case 9: TexelParams.Width = A + 6;        TexelParams.Height = BHighBits + 6; break;
+
+         default:
+             //Don't know this layout...
+             TexelParams.Error = true;
+             break;
+     }
+
+     // Determine whether or not we're using dual planes
+     // and/or high precision layouts. Layout 9 has neither.
+     bool HasFlagBits   = Layout != 9;
+     bool DualPlane     = HasFlagBits && (ModeBits & 0x400) != 0;
+     bool HighPrecision = HasFlagBits && (ModeBits & 0x200) != 0;
+
+     int[] MaxWeights = HighPrecision
+         ? new int[] { 9, 11, 15, 19, 23, 31 }
+         : new int[] { 1, 2, 3, 4, 5, 7 };
+
+     TexelParams.MaxWeight = MaxWeights[R - 2];
+     TexelParams.DualPlane = DualPlane;
+
+     return TexelParams;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs b/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs
new file mode 100644
index 00000000..c43eaf93
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs
@@ -0,0 +1,138 @@
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// A pixel made of four signed 16-bit components (A, R, G, B) that also
+    /// tracks how many bits of precision each component currently carries.
+    /// </summary>
+    class ASTCPixel
+    {
+        public short R { get; set; }
+        public short G { get; set; }
+        public short B { get; set; }
+        public short A { get; set; }
+
+        // Bit depth of each component, indexed like GetComponent: 0 = A, 1 = R, 2 = G, 3 = B.
+        byte[] BitDepth = new byte[4];
+
+        /// <summary>
+        /// Creates a pixel from its components; every component starts at a depth of 8 bits.
+        /// </summary>
+        public ASTCPixel(short _A, short _R, short _G, short _B)
+        {
+            A = _A;
+            R = _R;
+            G = _G;
+            B = _B;
+
+            for (int i = 0; i < 4; i++)
+            {
+                BitDepth[i] = 8;
+            }
+        }
+
+        /// <summary>
+        /// Clamps every component to the range [0, 255].
+        /// </summary>
+        public void ClampByte()
+        {
+            R = Math.Min(Math.Max(R, (short)0), (short)255);
+            G = Math.Min(Math.Max(G, (short)0), (short)255);
+            B = Math.Min(Math.Max(B, (short)0), (short)255);
+            A = Math.Min(Math.Max(A, (short)0), (short)255);
+        }
+
+        /// <summary>
+        /// Returns the component at <paramref name="Index"/> (0 = A, 1 = R, 2 = G, 3 = B),
+        /// or 0 for any other index.
+        /// </summary>
+        public short GetComponent(int Index)
+        {
+            switch (Index)
+            {
+                case 0: return A;
+                case 1: return R;
+                case 2: return G;
+                case 3: return B;
+            }
+
+            return 0;
+        }
+
+        /// <summary>
+        /// Stores <paramref name="Value"/> (truncated to 16 bits) into the component at
+        /// <paramref name="Index"/> (0 = A, 1 = R, 2 = G, 3 = B). Other indices are ignored.
+        /// </summary>
+        public void SetComponent(int Index, int Value)
+        {
+            switch (Index)
+            {
+                case 0:
+                    A = (short)Value;
+                    break;
+                case 1:
+                    R = (short)Value;
+                    break;
+                case 2:
+                    G = (short)Value;
+                    break;
+                case 3:
+                    B = (short)Value;
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Rescales every component from its current bit depth to the depth given in
+        /// <paramref name="Depth"/> (indexed A, R, G, B) and records the new depths.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="Depth"/> is null.</exception>
+        /// <exception cref="ArgumentException"><paramref name="Depth"/> has fewer than 4 entries.</exception>
+        public void ChangeBitDepth(byte[] Depth)
+        {
+            // Validate up front so a bad array cannot leave the pixel half converted.
+            if (Depth == null)
+            {
+                throw new ArgumentNullException(nameof(Depth));
+            }
+
+            if (Depth.Length < 4)
+            {
+                throw new ArgumentException("Four bit depths (A, R, G, B) are required.", nameof(Depth));
+            }
+
+            for (int i = 0; i < 4; i++)
+            {
+                int Value = ChangeBitDepth(GetComponent(i), BitDepth[i], Depth[i]);
+
+                SetComponent(i, Value);
+                BitDepth[i] = Depth[i];
+            }
+        }
+
+        // Converts a single component value between two bit depths (both expected to be <= 8).
+        short ChangeBitDepth(short Value, byte OldDepth, byte NewDepth)
+        {
+            Debug.Assert(NewDepth <= 8);
+            Debug.Assert(OldDepth <= 8);
+
+            if (OldDepth == NewDepth)
+            {
+                // Do nothing
+                return Value;
+            }
+            else if (OldDepth == 0 && NewDepth != 0)
+            {
+                // A channel that carried no bits becomes the maximum value of the new depth.
+                return (short)((1 << NewDepth) - 1);
+            }
+            else if (NewDepth > OldDepth)
+            {
+                // Widen by repeating the existing bit pattern.
+                return (short)BitArrayStream.Replicate(Value, OldDepth, NewDepth);
+            }
+            else
+            {
+                // oldDepth > newDepth
+                if (NewDepth == 0)
+                {
+                    return 0xFF;
+                }
+                else
+                {
+                    byte BitsWasted = (byte)(OldDepth - NewDepth);
+                    short TempValue = Value;
+
+                    // Round to nearest before dropping the low bits, then clamp to the new range.
+                    TempValue = (short)((TempValue + (1 << (BitsWasted - 1))) >> BitsWasted);
+                    TempValue = Math.Min(Math.Max((short)0, TempValue), (short)((1 << NewDepth) - 1));
+
+                    return (byte)(TempValue);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Packs the pixel, rescaled to 8 bits per component, into a 32-bit value
+        /// laid out as A in bits 31..24, B in 23..16, G in 15..8 and R in 7..0.
+        /// </summary>
+        public int Pack()
+        {
+            ASTCPixel NewPixel = new ASTCPixel(A, R, G, B);
+
+            // The constructor resets every depth to 8; carry over the real depths so the
+            // conversion below actually rescales components stored at a lower precision.
+            Array.Copy(BitDepth, NewPixel.BitDepth, BitDepth.Length);
+
+            byte[] eightBitDepth = { 8, 8, 8, 8 };
+
+            NewPixel.ChangeBitDepth(eightBitDepth);
+
+            return (byte)NewPixel.A << 24 |
+                   (byte)NewPixel.B << 16 |
+                   (byte)NewPixel.G << 8  |
+                   (byte)NewPixel.R << 0;
+        }
+
+        // Adds more precision to the blue channel as described
+        // in C.2.14
+        public static ASTCPixel BlueContract(int a, int r, int g, int b)
+        {
+            return new ASTCPixel((short)(a),
+                                 (short)((r + b) >> 1),
+                                 (short)((g + b) >> 1),
+                                 (short)(b));
+        }
+    }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs b/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs
new file mode 100644
index 00000000..2a8ed091
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs
@@ -0,0 +1,121 @@
+using System;
+using System.Collections;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Wraps a <see cref="BitArray"/> with a cursor for sequential bit reads and
+    /// writes, and hosts a few static bit-manipulation helpers.
+    /// </summary>
+    public class BitArrayStream
+    {
+        public BitArray BitsArray;
+
+        /// <summary>Index of the next bit the sequential read/write methods will touch.</summary>
+        public int Position { get; private set; }
+
+        public BitArrayStream(BitArray BitArray)
+        {
+            BitsArray = BitArray;
+            Position  = 0;
+        }
+
+        /// <summary>
+        /// Reads <paramref name="Length"/> bits at the cursor (least significant bit first),
+        /// advances the cursor and returns the value truncated to 16 bits.
+        /// </summary>
+        public short ReadBits(int Length)
+        {
+            int Result = 0;
+
+            for (int Bit = 0; Bit < Length; Bit++)
+            {
+                if (BitsArray[Position + Bit])
+                {
+                    Result |= 1 << Bit;
+                }
+            }
+
+            Position += Length;
+
+            return (short)Result;
+        }
+
+        /// <summary>
+        /// Reads the bits in the inclusive range [Start, End] without moving the cursor;
+        /// bit <paramref name="Start"/> becomes bit 0 of the result.
+        /// </summary>
+        public int ReadBits(int Start, int End)
+        {
+            int Result = 0;
+
+            for (int Index = Start, Shift = 0; Index <= End; Index++, Shift++)
+            {
+                if (BitsArray[Index])
+                {
+                    Result |= 1 << Shift;
+                }
+            }
+
+            return Result;
+        }
+
+        /// <summary>Returns the bit at <paramref name="Index"/> as 0 or 1.</summary>
+        public int ReadBit(int Index)
+        {
+            return BitsArray[Index] ? 1 : 0;
+        }
+
+        /// <summary>
+        /// Writes the low <paramref name="Length"/> bits of <paramref name="Value"/> at the
+        /// cursor (least significant bit first) and advances the cursor.
+        /// </summary>
+        public void WriteBits(int Value, int Length)
+        {
+            for (int Bit = 0; Bit < Length; Bit++)
+            {
+                BitsArray[Position + Bit] = ((Value >> Bit) & 1) != 0;
+            }
+
+            Position += Length;
+        }
+
+        /// <summary>Copies the whole bit array into a newly allocated byte array.</summary>
+        public byte[] ToByteArray()
+        {
+            int ByteCount = (BitsArray.Length + 7) / 8;
+
+            byte[] Bytes = new byte[ByteCount];
+
+            BitsArray.CopyTo(Bytes, 0);
+
+            return Bytes;
+        }
+
+        /// <summary>
+        /// Repeats the low <paramref name="NumberBits"/> bits of <paramref name="Value"/>
+        /// until the result is <paramref name="ToBit"/> bits long; the final repetition
+        /// contributes only its most significant bits when it does not fit entirely.
+        /// Returns 0 when either bit count is 0.
+        /// </summary>
+        public static int Replicate(int Value, int NumberBits, int ToBit)
+        {
+            if (NumberBits == 0 || ToBit == 0)
+            {
+                return 0;
+            }
+
+            int Pattern = Value & ((1 << NumberBits) - 1);
+            int Result  = Pattern;
+            int Filled  = NumberBits;
+
+            while (Filled < ToBit)
+            {
+                int Remaining = ToBit - Filled;
+
+                // Append a whole pattern, or just its top bits if fewer remain.
+                int Chunk = NumberBits > Remaining ? Remaining : NumberBits;
+
+                Result = (Result << Chunk) | (Pattern >> (NumberBits - Chunk));
+
+                Filled += Chunk;
+            }
+
+            return Result;
+        }
+
+        /// <summary>Counts the set bits in <paramref name="Number"/>.</summary>
+        public static int PopCnt(int Number)
+        {
+            int Count = 0;
+
+            while (Number != 0)
+            {
+                // Clears the lowest set bit.
+                Number &= Number - 1;
+                Count++;
+            }
+
+            return Count;
+        }
+
+        /// <summary>Exchanges the two referenced values.</summary>
+        public static void Swap<T>(ref T lhs, ref T rhs)
+        {
+            T Saved = rhs;
+            rhs = lhs;
+            lhs = Saved;
+        }
+
+        // Transfers a bit as described in C.2.14
+        public static void BitTransferSigned(ref int a, ref int b)
+        {
+            b = (b >> 1) | (a & 0x80);
+            a = (a >> 1) & 0x3F;
+
+            // Sign-extend the 6-bit result.
+            if ((a & 0x20) != 0)
+            {
+                a -= 0x40;
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs
new file mode 100644
index 00000000..9451291e
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs
@@ -0,0 +1,59 @@
+using System;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Computes byte offsets for texels of a block-linear image: 64-byte by 8-row
+    /// GOBs of 512 bytes each, stacked <c>BlockHeight</c> GOBs tall.
+    /// </summary>
+    class BlockLinearSwizzle : ISwizzle
+    {
+        private readonly int BhShift;
+        private readonly int BppShift;
+        private readonly int BhMask;
+
+        private readonly int XShift;
+        private readonly int GobStride;
+
+        /// <param name="Width">Image width, in the same units as the X passed to <see cref="GetSwizzleOffset"/>.</param>
+        /// <param name="Bpp">Bytes per X unit; only its lowest set bit is used to scale X, so it is expected to be a power of two.</param>
+        /// <param name="BlockHeight">Block height in GOBs; expected to be a power of two.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="Bpp"/> or a value derived from <paramref name="BlockHeight"/> is 0.
+        /// </exception>
+        public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16)
+        {
+            BhMask = (BlockHeight * 8) - 1;
+
+            BhShift  = CountLsbZeros(BlockHeight * 8);
+            BppShift = CountLsbZeros(Bpp);
+
+            // Integer ceiling of (Width * Bpp) / 64; avoids the float round trip.
+            int WidthInGobs = (Width * Bpp + 63) / 64;
+
+            GobStride = 512 * BlockHeight * WidthInGobs;
+
+            XShift = CountLsbZeros(512 * BlockHeight);
+        }
+
+        // Returns the index of the lowest set bit. Zero has no set bit, and the
+        // scan below would never terminate for it, so it is rejected instead.
+        private static int CountLsbZeros(int Value)
+        {
+            if (Value == 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(Value), "Value must have at least one bit set.");
+            }
+
+            int Count = 0;
+
+            while (((Value >> Count) & 1) == 0)
+            {
+                Count++;
+            }
+
+            return Count;
+        }
+
+        /// <summary>
+        /// Returns the byte offset of the texel at (<paramref name="X"/>, <paramref name="Y"/>).
+        /// </summary>
+        public int GetSwizzleOffset(int X, int Y)
+        {
+            // From here on X is a byte offset within the row.
+            X <<= BppShift;
+
+            // Row of blocks, then the block (64-byte column) within that row.
+            int Position = (Y >> BhShift) * GobStride;
+
+            Position += (X >> 6) << XShift;
+
+            // GOB within the block (each GOB is 512 bytes and 8 rows tall).
+            Position += ((Y & BhMask) >> 3) << 9;
+
+            // Interleave the remaining X and Y bits inside the GOB.
+            Position += ((X & 0x3f) >> 5) << 8;
+            Position += ((Y & 0x07) >> 1) << 6;
+            Position += ((X & 0x1f) >> 4) << 5;
+            Position += ((Y & 0x01) >> 0) << 4;
+            Position += ((X & 0x0f) >> 0) << 0;
+
+            return Position;
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs
new file mode 100644
index 00000000..583fc20c
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs
@@ -0,0 +1,7 @@
+namespace Ryujinx.Graphics.Texture
+{
+ /// <summary>
+ /// Maps a pair of image coordinates to an integer offset. The implementations
+ /// in this namespace return a byte offset that callers add to a base address.
+ /// </summary>
+ interface ISwizzle
+ {
+ /// <summary>Returns the offset for the element at (X, Y).</summary>
+ int GetSwizzleOffset(int X, int Y);
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
new file mode 100644
index 00000000..479be5b1
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
@@ -0,0 +1,445 @@
+using ChocolArm64.Memory;
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Memory;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Format conversion tables and texel transfer helpers for GAL images.
+    /// </summary>
+    public static class ImageUtils
+    {
+        // Which kinds of data a format stores.
+        [Flags]
+        private enum TargetBuffer
+        {
+            Color   = 1 << 0,
+            Depth   = 1 << 1,
+            Stencil = 1 << 2,
+
+            DepthStencil = Depth | Stencil
+        }
+
+        // Bytes per block plus the block dimensions in pixels (1x1 for uncompressed formats).
+        private struct ImageDescriptor
+        {
+            public int BytesPerPixel { get; }
+            public int BlockWidth    { get; }
+            public int BlockHeight   { get; }
+
+            public TargetBuffer Target { get; }
+
+            public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, TargetBuffer Target)
+            {
+                this.Target        = Target;
+                this.BlockHeight   = BlockHeight;
+                this.BlockWidth    = BlockWidth;
+                this.BytesPerPixel = BytesPerPixel;
+            }
+        }
+
+        // Short aliases for the component type flags used in the tables below.
+        private const GalImageFormat Snorm = GalImageFormat.Snorm;
+        private const GalImageFormat Unorm = GalImageFormat.Unorm;
+        private const GalImageFormat Sint  = GalImageFormat.Sint;
+        private const GalImageFormat Uint  = GalImageFormat.Uint;
+        private const GalImageFormat Float = GalImageFormat.Float;
+        private const GalImageFormat Srgb  = GalImageFormat.Srgb;
+
+        // Texture format -> image format OR-ed with every component type it supports.
+        private static readonly Dictionary<GalTextureFormat, GalImageFormat> s_TextureTable =
+            new Dictionary<GalTextureFormat, GalImageFormat>()
+        {
+            { GalTextureFormat.RGBA32,     GalImageFormat.RGBA32    | Sint | Uint | Float },
+            { GalTextureFormat.RGBA16,     GalImageFormat.RGBA16    | Snorm | Unorm | Sint | Uint | Float },
+            { GalTextureFormat.RG32,       GalImageFormat.RG32      | Sint | Uint | Float },
+            { GalTextureFormat.RGBA8,      GalImageFormat.RGBA8     | Snorm | Unorm | Sint | Uint | Srgb },
+            { GalTextureFormat.RGB10A2,    GalImageFormat.RGB10A2   | Snorm | Unorm | Sint | Uint },
+            { GalTextureFormat.RG8,        GalImageFormat.RG8       | Snorm | Unorm | Sint | Uint },
+            { GalTextureFormat.R16,        GalImageFormat.R16       | Snorm | Unorm | Sint | Uint | Float },
+            { GalTextureFormat.R8,         GalImageFormat.R8        | Snorm | Unorm | Sint | Uint },
+            { GalTextureFormat.RG16,       GalImageFormat.RG16      | Snorm | Unorm | Float },
+            { GalTextureFormat.R32,        GalImageFormat.R32       | Sint | Uint | Float },
+            { GalTextureFormat.RGBA4,      GalImageFormat.RGBA4     | Unorm },
+            { GalTextureFormat.RGB5A1,     GalImageFormat.RGB5A1    | Unorm },
+            { GalTextureFormat.RGB565,     GalImageFormat.RGB565    | Unorm },
+            { GalTextureFormat.R11G11B10F, GalImageFormat.R11G11B10 | Float },
+            { GalTextureFormat.D24S8,      GalImageFormat.D24S8     | Unorm | Uint },
+            { GalTextureFormat.D32F,       GalImageFormat.D32       | Float },
+            { GalTextureFormat.D32FX24S8,  GalImageFormat.D32S8     | Float },
+            { GalTextureFormat.D16,        GalImageFormat.D16       | Unorm },
+
+            //Compressed formats
+            { GalTextureFormat.BptcSfloat,  GalImageFormat.BptcSfloat  | Float },
+            { GalTextureFormat.BptcUfloat,  GalImageFormat.BptcUfloat  | Float },
+            { GalTextureFormat.BptcUnorm,   GalImageFormat.BptcUnorm   | Unorm | Srgb },
+            { GalTextureFormat.BC1,         GalImageFormat.BC1         | Unorm | Srgb },
+            { GalTextureFormat.BC2,         GalImageFormat.BC2         | Unorm | Srgb },
+            { GalTextureFormat.BC3,         GalImageFormat.BC3         | Unorm | Srgb },
+            { GalTextureFormat.BC4,         GalImageFormat.BC4         | Unorm | Snorm },
+            { GalTextureFormat.BC5,         GalImageFormat.BC5         | Unorm | Snorm },
+            { GalTextureFormat.Astc2D4x4,   GalImageFormat.Astc2D4x4   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D5x5,   GalImageFormat.Astc2D5x5   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D6x6,   GalImageFormat.Astc2D6x6   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D8x8,   GalImageFormat.Astc2D8x8   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D10x10, GalImageFormat.Astc2D10x10 | Unorm | Srgb },
+            { GalTextureFormat.Astc2D12x12, GalImageFormat.Astc2D12x12 | Unorm | Srgb },
+            { GalTextureFormat.Astc2D5x4,   GalImageFormat.Astc2D5x4   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D6x5,   GalImageFormat.Astc2D6x5   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D8x6,   GalImageFormat.Astc2D8x6   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D10x8,  GalImageFormat.Astc2D10x8  | Unorm | Srgb },
+            { GalTextureFormat.Astc2D12x10, GalImageFormat.Astc2D12x10 | Unorm | Srgb },
+            { GalTextureFormat.Astc2D8x5,   GalImageFormat.Astc2D8x5   | Unorm | Srgb },
+            { GalTextureFormat.Astc2D10x5,  GalImageFormat.Astc2D10x5  | Unorm | Srgb },
+            { GalTextureFormat.Astc2D10x6,  GalImageFormat.Astc2D10x6  | Unorm | Srgb }
+        };
+
+        // Image format (type flags masked off) -> block size and target description.
+        private static readonly Dictionary<GalImageFormat, ImageDescriptor> s_ImageTable =
+            new Dictionary<GalImageFormat, ImageDescriptor>()
+        {
+            { GalImageFormat.RGBA32,      new ImageDescriptor(16, 1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGBA16,      new ImageDescriptor(8,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RG32,        new ImageDescriptor(8,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGBX8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGBA8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.BGRA8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGB10A2,     new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.R32,         new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGBA4,       new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.BptcSfloat,  new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BptcUfloat,  new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BGR5A1,      new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGB5A1,      new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGB565,      new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.BptcUnorm,   new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.RG16,        new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RG8,         new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.R16,         new ImageDescriptor(2,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.R8,          new ImageDescriptor(1,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.R11G11B10,   new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.BC1,         new ImageDescriptor(8,  4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BC2,         new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BC3,         new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BC4,         new ImageDescriptor(8,  4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.BC5,         new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D4x4,   new ImageDescriptor(16, 4,  4,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D5x5,   new ImageDescriptor(16, 5,  5,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D6x6,   new ImageDescriptor(16, 6,  6,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D8x8,   new ImageDescriptor(16, 8,  8,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D10x10, new ImageDescriptor(16, 10, 10, TargetBuffer.Color) },
+            { GalImageFormat.Astc2D12x12, new ImageDescriptor(16, 12, 12, TargetBuffer.Color) },
+            { GalImageFormat.Astc2D5x4,   new ImageDescriptor(16, 5,  4,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D6x5,   new ImageDescriptor(16, 6,  5,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D8x6,   new ImageDescriptor(16, 8,  6,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D10x8,  new ImageDescriptor(16, 10, 8,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D12x10, new ImageDescriptor(16, 12, 10, TargetBuffer.Color) },
+            { GalImageFormat.Astc2D8x5,   new ImageDescriptor(16, 8,  5,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D10x5,  new ImageDescriptor(16, 10, 5,  TargetBuffer.Color) },
+            { GalImageFormat.Astc2D10x6,  new ImageDescriptor(16, 10, 6,  TargetBuffer.Color) },
+
+            { GalImageFormat.D16,   new ImageDescriptor(2, 1, 1, TargetBuffer.Depth)        },
+            { GalImageFormat.D24,   new ImageDescriptor(4, 1, 1, TargetBuffer.Depth)        },
+            { GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, TargetBuffer.DepthStencil) },
+            { GalImageFormat.D32,   new ImageDescriptor(4, 1, 1, TargetBuffer.Depth)        },
+            { GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, TargetBuffer.DepthStencil) }
+        };
+
+        /// <summary>
+        /// Converts a texture format plus its component types into an image format.
+        /// The red component type selects the type flag unless <paramref name="ConvSrgb"/> is set.
+        /// </summary>
+        /// <exception cref="NotImplementedException">
+        /// The format is unknown, a non-depth format uses differing component types,
+        /// or the format does not support the requested type.
+        /// </exception>
+        public static GalImageFormat ConvertTexture(
+            GalTextureFormat Format,
+            GalTextureType   RType,
+            GalTextureType   GType,
+            GalTextureType   BType,
+            GalTextureType   AType,
+            bool             ConvSrgb)
+        {
+            GalImageFormat ImageFormat;
+
+            bool Known = s_TextureTable.TryGetValue(Format, out ImageFormat);
+
+            if (!Known)
+            {
+                throw new NotImplementedException($"Format 0x{((int)Format):x} not implemented!");
+            }
+
+            if (!HasDepth(ImageFormat))
+            {
+                bool UniformTypes = RType == GType && RType == BType && RType == AType;
+
+                if (!UniformTypes)
+                {
+                    throw new NotImplementedException($"Per component types are not implemented!");
+                }
+            }
+
+            GalImageFormat FormatType;
+
+            if (ConvSrgb)
+            {
+                FormatType = Srgb;
+            }
+            else
+            {
+                FormatType = GetFormatType(RType);
+            }
+
+            GalImageFormat CombinedFormat = (ImageFormat & GalImageFormat.FormatMask) | FormatType;
+
+            if (ImageFormat.HasFlag(FormatType))
+            {
+                return CombinedFormat;
+            }
+
+            throw new NotImplementedException($"Format \"{CombinedFormat}\" not implemented!");
+        }
+
+        /// <summary>Converts a color surface format into an image format.</summary>
+        /// <exception cref="NotImplementedException">The surface format has no mapping.</exception>
+        public static GalImageFormat ConvertSurface(GalSurfaceFormat Format)
+        {
+            switch (Format)
+            {
+                case GalSurfaceFormat.RGBA32Float:    return GalImageFormat.RGBA32    | Float;
+                case GalSurfaceFormat.RGBA32Uint:     return GalImageFormat.RGBA32    | Uint;
+                case GalSurfaceFormat.RGBA16Float:    return GalImageFormat.RGBA16    | Float;
+                case GalSurfaceFormat.RGBA16Unorm:    return GalImageFormat.RGBA16    | Unorm;
+                case GalSurfaceFormat.RG32Float:      return GalImageFormat.RG32      | Float;
+                case GalSurfaceFormat.RG32Sint:       return GalImageFormat.RG32      | Sint;
+                case GalSurfaceFormat.RG32Uint:       return GalImageFormat.RG32      | Uint;
+                case GalSurfaceFormat.BGRA8Unorm:     return GalImageFormat.BGRA8     | Unorm;
+                case GalSurfaceFormat.BGRA8Srgb:      return GalImageFormat.BGRA8     | Srgb;
+                case GalSurfaceFormat.RGB10A2Unorm:   return GalImageFormat.RGB10A2   | Unorm;
+                case GalSurfaceFormat.RGBA8Unorm:     return GalImageFormat.RGBA8     | Unorm;
+                case GalSurfaceFormat.RGBA8Srgb:      return GalImageFormat.RGBA8     | Srgb;
+                case GalSurfaceFormat.RGBA8Snorm:     return GalImageFormat.RGBA8     | Snorm;
+                case GalSurfaceFormat.RG16Snorm:      return GalImageFormat.RG16      | Snorm;
+                case GalSurfaceFormat.RG16Unorm:      return GalImageFormat.RG16      | Unorm;
+                case GalSurfaceFormat.RG16Float:      return GalImageFormat.RG16      | Float;
+                case GalSurfaceFormat.R11G11B10Float: return GalImageFormat.R11G11B10 | Float;
+                case GalSurfaceFormat.R32Float:       return GalImageFormat.R32       | Float;
+                case GalSurfaceFormat.R32Uint:        return GalImageFormat.R32       | Uint;
+                case GalSurfaceFormat.RG8Unorm:       return GalImageFormat.RG8       | Unorm;
+                case GalSurfaceFormat.RG8Snorm:       return GalImageFormat.RG8       | Snorm;
+                case GalSurfaceFormat.R16Float:       return GalImageFormat.R16       | Float;
+                case GalSurfaceFormat.R16Unorm:       return GalImageFormat.R16       | Unorm;
+                case GalSurfaceFormat.R16Uint:        return GalImageFormat.R16       | Uint;
+                case GalSurfaceFormat.R8Unorm:        return GalImageFormat.R8        | Unorm;
+                case GalSurfaceFormat.R8Uint:         return GalImageFormat.R8        | Uint;
+                case GalSurfaceFormat.B5G6R5Unorm:    return GalImageFormat.RGB565    | Unorm;
+                case GalSurfaceFormat.BGR5A1Unorm:    return GalImageFormat.BGR5A1    | Unorm;
+                case GalSurfaceFormat.RGBX8Unorm:     return GalImageFormat.RGBX8     | Unorm;
+
+                default: throw new NotImplementedException(Format.ToString());
+            }
+        }
+
+        /// <summary>Converts a depth/stencil (zeta) format into an image format.</summary>
+        /// <exception cref="NotImplementedException">The zeta format has no mapping.</exception>
+        public static GalImageFormat ConvertZeta(GalZetaFormat Format)
+        {
+            switch (Format)
+            {
+                case GalZetaFormat.D32Float:      return GalImageFormat.D32   | Float;
+                case GalZetaFormat.S8D24Unorm:    return GalImageFormat.D24S8 | Unorm;
+                case GalZetaFormat.D16Unorm:      return GalImageFormat.D16   | Unorm;
+                case GalZetaFormat.D24X8Unorm:    return GalImageFormat.D24   | Unorm;
+                case GalZetaFormat.D24S8Unorm:    return GalImageFormat.D24S8 | Unorm;
+                case GalZetaFormat.D32S8X24Float: return GalImageFormat.D32S8 | Float;
+
+                default: throw new NotImplementedException(Format.ToString());
+            }
+        }
+
+        /// <summary>
+        /// Reads an image from memory block by block through its swizzle and returns
+        /// it as rows whose stride is rounded up to a multiple of 4 bytes.
+        /// </summary>
+        public static byte[] ReadTexture(IMemory Memory, GalImage Image, long Position)
+        {
+            // A GPU VMM exposes the backing memory; anything else is cast to MemoryManager directly.
+            MemoryManager CpuMemory = Memory is NvGpuVmm Vmm ? Vmm.Memory : (MemoryManager)Memory;
+
+            ISwizzle Swizzle = TextureHelper.GetSwizzle(Image);
+
+            ImageDescriptor Desc = GetImageDescriptor(Image.Format);
+
+            (int Width, int Height) = GetImageSizeInBlocks(Image);
+
+            int BlockBytes = Desc.BytesPerPixel;
+
+            //Note: Each row of the texture needs to be aligned to 4 bytes.
+            int Pitch = (Width * BlockBytes + 3) & ~3;
+
+            byte[] Data = new byte[Height * Pitch];
+
+            for (int Y = 0; Y < Height; Y++)
+            {
+                int RowStart = Y * Pitch;
+
+                for (int X = 0; X < Width; X++)
+                {
+                    long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
+
+                    CpuMemory.ReadBytes(Position + Offset, Data, RowStart + X * BlockBytes, BlockBytes);
+                }
+            }
+
+            return Data;
+        }
+
+        /// <summary>
+        /// Writes tightly packed block data into memory, placing each block at the
+        /// offset given by the image's swizzle.
+        /// </summary>
+        public static void WriteTexture(NvGpuVmm Vmm, GalImage Image, long Position, byte[] Data)
+        {
+            ISwizzle Swizzle = TextureHelper.GetSwizzle(Image);
+
+            ImageDescriptor Desc = GetImageDescriptor(Image.Format);
+
+            (int Width, int Height) = ImageUtils.GetImageSizeInBlocks(Image);
+
+            int BlockBytes = Desc.BytesPerPixel;
+
+            int Cursor = 0;
+
+            for (int Y = 0; Y < Height; Y++)
+            {
+                for (int X = 0; X < Width; X++)
+                {
+                    long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
+
+                    Vmm.Memory.WriteBytes(Position + Offset, Data, Cursor, BlockBytes);
+
+                    Cursor += BlockBytes;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies a Width x Height region between two images, one element at a time.
+        /// Returns false without copying when the formats differ in bytes per element.
+        /// </summary>
+        public static bool CopyTexture(
+            NvGpuVmm Vmm,
+            GalImage SrcImage,
+            GalImage DstImage,
+            long     SrcAddress,
+            long     DstAddress,
+            int      SrcX,
+            int      SrcY,
+            int      DstX,
+            int      DstY,
+            int      Width,
+            int      Height)
+        {
+            ISwizzle SrcSwizzle = TextureHelper.GetSwizzle(SrcImage);
+            ISwizzle DstSwizzle = TextureHelper.GetSwizzle(DstImage);
+
+            int BytesPerPixel = GetImageDescriptor(SrcImage.Format).BytesPerPixel;
+
+            if (GetImageDescriptor(DstImage.Format).BytesPerPixel != BytesPerPixel)
+            {
+                return false;
+            }
+
+            for (int Row = 0; Row < Height; Row++)
+            {
+                for (int Col = 0; Col < Width; Col++)
+                {
+                    long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + Col, SrcY + Row);
+                    long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + Col, DstY + Row);
+
+                    byte[] Texel = Vmm.ReadBytes(SrcAddress + SrcOffset, BytesPerPixel);
+
+                    Vmm.WriteBytes(DstAddress + DstOffset, Texel);
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>Returns the size in bytes of the image when stored as tightly packed blocks.</summary>
+        public static int GetSize(GalImage Image)
+        {
+            ImageDescriptor Desc = GetImageDescriptor(Image.Format);
+
+            int BlocksX = DivRoundUp(Image.Width,  Desc.BlockWidth);
+            int BlocksY = DivRoundUp(Image.Height, Desc.BlockHeight);
+
+            return Desc.BytesPerPixel * BlocksX * BlocksY;
+        }
+
+        /// <summary>Returns the row size in bytes for the given width, rounded up to a multiple of 32.</summary>
+        public static int GetPitch(GalImageFormat Format, int Width)
+        {
+            ImageDescriptor Desc = GetImageDescriptor(Format);
+
+            int RowBytes = Desc.BytesPerPixel * DivRoundUp(Width, Desc.BlockWidth);
+
+            return (RowBytes + 0x1f) & ~0x1f;
+        }
+
+        public static int GetBlockWidth(GalImageFormat Format) => GetImageDescriptor(Format).BlockWidth;
+
+        public static int GetBlockHeight(GalImageFormat Format) => GetImageDescriptor(Format).BlockHeight;
+
+        /// <summary>
+        /// Rounds the image width up to the alignment implied by its memory layout.
+        /// </summary>
+        public static int GetAlignedWidth(GalImage Image)
+        {
+            ImageDescriptor Desc = GetImageDescriptor(Image.Format);
+
+            int AlignMask = Image.Layout == GalMemoryLayout.BlockLinear
+                ? Image.TileWidth * (64 / Desc.BytesPerPixel) - 1
+                : (32 / Desc.BytesPerPixel) - 1;
+
+            return (Image.Width + AlignMask) & ~AlignMask;
+        }
+
+        /// <summary>Returns the image dimensions measured in blocks, rounding partial blocks up.</summary>
+        public static (int Width, int Height) GetImageSizeInBlocks(GalImage Image)
+        {
+            ImageDescriptor Desc = GetImageDescriptor(Image.Format);
+
+            int BlocksX = DivRoundUp(Image.Width,  Desc.BlockWidth);
+            int BlocksY = DivRoundUp(Image.Height, Desc.BlockHeight);
+
+            return (BlocksX, BlocksY);
+        }
+
+        public static int GetBytesPerPixel(GalImageFormat Format) => GetImageDescriptor(Format).BytesPerPixel;
+
+        // Integer division rounding up.
+        private static int DivRoundUp(int LHS, int RHS) => (LHS + (RHS - 1)) / RHS;
+
+        public static bool HasColor(GalImageFormat Format)
+        {
+            TargetBuffer Target = GetImageDescriptor(Format).Target;
+
+            return (Target & TargetBuffer.Color) != 0;
+        }
+
+        public static bool HasDepth(GalImageFormat Format)
+        {
+            TargetBuffer Target = GetImageDescriptor(Format).Target;
+
+            return (Target & TargetBuffer.Depth) != 0;
+        }
+
+        public static bool HasStencil(GalImageFormat Format)
+        {
+            TargetBuffer Target = GetImageDescriptor(Format).Target;
+
+            return (Target & TargetBuffer.Stencil) != 0;
+        }
+
+        /// <summary>Returns true when the format's block is not 1x1.</summary>
+        public static bool IsCompressed(GalImageFormat Format)
+        {
+            ImageDescriptor Desc = GetImageDescriptor(Format);
+
+            int Combined = Desc.BlockWidth | Desc.BlockHeight;
+
+            return Combined != 1;
+        }
+
+        // Looks up the descriptor of a format after masking off its type flags.
+        private static ImageDescriptor GetImageDescriptor(GalImageFormat Format)
+        {
+            GalImageFormat PixelFormat = Format & GalImageFormat.FormatMask;
+
+            ImageDescriptor Descriptor;
+
+            if (!s_ImageTable.TryGetValue(PixelFormat, out Descriptor))
+            {
+                throw new NotImplementedException($"Format \"{PixelFormat}\" not implemented!");
+            }
+
+            return Descriptor;
+        }
+
+        // Maps a texture component type to the matching image format type flag.
+        private static GalImageFormat GetFormatType(GalTextureType Type)
+        {
+            switch (Type)
+            {
+                case GalTextureType.Snorm: return Snorm;
+                case GalTextureType.Unorm: return Unorm;
+                case GalTextureType.Sint:  return Sint;
+                case GalTextureType.Uint:  return Uint;
+                case GalTextureType.Float: return Float;
+            }
+
+            throw new NotImplementedException(((int)Type).ToString());
+        }
+    }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/IntegerEncoded.cs b/Ryujinx.Graphics/Graphics3d/Texture/IntegerEncoded.cs
new file mode 100644
index 00000000..683cb770
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/IntegerEncoded.cs
@@ -0,0 +1,269 @@
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// One value of a bounded integer sequence: a run of plain bits, optionally
+    /// paired with a trit or a quint, plus the static decoders that produce such values.
+    /// </summary>
+    public struct IntegerEncoded
+    {
+        public enum EIntegerEncoding
+        {
+            JustBits,
+            Quint,
+            Trit
+        }
+
+        EIntegerEncoding Encoding;
+        public int NumberBits { get; private set; }
+        public int BitValue   { get; private set; }
+        public int TritValue  { get; private set; }
+        public int QuintValue { get; private set; }
+
+        public IntegerEncoded(EIntegerEncoding _Encoding, int NumBits)
+        {
+            Encoding   = _Encoding;
+            NumberBits = NumBits;
+            BitValue   = 0;
+            TritValue  = 0;
+            QuintValue = 0;
+        }
+
+        /// <summary>True when both values use the same encoding kind and bit count.</summary>
+        public bool MatchesEncoding(IntegerEncoded Other)
+        {
+            if (Encoding != Other.Encoding)
+            {
+                return false;
+            }
+
+            return NumberBits == Other.NumberBits;
+        }
+
+        public EIntegerEncoding GetEncoding()
+        {
+            return Encoding;
+        }
+
+        /// <summary>
+        /// Returns how many bits <paramref name="NumberVals"/> values occupy in this encoding.
+        /// </summary>
+        public int GetBitLength(int NumberVals)
+        {
+            int PlainBits = NumberBits * NumberVals;
+
+            switch (Encoding)
+            {
+                // Five trits share 8 bits; round the partial block up.
+                case EIntegerEncoding.Trit: return PlainBits + (NumberVals * 8 + 4) / 5;
+
+                // Three quints share 7 bits; round the partial block up.
+                case EIntegerEncoding.Quint: return PlainBits + (NumberVals * 7 + 2) / 3;
+
+                default: return PlainBits;
+            }
+        }
+
+        // True when at most one bit of Value is set.
+        private static bool HasSingleBitOrNone(int Value)
+        {
+            return (Value & (Value - 1)) == 0;
+        }
+
+        // Extracts bits Low..High (inclusive) of Value, with bit Low ending up at bit 0.
+        private static int Bits(int Value, int Low, int High)
+        {
+            int Width = High - Low + 1;
+
+            return (Value >> Low) & ((1 << Width) - 1);
+        }
+
+        // Extracts a single bit of Value as 0 or 1.
+        private static int Bit(int Value, int Index)
+        {
+            return (Value >> Index) & 1;
+        }
+
+        /// <summary>
+        /// Picks the encoding for the largest representable maximum that does not
+        /// exceed <paramref name="MaxVal"/>; falls back to zero plain bits.
+        /// </summary>
+        public static IntegerEncoded CreateEncoding(int MaxVal)
+        {
+            for (int Candidate = MaxVal; Candidate > 0; Candidate--)
+            {
+                int Range = Candidate + 1;
+
+                // Is the candidate of the type 2^n - 1?
+                if (HasSingleBitOrNone(Range))
+                {
+                    return new IntegerEncoded(EIntegerEncoding.JustBits, BitArrayStream.PopCnt(Candidate));
+                }
+
+                // Is the candidate of the type 3*2^n - 1?
+                if (Range % 3 == 0 && HasSingleBitOrNone(Range / 3))
+                {
+                    return new IntegerEncoded(EIntegerEncoding.Trit, BitArrayStream.PopCnt(Range / 3 - 1));
+                }
+
+                // Is the candidate of the type 5*2^n - 1?
+                if (Range % 5 == 0 && HasSingleBitOrNone(Range / 5))
+                {
+                    return new IntegerEncoded(EIntegerEncoding.Quint, BitArrayStream.PopCnt(Range / 5 - 1));
+                }
+
+                // Not representable as a bounded integer sequence; try the next lower maximum.
+            }
+
+            return new IntegerEncoded(EIntegerEncoding.JustBits, 0);
+        }
+
+        /// <summary>
+        /// Reads one block of five trit-encoded values from the stream and appends them to the list.
+        /// </summary>
+        public static void DecodeTritBlock(
+            BitArrayStream       BitStream,
+            List<IntegerEncoded> ListIntegerEncoded,
+            int                  NumberBitsPerValue)
+        {
+            // Implement the algorithm in section C.2.12
+            int[] PlainBits = new int[5];
+            int[] Trits     = new int[5];
+
+            // Read the trit encoded block according to
+            // table C.2.14: the 8 packed bits are interleaved with the plain bits.
+            PlainBits[0] = BitStream.ReadBits(NumberBitsPerValue);
+            int Packed   = BitStream.ReadBits(2);
+            PlainBits[1] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(2) << 2;
+            PlainBits[2] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(1) << 4;
+            PlainBits[3] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(2) << 5;
+            PlainBits[4] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(1) << 7;
+
+            int Low;
+
+            if (Bits(Packed, 2, 4) == 7)
+            {
+                Low = (Bits(Packed, 5, 7) << 2) | Bits(Packed, 0, 1);
+
+                Trits[3] = 2;
+                Trits[4] = 2;
+            }
+            else
+            {
+                Low = Bits(Packed, 0, 4);
+
+                if (Bits(Packed, 5, 6) == 3)
+                {
+                    Trits[4] = 2;
+                    Trits[3] = Bit(Packed, 7);
+                }
+                else
+                {
+                    Trits[4] = Bit(Packed, 7);
+                    Trits[3] = Bits(Packed, 5, 6);
+                }
+            }
+
+            if (Bits(Low, 0, 1) == 3)
+            {
+                Trits[2] = 2;
+                Trits[1] = Bit(Low, 4);
+                Trits[0] = (Bit(Low, 3) << 1) | (Bit(Low, 2) & ~Bit(Low, 3));
+            }
+            else if (Bits(Low, 2, 3) == 3)
+            {
+                Trits[2] = 2;
+                Trits[1] = 2;
+                Trits[0] = Bits(Low, 0, 1);
+            }
+            else
+            {
+                Trits[2] = Bit(Low, 4);
+                Trits[1] = Bits(Low, 2, 3);
+                Trits[0] = (Bit(Low, 1) << 1) | (Bit(Low, 0) & ~Bit(Low, 1));
+            }
+
+            for (int Index = 0; Index < 5; Index++)
+            {
+                IntegerEncoded Decoded = new IntegerEncoded(EIntegerEncoding.Trit, NumberBitsPerValue);
+
+                Decoded.BitValue  = PlainBits[Index];
+                Decoded.TritValue = Trits[Index];
+
+                ListIntegerEncoded.Add(Decoded);
+            }
+        }
+
+        /// <summary>
+        /// Reads one block of three quint-encoded values from the stream and appends them to the list.
+        /// </summary>
+        public static void DecodeQuintBlock(
+            BitArrayStream       BitStream,
+            List<IntegerEncoded> ListIntegerEncoded,
+            int                  NumberBitsPerValue)
+        {
+            // Implement the algorithm in section C.2.12
+            int[] PlainBits = new int[3];
+            int[] Quints    = new int[3];
+
+            // Read the quint encoded block according to
+            // table C.2.15: the 7 packed bits are interleaved with the plain bits.
+            PlainBits[0] = BitStream.ReadBits(NumberBitsPerValue);
+            int Packed   = BitStream.ReadBits(3);
+            PlainBits[1] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(2) << 3;
+            PlainBits[2] = BitStream.ReadBits(NumberBitsPerValue);
+            Packed      |= BitStream.ReadBits(2) << 5;
+
+            bool MiddleBitsSet = Bits(Packed, 1, 2) == 3;
+
+            if (MiddleBitsSet && Bits(Packed, 5, 6) == 0)
+            {
+                Quints[0] = 4;
+                Quints[1] = 4;
+                Quints[2] = (Bit(Packed, 0) << 2) |
+                            ((Bit(Packed, 4) & ~Bit(Packed, 0)) << 1) |
+                            (Bit(Packed, 3) & ~Bit(Packed, 0));
+            }
+            else
+            {
+                int Low;
+
+                if (MiddleBitsSet)
+                {
+                    Quints[2] = 4;
+
+                    Low = (Bits(Packed, 3, 4) << 3) | ((~Bits(Packed, 5, 6) & 3) << 1) | Bit(Packed, 0);
+                }
+                else
+                {
+                    Quints[2] = Bits(Packed, 5, 6);
+
+                    Low = Bits(Packed, 0, 4);
+                }
+
+                if (Bits(Low, 0, 2) == 5)
+                {
+                    Quints[1] = 4;
+                    Quints[0] = Bits(Low, 3, 4);
+                }
+                else
+                {
+                    Quints[1] = Bits(Low, 3, 4);
+                    Quints[0] = Bits(Low, 0, 2);
+                }
+            }
+
+            for (int Index = 0; Index < 3; Index++)
+            {
+                IntegerEncoded Decoded = new IntegerEncoded(EIntegerEncoding.Quint, NumberBitsPerValue);
+
+                Decoded.BitValue   = PlainBits[Index];
+                Decoded.QuintValue = Quints[Index];
+
+                ListIntegerEncoded.Add(Decoded);
+            }
+        }
+
+        /// <summary>
+        /// Decodes values from the stream into the list until at least
+        /// <paramref name="NumberValues"/> have been produced. Trit and quint encodings
+        /// decode whole blocks (5 or 3 values), so more values than requested may be appended.
+        /// </summary>
+        public static void DecodeIntegerSequence(
+            List<IntegerEncoded> DecodeIntegerSequence,
+            BitArrayStream       BitStream,
+            int                  MaxRange,
+            int                  NumberValues)
+        {
+            // Determine encoding parameters
+            IntegerEncoded IntEncoded = CreateEncoding(MaxRange);
+
+            // Start decoding
+            int Decoded = 0;
+
+            while (Decoded < NumberValues)
+            {
+                EIntegerEncoding Kind = IntEncoded.GetEncoding();
+
+                if (Kind == EIntegerEncoding.Quint)
+                {
+                    DecodeQuintBlock(BitStream, DecodeIntegerSequence, IntEncoded.NumberBits);
+
+                    Decoded += 3;
+                }
+                else if (Kind == EIntegerEncoding.Trit)
+                {
+                    DecodeTritBlock(BitStream, DecodeIntegerSequence, IntEncoded.NumberBits);
+
+                    Decoded += 5;
+                }
+                else if (Kind == EIntegerEncoding.JustBits)
+                {
+                    IntEncoded.BitValue = BitStream.ReadBits(IntEncoded.NumberBits);
+
+                    DecodeIntegerSequence.Add(IntEncoded);
+
+                    Decoded++;
+                }
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs
new file mode 100644
index 00000000..ef468e27
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs
@@ -0,0 +1,19 @@
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Swizzle for pitch-linear images: rows are <c>Pitch</c> bytes apart and
+    /// consecutive elements within a row are <c>Bpp</c> bytes apart.
+    /// </summary>
+    class LinearSwizzle : ISwizzle
+    {
+        private readonly int Pitch;
+        private readonly int Bpp;
+
+        public LinearSwizzle(int Pitch, int Bpp)
+        {
+            this.Bpp   = Bpp;
+            this.Pitch = Pitch;
+        }
+
+        /// <summary>Returns X * Bpp + Y * Pitch.</summary>
+        public int GetSwizzleOffset(int X, int Y)
+        {
+            int RowOffset    = Y * Pitch;
+            int ColumnOffset = X * Bpp;
+
+            return ColumnOffset + RowOffset;
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs
new file mode 100644
index 00000000..1f2d625e
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs
@@ -0,0 +1,117 @@
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Memory;
+using System;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Builds <see cref="GalImage"/> and <see cref="GalTextureSampler"/> values from the
+    /// eight-word descriptors (named TIC and TSC in this code) read through an
+    /// <see cref="NvGpuVmm"/>.
+    /// </summary>
+    static class TextureFactory
+    {
+        // Both descriptor kinds are fetched as eight consecutive 32-bit words.
+        private const int DescriptorWordCount = 8;
+
+        /// <summary>
+        /// Reads the descriptor at <paramref name="TicPosition"/> and decodes it into an image description.
+        /// </summary>
+        public static GalImage MakeTexture(NvGpuVmm Vmm, long TicPosition)
+        {
+            int[] Words = ReadWords(Vmm, TicPosition, DescriptorWordCount);
+
+            GalImageFormat Format = GetImageFormat(Words);
+
+            // Word 0 carries four 3-bit component selectors starting at bit 19.
+            GalTextureSource SourceX = (GalTextureSource)ExtractBits(Words[0], 19, 7);
+            GalTextureSource SourceY = (GalTextureSource)ExtractBits(Words[0], 22, 7);
+            GalTextureSource SourceZ = (GalTextureSource)ExtractBits(Words[0], 25, 7);
+            GalTextureSource SourceW = (GalTextureSource)ExtractBits(Words[0], 28, 7);
+
+            TextureSwizzle SwizzleMode = (TextureSwizzle)ExtractBits(Words[2], 21, 7);
+
+            // Only the two block linear modes map to the block linear layout;
+            // every other mode is treated as pitch.
+            bool IsBlockLinear =
+                SwizzleMode == TextureSwizzle.BlockLinear ||
+                SwizzleMode == TextureSwizzle.BlockLinearColorKey;
+
+            GalMemoryLayout Layout = IsBlockLinear
+                ? GalMemoryLayout.BlockLinear
+                : GalMemoryLayout.Pitch;
+
+            // Block height and tile width are stored as log2 values in word 3.
+            int GobBlockHeight = 1 << ExtractBits(Words[3], 3,  7);
+            int TileWidth      = 1 << ExtractBits(Words[3], 10, 7);
+
+            // Width and height are stored minus one in the low 16 bits of words 4 and 5.
+            int Width  = ExtractBits(Words[4], 0, 0xffff) + 1;
+            int Height = ExtractBits(Words[5], 0, 0xffff) + 1;
+
+            GalImage Image = new GalImage(
+                Width,
+                Height,
+                TileWidth,
+                GobBlockHeight,
+                Layout,
+                Format,
+                SourceX,
+                SourceY,
+                SourceZ,
+                SourceW);
+
+            if (!IsBlockLinear)
+            {
+                // For pitch images the low 16 bits of word 3 are scaled by 32.
+                Image.Pitch = ExtractBits(Words[3], 0, 0xffff) << 5;
+            }
+
+            return Image;
+        }
+
+        /// <summary>
+        /// Reads the descriptor at <paramref name="TscPosition"/> and decodes it into a sampler description.
+        /// </summary>
+        /// <remarks><paramref name="Gpu"/> is not used by this method.</remarks>
+        public static GalTextureSampler MakeSampler(NvGpu Gpu, NvGpuVmm Vmm, long TscPosition)
+        {
+            int[] Words = ReadWords(Vmm, TscPosition, DescriptorWordCount);
+
+            int WrapWord   = Words[0];
+            int FilterWord = Words[1];
+
+            GalTextureWrap WrapU = (GalTextureWrap)ExtractBits(WrapWord, 0, 7);
+            GalTextureWrap WrapV = (GalTextureWrap)ExtractBits(WrapWord, 3, 7);
+            GalTextureWrap WrapP = (GalTextureWrap)ExtractBits(WrapWord, 6, 7);
+
+            GalTextureFilter    Magnify = (GalTextureFilter)   ExtractBits(FilterWord, 0, 3);
+            GalTextureFilter    Minify  = (GalTextureFilter)   ExtractBits(FilterWord, 4, 3);
+            GalTextureMipFilter Mip     = (GalTextureMipFilter)ExtractBits(FilterWord, 6, 3);
+
+            // Words 4..7 are reinterpreted bit-for-bit as the four floats of the border color.
+            GalColorF Border = new GalColorF(
+                BitConverter.Int32BitsToSingle(Words[4]),
+                BitConverter.Int32BitsToSingle(Words[5]),
+                BitConverter.Int32BitsToSingle(Words[6]),
+                BitConverter.Int32BitsToSingle(Words[7]));
+
+            // The sampler constructor takes the minification filter before the magnification one.
+            return new GalTextureSampler(WrapU, WrapV, WrapP, Minify, Magnify, Mip, Border);
+        }
+
+        // Decodes the format (low 7 bits of word 0), the four 3-bit component types
+        // (word 0, bits 7..18) and the sRGB flag (word 4, bit 22) into a GalImageFormat.
+        private static GalImageFormat GetImageFormat(int[] Tic)
+        {
+            int Word0 = Tic[0];
+
+            GalTextureFormat Format = (GalTextureFormat)ExtractBits(Word0, 0, 0x7f);
+
+            GalTextureType TypeR = (GalTextureType)ExtractBits(Word0, 7,  7);
+            GalTextureType TypeG = (GalTextureType)ExtractBits(Word0, 10, 7);
+            GalTextureType TypeB = (GalTextureType)ExtractBits(Word0, 13, 7);
+            GalTextureType TypeA = (GalTextureType)ExtractBits(Word0, 16, 7);
+
+            bool IsSrgb = ExtractBits(Tic[4], 22, 1) != 0;
+
+            return ImageUtils.ConvertTexture(Format, TypeR, TypeG, TypeB, TypeA, IsSrgb);
+        }
+
+        // Reads Count consecutive 32-bit words starting at Position (4 bytes apart).
+        private static int[] ReadWords(NvGpuVmm Vmm, long Position, int Count)
+        {
+            int[] Result = new int[Count];
+
+            for (int Index = 0; Index < Count; Index++)
+            {
+                Result[Index] = Vmm.ReadInt32(Position + Index * 4L);
+            }
+
+            return Result;
+        }
+
+        // Shifts Word right by Shift bits and keeps the bits selected by Mask.
+        private static int ExtractBits(int Word, int Shift, int Mask)
+        {
+            return (Word >> Shift) & Mask;
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
new file mode 100644
index 00000000..6ac91d8b
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
@@ -0,0 +1,42 @@
+using ChocolArm64.Memory;
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Memory;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Helpers for choosing the swizzle of an image and for resolving the memory
+    /// object and position that back a given <see cref="IMemory"/>.
+    /// </summary>
+    static class TextureHelper
+    {
+        /// <summary>
+        /// Returns the <see cref="ISwizzle"/> matching the layout of <paramref name="Image"/>:
+        /// a <see cref="BlockLinearSwizzle"/> for block linear images, otherwise a
+        /// <see cref="LinearSwizzle"/> built from the image pitch.
+        /// </summary>
+        public static ISwizzle GetSwizzle(GalImage Image)
+        {
+            int TexelsPerBlock = ImageUtils.GetBlockWidth   (Image.Format);
+            int Bpp            = ImageUtils.GetBytesPerPixel(Image.Format);
+
+            // Image width in blocks, rounded up.
+            int WidthInBlocks = (Image.Width + (TexelsPerBlock - 1)) / TexelsPerBlock;
+
+            if (Image.Layout != GalMemoryLayout.BlockLinear)
+            {
+                return new LinearSwizzle(Image.Pitch, Bpp);
+            }
+
+            // Round the width up to a multiple of TileWidth * (64 / Bpp) blocks.
+            int Mask = Image.TileWidth * (64 / Bpp) - 1;
+
+            int AlignedWidth = (WidthInBlocks + Mask) & ~Mask;
+
+            return new BlockLinearSwizzle(AlignedWidth, Bpp, Image.GobBlockHeight);
+        }
+
+        /// <summary>
+        /// Resolves <paramref name="Memory"/> to a <see cref="MemoryManager"/> and a position in it.
+        /// </summary>
+        /// <remarks>
+        /// For an <see cref="NvGpuVmm"/>, its <c>Memory</c> and the result of
+        /// <c>GetPhysicalAddress(Position)</c> are returned. Anything else is cast to
+        /// <see cref="MemoryManager"/> and <paramref name="Position"/> is returned unchanged.
+        /// </remarks>
+        public static (MemoryManager Memory, long Position) GetMemoryAndPosition(
+            IMemory Memory,
+            long    Position)
+        {
+            if (!(Memory is NvGpuVmm Vmm))
+            {
+                return ((MemoryManager)Memory, Position);
+            }
+
+            return (Vmm.Memory, Vmm.GetPhysicalAddress(Position));
+        }
+    }
+}
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureSwizzle.cs
new file mode 100644
index 00000000..c67a5367
--- /dev/null
+++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureSwizzle.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Memory arrangement selector of a texture. <c>TextureFactory.MakeTexture</c> decodes it
+    /// from bits 21..23 of descriptor word 2, so the numeric values below must not change.
+    /// </summary>
+    public enum TextureSwizzle
+    {
+        _1dBuffer           = 0,
+
+        PitchColorKey       = 1,
+
+        Pitch               = 2,
+
+        // Mapped to GalMemoryLayout.BlockLinear by TextureFactory.MakeTexture.
+        BlockLinear         = 3,
+
+        // Also mapped to GalMemoryLayout.BlockLinear by TextureFactory.MakeTexture.
+        BlockLinearColorKey = 4
+    }
+} \ No newline at end of file