diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2018-12-03 00:38:47 -0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-12-03 00:38:47 -0200 |
| commit | c86aacde76b5f8e503e2b412385c8491ecc86b3b (patch) | |
| tree | 8e4737422fba15199c1a6ce7c6345996c0e907b5 /Ryujinx.Graphics/Graphics3d | |
| parent | ad00fd02442cf9c0f00c4562635738042b521efa (diff) | |
NVDEC implementation using FFmpeg (#443)
* Initial nvdec implementation using FFmpeg
* Fix swapped channels on the video decoder and the G8R8 texture format
* Fix texture samplers not being set properly (regression)
* Rebased
* Remove unused code introduced on the rebase
* Add support for RGBA8 output format on the video image composer
* Correct spacing
* Some fixes for rebase and other tweaks
* Allow size mismatch on frame copy
* Get rid of GetHostAddress calls on VDec
Diffstat (limited to 'Ryujinx.Graphics/Graphics3d')
25 files changed, 4971 insertions, 0 deletions
// Ryujinx.Graphics/Graphics3d/INvGpuEngine.cs
using Ryujinx.Graphics.Memory;

namespace Ryujinx.Graphics.Graphics3d
{
    /// <summary>
    /// Common surface of a GPU engine: a register file plus a method-call entry point.
    /// </summary>
    interface INvGpuEngine
    {
        /// <summary>
        /// Register file of the engine, indexed by register number.
        /// MacroInterpreter reads it directly through its Read helper.
        /// </summary>
        int[] Registers { get; }

        /// <summary>
        /// Submits a method call (method number plus argument) to the engine.
        /// </summary>
        /// <param name="Vmm">GPU virtual memory manager passed through to the engine</param>
        /// <param name="MethCall">Method call to process</param>
        void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
    }
}
// Ryujinx.Graphics/Graphics3d/MacroInterpreter.cs
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Graphics3d
{
    /// <summary>
    /// Interpreter for GPU macro programs.
    /// A macro is a sequence of 32-bit opcodes that operate on 8 general purpose
    /// registers (register 0 always reads as zero), may fetch extra parameters
    /// from <see cref="Fifo"/> and may send method calls to the bound engine.
    /// </summary>
    /// <remarks>
    /// Opcode layout, as decoded by this class:
    /// bits 2:0 operation (7 = branch), bits 6:4 assignment operation
    /// (for branches, bit 4 = branch on not zero and bit 5 = no delay slot),
    /// bit 7 exit flag, bits 10:8 destination register, bits 13:11 register A,
    /// bits 16:14 register B, bits 31:14 signed immediate,
    /// bits 21:17 ALU register operation or bitfield source bit,
    /// bits 26:22 bitfield size, bits 31:27 bitfield destination bit.
    /// </remarks>
    class MacroInterpreter
    {
        private enum AssignmentOperation
        {
            IgnoreAndFetch                  = 0,
            Move                            = 1,
            MoveAndSetMaddr                 = 2,
            FetchAndSend                    = 3,
            MoveAndSend                     = 4,
            FetchAndSetMaddr                = 5,
            MoveAndSetMaddrThenFetchAndSend = 6,
            MoveAndSetMaddrThenSendHigh     = 7
        }

        private enum AluOperation
        {
            AluReg                = 0,
            AddImmediate          = 1,
            BitfieldReplace       = 2,
            BitfieldExtractLslImm = 3,
            BitfieldExtractLslReg = 4,
            ReadImmediate         = 5
        }

        private enum AluRegOperation
        {
            Add                = 0,
            AddWithCarry       = 1,
            Subtract           = 2,
            SubtractWithBorrow = 3,
            BitwiseExclusiveOr = 8,
            BitwiseOr          = 9,
            BitwiseAnd         = 10,
            BitwiseAndNot      = 11,
            BitwiseNotAnd      = 12
        }

        //Note: PFifo is stored but not used by any code in this class.
        private NvGpuFifo    PFifo;
        private INvGpuEngine Engine;

        /// <summary>
        /// Queue of macro parameters. It is filled by the caller before
        /// <see cref="Execute"/> and consumed by the fetch operations.
        /// </summary>
        public Queue<int> Fifo { get; private set; }

        private int[] Gprs;

        //Current method address, and the increment applied after each send.
        private int MethAddr;
        private int MethIncr;

        private bool Carry;

        //Opcode being executed, and the opcode already fetched for the next step.
        private int OpCode;

        private int PipeOp;

        private int Pc;

        /// <summary>
        /// Creates a new macro interpreter bound to an engine.
        /// </summary>
        /// <param name="PFifo">Owning FIFO</param>
        /// <param name="Engine">Engine that receives method calls and register reads</param>
        public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
        {
            this.PFifo  = PFifo;
            this.Engine = Engine;

            Fifo = new Queue<int>();

            Gprs = new int[8];
        }

        /// <summary>
        /// Runs the macro that starts at <paramref name="Position"/> until an
        /// opcode with the exit flag is executed.
        /// </summary>
        /// <param name="Vmm">GPU virtual memory manager forwarded to engine method calls</param>
        /// <param name="Mme">Macro code memory</param>
        /// <param name="Position">Index in <paramref name="Mme"/> of the first opcode</param>
        /// <param name="Param">First macro parameter, placed on register 1</param>
        public void Execute(NvGpuVmm Vmm, int[] Mme, int Position, int Param)
        {
            Reset();

            Gprs[1] = Param;

            Pc = Position;

            //Prime the pipeline, the first Step call then executes the first opcode.
            FetchOpCode(Mme);

            while (Step(Vmm, Mme))
            {
            }

            //Due to the delay slot, we still need to execute
            //one more instruction before we actually exit.
            Step(Vmm, Mme);
        }

        /// <summary>
        /// Clears the registers, the method address state and the carry flag.
        /// </summary>
        private void Reset()
        {
            for (int Index = 0; Index < Gprs.Length; Index++)
            {
                Gprs[Index] = 0;
            }

            MethAddr = 0;
            MethIncr = 0;

            Carry = false;
        }

        /// <summary>
        /// Executes a single opcode.
        /// </summary>
        /// <returns>False when the executed opcode has the exit flag set, true otherwise</returns>
        private bool Step(NvGpuVmm Vmm, int[] Mme)
        {
            //Pc is one ahead of the opcode that is about to be executed,
            //branch targets are relative to that opcode.
            int BaseAddr = Pc - 1;

            FetchOpCode(Mme);

            if ((OpCode & 7) < 7)
            {
                //Operation produces a value.
                AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7);

                int Result = GetAluResult();

                switch (AsgOp)
                {
                    //Fetch parameter and ignore result.
                    case AssignmentOperation.IgnoreAndFetch:
                    {
                        SetDstGpr(FetchParam());

                        break;
                    }

                    //Move result.
                    case AssignmentOperation.Move:
                    {
                        SetDstGpr(Result);

                        break;
                    }

                    //Move result and use as Method Address.
                    case AssignmentOperation.MoveAndSetMaddr:
                    {
                        SetDstGpr(Result);

                        SetMethAddr(Result);

                        break;
                    }

                    //Fetch parameter and send result.
                    case AssignmentOperation.FetchAndSend:
                    {
                        SetDstGpr(FetchParam());

                        Send(Vmm, Result);

                        break;
                    }

                    //Move and send result.
                    case AssignmentOperation.MoveAndSend:
                    {
                        SetDstGpr(Result);

                        Send(Vmm, Result);

                        break;
                    }

                    //Fetch parameter and use result as Method Address.
                    case AssignmentOperation.FetchAndSetMaddr:
                    {
                        SetDstGpr(FetchParam());

                        SetMethAddr(Result);

                        break;
                    }

                    //Move result and use as Method Address, then fetch and send parameter.
                    case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
                    {
                        SetDstGpr(Result);

                        SetMethAddr(Result);

                        Send(Vmm, FetchParam());

                        break;
                    }

                    //Move result and use as Method Address, then send bits 17:12 of result.
                    case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
                    {
                        SetDstGpr(Result);

                        SetMethAddr(Result);

                        Send(Vmm, (Result >> 12) & 0x3f);

                        break;
                    }
                }
            }
            else
            {
                //Branch.
                bool OnNotZero = ((OpCode >> 4) & 1) != 0;

                bool Taken = OnNotZero
                    ? GetGprA() != 0
                    : GetGprA() == 0;

                if (Taken)
                {
                    Pc = BaseAddr + GetImm();

                    bool NoDelays = (OpCode & 0x20) != 0;

                    if (NoDelays)
                    {
                        //Drop the already fetched opcode so the delay slot is not executed.
                        FetchOpCode(Mme);
                    }

                    return true;
                }
            }

            bool Exit = (OpCode & 0x80) != 0;

            return !Exit;
        }

        /// <summary>
        /// Advances the one entry pipeline: the previously fetched opcode becomes
        /// the current one, and the opcode at Pc is fetched for the next step.
        /// </summary>
        private void FetchOpCode(int[] Mme)
        {
            OpCode = PipeOp;

            PipeOp = Mme[Pc++];
        }

        /// <summary>
        /// Computes the value produced by the current (non branch) opcode.
        /// </summary>
        /// <exception cref="ArgumentException">The opcode encodes an unknown operation</exception>
        private int GetAluResult()
        {
            AluOperation Op = (AluOperation)(OpCode & 7);

            switch (Op)
            {
                case AluOperation.AluReg:
                {
                    AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f);

                    return GetAluResult(AluOp, GetGprA(), GetGprB());
                }

                case AluOperation.AddImmediate:
                {
                    return GetGprA() + GetImm();
                }

                case AluOperation.BitfieldReplace:
                case AluOperation.BitfieldExtractLslImm:
                case AluOperation.BitfieldExtractLslReg:
                {
                    int BfSrcBit = (OpCode >> 17) & 0x1f;
                    int BfSize   = (OpCode >> 22) & 0x1f;
                    int BfDstBit = (OpCode >> 27) & 0x1f;

                    int BfMask = (1 << BfSize) - 1;

                    int Dst = GetGprA();
                    int Src = GetGprB();

                    switch (Op)
                    {
                        case AluOperation.BitfieldReplace:
                        {
                            Src = (int)((uint)Src >> BfSrcBit) & BfMask;

                            Dst &= ~(BfMask << BfDstBit);

                            Dst |= Src << BfDstBit;

                            return Dst;
                        }

                        case AluOperation.BitfieldExtractLslImm:
                        {
                            Src = (int)((uint)Src >> Dst) & BfMask;

                            return Src << BfDstBit;
                        }

                        case AluOperation.BitfieldExtractLslReg:
                        {
                            Src = (int)((uint)Src >> BfSrcBit) & BfMask;

                            return Src << Dst;
                        }
                    }

                    break;
                }

                case AluOperation.ReadImmediate:
                {
                    return Read(GetGprA() + GetImm());
                }
            }

            throw new ArgumentException(nameof(OpCode));
        }

        /// <summary>
        /// Computes the result of a register to register ALU operation,
        /// updating the carry flag for the arithmetic operations.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">The operation is unknown</exception>
        private int GetAluResult(AluRegOperation AluOp, int A, int B)
        {
            //The operands are 32-bit values, so they must be zero extended
            //before the 64-bit arithmetic. A direct int to ulong conversion sign
            //extends, which gives a wrong carry whenever bit 31 of an operand is set.
            ulong ValueA = (uint)A;
            ulong ValueB = (uint)B;

            switch (AluOp)
            {
                case AluRegOperation.Add:
                {
                    ulong Result = ValueA + ValueB;

                    Carry = Result > 0xffffffff;

                    return (int)Result;
                }

                case AluRegOperation.AddWithCarry:
                {
                    ulong Result = ValueA + ValueB + (Carry ? 1UL : 0UL);

                    Carry = Result > 0xffffffff;

                    return (int)Result;
                }

                case AluRegOperation.Subtract:
                {
                    ulong Result = ValueA - ValueB;

                    //Carry is set when the subtraction did not borrow.
                    Carry = Result < 0x100000000;

                    return (int)Result;
                }

                case AluRegOperation.SubtractWithBorrow:
                {
                    ulong Result = ValueA - ValueB - (Carry ? 0UL : 1UL);

                    Carry = Result < 0x100000000;

                    return (int)Result;
                }

                case AluRegOperation.BitwiseExclusiveOr: return   A ^  B;
                case AluRegOperation.BitwiseOr:          return   A |  B;
                case AluRegOperation.BitwiseAnd:         return   A &  B;
                case AluRegOperation.BitwiseAndNot:      return   A & ~B;
                case AluRegOperation.BitwiseNotAnd:      return ~(A &  B);
            }

            throw new ArgumentOutOfRangeException(nameof(AluOp));
        }

        private int GetImm()
        {
            //Note: The immediate is signed, the sign-extension is intended here.
            return OpCode >> 14;
        }

        /// <summary>
        /// Sets the method address (bits 11:0) and increment (bits 17:12) from a value.
        /// </summary>
        private void SetMethAddr(int Value)
        {
            MethAddr = (Value >>  0) & 0xfff;
            MethIncr = (Value >> 12) & 0x3f;
        }

        private void SetDstGpr(int Value)
        {
            Gprs[(OpCode >> 8) & 7] = Value;
        }

        private int GetGprA()
        {
            return GetGprValue((OpCode >> 11) & 7);
        }

        private int GetGprB()
        {
            return GetGprValue((OpCode >> 14) & 7);
        }

        private int GetGprValue(int Index)
        {
            //Register 0 always reads as zero.
            return Index != 0 ? Gprs[Index] : 0;
        }

        /// <summary>
        /// Dequeues the next macro parameter, or returns 0 (and logs a warning)
        /// when no parameter is left on the queue.
        /// </summary>
        private int FetchParam()
        {
            int Value;

            if (!Fifo.TryDequeue(out Value))
            {
                Logger.PrintWarning(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");

                return 0;
            }

            return Value;
        }

        private int Read(int Reg)
        {
            return Engine.Registers[Reg];
        }

        /// <summary>
        /// Sends a value to the engine at the current method address,
        /// then advances the address by the current increment.
        /// </summary>
        private void Send(NvGpuVmm Vmm, int Value)
        {
            GpuMethodCall MethCall = new GpuMethodCall(MethAddr, Value);

            Engine.CallMethod(Vmm, MethCall);

            MethAddr += MethIncr;
        }
    }
}
// Ryujinx.Graphics/Graphics3d/NvGpuEngine.cs
namespace Ryujinx.Graphics.Graphics3d
{
    /// <summary>
    /// Identifiers of the GPU engines.
    /// </summary>
    //NOTE(review): the values look like hardware class ids of each engine,
    //this is not established by the code here - confirm against the FIFO code.
    enum NvGpuEngine
    {
        _2d     = 0x902d,
        _3d     = 0xb197,
        Compute = 0xb1c0,
        P2mf    = 0xa140,
        M2mf    = 0xb0b5
    }
}
// Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;

namespace Ryujinx.Graphics.Graphics3d
{
    /// <summary>
    /// 2D engine. Stores every method argument on its register file, and
    /// performs a texture copy (blit) when the BlitSrcYInt register is written.
    /// </summary>
    class NvGpuEngine2d : INvGpuEngine
    {
        private enum CopyOperation
        {
            SrcCopyAnd,
            RopAnd,
            Blend,
            SrcCopy,
            Rop,
            SrcCopyPremult,
            BlendPremult
        }

        /// <summary>
        /// Register file of the engine, indexed by <see cref="NvGpuEngine2dReg"/>.
        /// </summary>
        public int[] Registers { get; private set; }

        private NvGpu Gpu;

        /// <summary>
        /// Creates the 2D engine with a zeroed register file.
        /// </summary>
        /// <param name="Gpu">GPU that owns the renderer and resource manager used by the copy</param>
        public NvGpuEngine2d(NvGpu Gpu)
        {
            Registers = new int[0x238];

            this.Gpu = Gpu;
        }

        /// <summary>
        /// Writes the method argument to the matching register, and starts
        /// the texture copy when that register is BlitSrcYInt.
        /// </summary>
        /// <param name="Vmm">GPU virtual memory manager</param>
        /// <param name="MethCall">Method call to process</param>
        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            WriteRegister(MethCall);

            if ((NvGpuEngine2dReg)MethCall.Method != NvGpuEngine2dReg.BlitSrcYInt)
            {
                return;
            }

            TextureCopy(Vmm);
        }

        /// <summary>
        /// Copies the blit rectangle from the source to the destination surface,
        /// both on the host renderer and on guest memory.
        /// </summary>
        private void TextureCopy(NvGpuVmm Vmm)
        {
            //The copy operation is read here, but nothing below depends on it.
            CopyOperation CopyOp = (CopyOperation)ReadRegister(NvGpuEngine2dReg.CopyOperation);

            //Source surface description.
            int  SrcSurfFormat = ReadRegister(NvGpuEngine2dReg.SrcFormat);
            bool SrcIsLinear   = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0;
            int  SrcSurfWidth  = ReadRegister(NvGpuEngine2dReg.SrcWidth);
            int  SrcSurfHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);
            int  SrcSurfPitch  = ReadRegister(NvGpuEngine2dReg.SrcPitch);
            int  SrcBlockDims  = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions);

            //Destination surface description.
            int  DstSurfFormat = ReadRegister(NvGpuEngine2dReg.DstFormat);
            bool DstIsLinear   = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
            int  DstSurfWidth  = ReadRegister(NvGpuEngine2dReg.DstWidth);
            int  DstSurfHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
            int  DstSurfPitch  = ReadRegister(NvGpuEngine2dReg.DstPitch);
            int  DstBlockDims  = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);

            //Destination rectangle.
            int DstX = ReadRegister(NvGpuEngine2dReg.BlitDstX);
            int DstY = ReadRegister(NvGpuEngine2dReg.BlitDstY);
            int DstW = ReadRegister(NvGpuEngine2dReg.BlitDstW);
            int DstH = ReadRegister(NvGpuEngine2dReg.BlitDstH);

            //Source origin and source step per destination pixel,
            //as 64-bit fixed point values with 32 fractional bits.
            long StepX = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDuDxFract);
            long StepY = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDvDyFract);

            long SrcOriginX = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcXFract);
            long SrcOriginY = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcYFract);

            GalImageFormat SrcImageFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)SrcSurfFormat);
            GalImageFormat DstImageFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)DstSurfFormat);

            long SrcVA = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress);
            long DstVA = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress);

            long SrcKey = Vmm.GetPhysicalAddress(SrcVA);
            long DstKey = Vmm.GetPhysicalAddress(DstVA);

            GalImage SrcImage = new GalImage(
                SrcSurfWidth,
                SrcSurfHeight, 1,
                GetBlockHeight(SrcBlockDims),
                GetLayout(SrcIsLinear),
                SrcImageFormat)
            {
                Pitch = SrcSurfPitch
            };

            GalImage DstImage = new GalImage(
                DstSurfWidth,
                DstSurfHeight, 1,
                GetBlockHeight(DstBlockDims),
                GetLayout(DstIsLinear),
                DstImageFormat)
            {
                Pitch = DstSurfPitch
            };

            Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcImage);
            Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstImage);

            //Integer part of the source rectangle corners.
            int SrcX1 = (int)(SrcOriginX >> 32);
            int SrcY1 = (int)(SrcOriginY >> 32);

            int SrcX2 = (int)((SrcOriginX + DstW * StepX) >> 32);
            int SrcY2 = (int)((SrcOriginY + DstH * StepY) >> 32);

            Gpu.Renderer.RenderTarget.Copy(
                SrcKey,
                DstKey,
                SrcX1,
                SrcY1,
                SrcX2,
                SrcY2,
                DstX,
                DstY,
                DstX + DstW,
                DstY + DstH);

            //Do a guest side copy as well. This is necessary when
            //the texture is modified by the guest, however it doesn't
            //work when resources that the gpu can write to are copied,
            //like framebuffers.
            ImageUtils.CopyTexture(
                Vmm,
                SrcImage,
                DstImage,
                SrcVA,
                DstVA,
                SrcX1,
                SrcY1,
                DstX,
                DstY,
                DstW,
                DstH);

            //The returned value is intentionally not used here.
            Vmm.IsRegionModified(DstKey, ImageUtils.GetSize(DstImage), NvGpuBufferType.Texture);
        }

        /// <summary>
        /// Gets the block height from bits 7:4 of a block dimensions register value.
        /// </summary>
        private static int GetBlockHeight(int BlockDims)
        {
            int Log2Height = (BlockDims >> 4) & 0xf;

            return 1 << Log2Height;
        }

        /// <summary>
        /// Maps the linear flag of a surface to its memory layout.
        /// </summary>
        private static GalMemoryLayout GetLayout(bool Linear)
        {
            if (Linear)
            {
                return GalMemoryLayout.Pitch;
            }

            return GalMemoryLayout.BlockLinear;
        }

        /// <summary>
        /// Builds a 64-bit value from two consecutive registers,
        /// the first one holding the high 32 bits.
        /// </summary>
        private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg)
        {
            int Index = (int)Reg;

            long High = Registers[Index + 0];
            uint Low  = (uint)Registers[Index + 1];

            return High << 32 | Low;
        }

        private void WriteRegister(GpuMethodCall MethCall)
        {
            Registers[MethCall.Method] = MethCall.Argument;
        }

        /// <summary>
        /// Reads a 64-bit fixed point value from two consecutive registers,
        /// the first one holding the low (fractional) 32 bits.
        /// </summary>
        private long ReadRegisterFixed1_31_32(NvGpuEngine2dReg Reg)
        {
            long FractBits = (uint)ReadRegister(Reg + 0);
            long IntBits   = (uint)ReadRegister(Reg + 1);

            return (IntBits << 32) | FractBits;
        }

        private int ReadRegister(NvGpuEngine2dReg Reg)
        {
            return Registers[(int)Reg];
        }
    }
}
// Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs
namespace Ryujinx.Graphics.Graphics3d
{
    /// <summary>
    /// Register numbers of the 2D engine, used as indices into NvGpuEngine2d.Registers.
    /// </summary>
    enum NvGpuEngine2dReg
    {
        //Destination surface.
        DstFormat          = 0x80,
        DstLinear          = 0x81,
        DstBlockDimensions = 0x82,
        DstDepth           = 0x83,
        DstLayer           = 0x84,
        DstPitch           = 0x85,
        DstWidth           = 0x86,
        DstHeight          = 0x87,
        //Read as a 64-bit value from this register (high half) and the next one (low half).
        DstAddress         = 0x88,

        //Source surface, same layout as the destination registers.
        SrcFormat          = 0x8c,
        SrcLinear          = 0x8d,
        SrcBlockDimensions = 0x8e,
        SrcDepth           = 0x8f,
        SrcLayer           = 0x90,
        SrcPitch           = 0x91,
        SrcWidth           = 0x92,
        SrcHeight          = 0x93,
        //Read as a 64-bit value from this register (high half) and the next one (low half).
        SrcAddress         = 0x94,

        ClipEnable         = 0xa4,
        CopyOperation      = 0xab,

        //Blit parameters. Each Fract/Int pair is read by NvGpuEngine2d as a
        //single 64-bit fixed point value (Fract = low half, Int = high half).
        BlitControl        = 0x223,
        BlitDstX           = 0x22c,
        BlitDstY           = 0x22d,
        BlitDstW           = 0x22e,
        BlitDstH           = 0x22f,
        BlitDuDxFract      = 0x230,
        BlitDuDxInt        = 0x231,
        BlitDvDyFract      = 0x232,
        BlitDvDyInt        = 0x233,
        BlitSrcXFract      = 0x234,
        BlitSrcXInt        = 0x235,
        BlitSrcYFract      = 0x236,
        //Writing this register starts the texture copy on NvGpuEngine2d.
        BlitSrcYInt        = 0x237
    }
}
\ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs new file mode 100644 index 00000000..6fb038ac --- /dev/null +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs @@ -0,0 +1,1014 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Gal; +using Ryujinx.Graphics.Memory; +using Ryujinx.Graphics.Texture; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Graphics3d +{ + class NvGpuEngine3d : INvGpuEngine + { + public int[] Registers { get; private set; } + + private NvGpu Gpu; + + private Dictionary<int, NvGpuMethod> Methods; + + private struct ConstBuffer + { + public bool Enabled; + public long Position; + public int Size; + } + + private ConstBuffer[][] ConstBuffers; + + private int CurrentInstance = 0; + + public NvGpuEngine3d(NvGpu Gpu) + { + this.Gpu = Gpu; + + Registers = new int[0xe00]; + + Methods = new Dictionary<int, NvGpuMethod>(); + + void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method) + { + while (Count-- > 0) + { + Methods.Add(Meth, Method); + + Meth += Stride; + } + } + + AddMethod(0x585, 1, 1, VertexEndGl); + AddMethod(0x674, 1, 1, ClearBuffers); + AddMethod(0x6c3, 1, 1, QueryControl); + AddMethod(0x8e4, 16, 1, CbData); + AddMethod(0x904, 5, 8, CbBind); + + ConstBuffers = new ConstBuffer[6][]; + + for (int Index = 0; Index < ConstBuffers.Length; Index++) + { + ConstBuffers[Index] = new ConstBuffer[18]; + } + + //Ensure that all components are enabled by default. + //FIXME: Is this correct? 
+ WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111); + + WriteRegister(NvGpuEngine3dReg.FrameBufferSrgb, 1); + + for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++) + { + WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb + Index * 8, (int)GalBlendEquation.FuncAdd); + WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb + Index * 8, (int)GalBlendFactor.One); + WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb + Index * 8, (int)GalBlendFactor.Zero); + WriteRegister(NvGpuEngine3dReg.IBlendNEquationAlpha + Index * 8, (int)GalBlendEquation.FuncAdd); + WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha + Index * 8, (int)GalBlendFactor.One); + WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha + Index * 8, (int)GalBlendFactor.Zero); + } + } + + public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall) + { + if (Methods.TryGetValue(MethCall.Method, out NvGpuMethod Method)) + { + Method(Vmm, MethCall); + } + else + { + WriteRegister(MethCall); + } + } + + private void VertexEndGl(NvGpuVmm Vmm, GpuMethodCall MethCall) + { + LockCaches(); + + GalPipelineState State = new GalPipelineState(); + + SetFrameBuffer(State); + SetFrontFace(State); + SetCullFace(State); + SetDepth(State); + SetStencil(State); + SetBlending(State); + SetColorMask(State); + SetPrimitiveRestart(State); + + for (int FbIndex = 0; FbIndex < 8; FbIndex++) + { + SetFrameBuffer(Vmm, FbIndex); + } + + SetZeta(Vmm); + + SetRenderTargets(); + + long[] Keys = UploadShaders(Vmm); + + Gpu.Renderer.Shader.BindProgram(); + + UploadTextures(Vmm, State, Keys); + UploadConstBuffers(Vmm, State, Keys); + UploadVertexArrays(Vmm, State); + + DispatchRender(Vmm, State); + + UnlockCaches(); + } + + private void LockCaches() + { + Gpu.Renderer.Buffer.LockCache(); + Gpu.Renderer.Rasterizer.LockCaches(); + Gpu.Renderer.Texture.LockCache(); + } + + private void UnlockCaches() + { + Gpu.Renderer.Buffer.UnlockCache(); + Gpu.Renderer.Rasterizer.UnlockCaches(); + Gpu.Renderer.Texture.UnlockCache(); + } + + private 
void ClearBuffers(NvGpuVmm Vmm, GpuMethodCall MethCall) + { + int Attachment = (MethCall.Argument >> 6) & 0xf; + + GalClearBufferFlags Flags = (GalClearBufferFlags)(MethCall.Argument & 0x3f); + + float Red = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 0); + float Green = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 1); + float Blue = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 2); + float Alpha = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 3); + + float Depth = ReadRegisterFloat(NvGpuEngine3dReg.ClearDepth); + + int Stencil = ReadRegister(NvGpuEngine3dReg.ClearStencil); + + SetFrameBuffer(Vmm, Attachment); + + SetZeta(Vmm); + + SetRenderTargets(); + + Gpu.Renderer.RenderTarget.Bind(); + + Gpu.Renderer.Rasterizer.ClearBuffers(Flags, Attachment, Red, Green, Blue, Alpha, Depth, Stencil); + + Gpu.Renderer.Pipeline.ResetDepthMask(); + Gpu.Renderer.Pipeline.ResetColorMask(Attachment); + } + + private void SetFrameBuffer(NvGpuVmm Vmm, int FbIndex) + { + long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.FrameBufferNAddress + FbIndex * 0x10); + + int SurfFormat = ReadRegister(NvGpuEngine3dReg.FrameBufferNFormat + FbIndex * 0x10); + + if (VA == 0 || SurfFormat == 0) + { + Gpu.Renderer.RenderTarget.UnbindColor(FbIndex); + + return; + } + + long Key = Vmm.GetPhysicalAddress(VA); + + int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10); + int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10); + + int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10); + + int GobBlockHeight = 1 << ((BlockDim >> 4) & 7); + + GalMemoryLayout Layout = (GalMemoryLayout)((BlockDim >> 12) & 1); + + float TX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateX + FbIndex * 8); + float TY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateY + FbIndex * 8); + + float SX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleX + FbIndex * 8); + float SY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleY 
+ FbIndex * 8); + + int VpX = (int)MathF.Max(0, TX - MathF.Abs(SX)); + int VpY = (int)MathF.Max(0, TY - MathF.Abs(SY)); + + int VpW = (int)(TX + MathF.Abs(SX)) - VpX; + int VpH = (int)(TY + MathF.Abs(SY)) - VpY; + + GalImageFormat Format = ImageUtils.ConvertSurface((GalSurfaceFormat)SurfFormat); + + GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format); + + Gpu.ResourceManager.SendColorBuffer(Vmm, Key, FbIndex, Image); + + Gpu.Renderer.RenderTarget.SetViewport(FbIndex, VpX, VpY, VpW, VpH); + } + + private void SetFrameBuffer(GalPipelineState State) + { + State.FramebufferSrgb = ReadRegisterBool(NvGpuEngine3dReg.FrameBufferSrgb); + + State.FlipX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX); + State.FlipY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY); + + int ScreenYControl = ReadRegister(NvGpuEngine3dReg.ScreenYControl); + + bool NegateY = (ScreenYControl & 1) != 0; + + if (NegateY) + { + State.FlipY = -State.FlipY; + } + } + + private void SetZeta(NvGpuVmm Vmm) + { + long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.ZetaAddress); + + int ZetaFormat = ReadRegister(NvGpuEngine3dReg.ZetaFormat); + + int BlockDim = ReadRegister(NvGpuEngine3dReg.ZetaBlockDimensions); + + int GobBlockHeight = 1 << ((BlockDim >> 4) & 7); + + GalMemoryLayout Layout = (GalMemoryLayout)((BlockDim >> 12) & 1); //? 
+ + bool ZetaEnable = ReadRegisterBool(NvGpuEngine3dReg.ZetaEnable); + + if (VA == 0 || ZetaFormat == 0 || !ZetaEnable) + { + Gpu.Renderer.RenderTarget.UnbindZeta(); + + return; + } + + long Key = Vmm.GetPhysicalAddress(VA); + + int Width = ReadRegister(NvGpuEngine3dReg.ZetaHoriz); + int Height = ReadRegister(NvGpuEngine3dReg.ZetaVert); + + GalImageFormat Format = ImageUtils.ConvertZeta((GalZetaFormat)ZetaFormat); + + GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format); + + Gpu.ResourceManager.SendZetaBuffer(Vmm, Key, Image); + } + + private long[] UploadShaders(NvGpuVmm Vmm) + { + long[] Keys = new long[5]; + + long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); + + int Index = 1; + + int VpAControl = ReadRegister(NvGpuEngine3dReg.ShaderNControl); + + bool VpAEnable = (VpAControl & 1) != 0; + + if (VpAEnable) + { + //Note: The maxwell supports 2 vertex programs, usually + //only VP B is used, but in some cases VP A is also used. + //In this case, it seems to function as an extra vertex + //shader stage. + //The graphics abstraction layer has a special overload for this + //case, which should merge the two shaders into one vertex shader. + int VpAOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset); + int VpBOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + 0x10); + + long VpAPos = BasePosition + (uint)VpAOffset; + long VpBPos = BasePosition + (uint)VpBOffset; + + Keys[(int)GalShaderType.Vertex] = VpBPos; + + Gpu.Renderer.Shader.Create(Vmm, VpAPos, VpBPos, GalShaderType.Vertex); + Gpu.Renderer.Shader.Bind(VpBPos); + + Index = 2; + } + + for (; Index < 6; Index++) + { + GalShaderType Type = GetTypeFromProgram(Index); + + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10); + int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10); + + //Note: Vertex Program (B) is always enabled. 
+ bool Enable = (Control & 1) != 0 || Index == 1; + + if (!Enable) + { + Gpu.Renderer.Shader.Unbind(Type); + + continue; + } + + long Key = BasePosition + (uint)Offset; + + Keys[(int)Type] = Key; + + Gpu.Renderer.Shader.Create(Vmm, Key, Type); + Gpu.Renderer.Shader.Bind(Key); + } + + return Keys; + } + + private static GalShaderType GetTypeFromProgram(int Program) + { + switch (Program) + { + case 0: + case 1: return GalShaderType.Vertex; + case 2: return GalShaderType.TessControl; + case 3: return GalShaderType.TessEvaluation; + case 4: return GalShaderType.Geometry; + case 5: return GalShaderType.Fragment; + } + + throw new ArgumentOutOfRangeException(nameof(Program)); + } + + private void SetFrontFace(GalPipelineState State) + { + float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX); + float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY); + + GalFrontFace FrontFace = (GalFrontFace)ReadRegister(NvGpuEngine3dReg.FrontFace); + + //Flipping breaks facing. Flipping front facing too fixes it + if (SignX != SignY) + { + switch (FrontFace) + { + case GalFrontFace.CW: FrontFace = GalFrontFace.CCW; break; + case GalFrontFace.CCW: FrontFace = GalFrontFace.CW; break; + } + } + + State.FrontFace = FrontFace; + } + + private void SetCullFace(GalPipelineState State) + { + State.CullFaceEnabled = ReadRegisterBool(NvGpuEngine3dReg.CullFaceEnable); + + if (State.CullFaceEnabled) + { + State.CullFace = (GalCullFace)ReadRegister(NvGpuEngine3dReg.CullFace); + } + } + + private void SetDepth(GalPipelineState State) + { + State.DepthTestEnabled = ReadRegisterBool(NvGpuEngine3dReg.DepthTestEnable); + + State.DepthWriteEnabled = ReadRegisterBool(NvGpuEngine3dReg.DepthWriteEnable); + + if (State.DepthTestEnabled) + { + State.DepthFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.DepthTestFunction); + } + + State.DepthRangeNear = ReadRegisterFloat(NvGpuEngine3dReg.DepthRangeNNear); + State.DepthRangeFar = ReadRegisterFloat(NvGpuEngine3dReg.DepthRangeNFar); + } + + 
private void SetStencil(GalPipelineState State) + { + State.StencilTestEnabled = ReadRegisterBool(NvGpuEngine3dReg.StencilEnable); + + if (State.StencilTestEnabled) + { + State.StencilBackFuncFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.StencilBackFuncFunc); + State.StencilBackFuncRef = ReadRegister(NvGpuEngine3dReg.StencilBackFuncRef); + State.StencilBackFuncMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilBackFuncMask); + State.StencilBackOpFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpFail); + State.StencilBackOpZFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpZFail); + State.StencilBackOpZPass = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilBackOpZPass); + State.StencilBackMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilBackMask); + + State.StencilFrontFuncFunc = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.StencilFrontFuncFunc); + State.StencilFrontFuncRef = ReadRegister(NvGpuEngine3dReg.StencilFrontFuncRef); + State.StencilFrontFuncMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilFrontFuncMask); + State.StencilFrontOpFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpFail); + State.StencilFrontOpZFail = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpZFail); + State.StencilFrontOpZPass = (GalStencilOp)ReadRegister(NvGpuEngine3dReg.StencilFrontOpZPass); + State.StencilFrontMask = (uint)ReadRegister(NvGpuEngine3dReg.StencilFrontMask); + } + } + + private void SetBlending(GalPipelineState State) + { + bool BlendIndependent = ReadRegisterBool(NvGpuEngine3dReg.BlendIndependent); + + State.BlendIndependent = BlendIndependent; + + for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++) + { + if (BlendIndependent) + { + State.Blends[Index].Enabled = ReadRegisterBool(NvGpuEngine3dReg.IBlendNEnable + Index); + + if (State.Blends[Index].Enabled) + { + State.Blends[Index].SeparateAlpha = ReadRegisterBool(NvGpuEngine3dReg.IBlendNSeparateAlpha + Index * 8); + + 
State.Blends[Index].EquationRgb = ReadBlendEquation(NvGpuEngine3dReg.IBlendNEquationRgb + Index * 8); + State.Blends[Index].FuncSrcRgb = ReadBlendFactor (NvGpuEngine3dReg.IBlendNFuncSrcRgb + Index * 8); + State.Blends[Index].FuncDstRgb = ReadBlendFactor (NvGpuEngine3dReg.IBlendNFuncDstRgb + Index * 8); + State.Blends[Index].EquationAlpha = ReadBlendEquation(NvGpuEngine3dReg.IBlendNEquationAlpha + Index * 8); + State.Blends[Index].FuncSrcAlpha = ReadBlendFactor (NvGpuEngine3dReg.IBlendNFuncSrcAlpha + Index * 8); + State.Blends[Index].FuncDstAlpha = ReadBlendFactor (NvGpuEngine3dReg.IBlendNFuncDstAlpha + Index * 8); + } + } + else + { + //It seems that even when independent blend is disabled, the first IBlend enable + //register is still set to indicate whenever blend is enabled or not (?). + State.Blends[Index].Enabled = ReadRegisterBool(NvGpuEngine3dReg.IBlendNEnable); + + if (State.Blends[Index].Enabled) + { + State.Blends[Index].SeparateAlpha = ReadRegisterBool(NvGpuEngine3dReg.BlendSeparateAlpha); + + State.Blends[Index].EquationRgb = ReadBlendEquation(NvGpuEngine3dReg.BlendEquationRgb); + State.Blends[Index].FuncSrcRgb = ReadBlendFactor (NvGpuEngine3dReg.BlendFuncSrcRgb); + State.Blends[Index].FuncDstRgb = ReadBlendFactor (NvGpuEngine3dReg.BlendFuncDstRgb); + State.Blends[Index].EquationAlpha = ReadBlendEquation(NvGpuEngine3dReg.BlendEquationAlpha); + State.Blends[Index].FuncSrcAlpha = ReadBlendFactor (NvGpuEngine3dReg.BlendFuncSrcAlpha); + State.Blends[Index].FuncDstAlpha = ReadBlendFactor (NvGpuEngine3dReg.BlendFuncDstAlpha); + } + } + } + } + + private GalBlendEquation ReadBlendEquation(NvGpuEngine3dReg Register) + { + return (GalBlendEquation)ReadRegister(Register); + } + + private GalBlendFactor ReadBlendFactor(NvGpuEngine3dReg Register) + { + return (GalBlendFactor)ReadRegister(Register); + } + + private void SetColorMask(GalPipelineState State) + { + bool ColorMaskCommon = ReadRegisterBool(NvGpuEngine3dReg.ColorMaskCommon); + + State.ColorMaskCommon = 
ColorMaskCommon;

            for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++)
            {
                //When the common flag is set, the mask register of target 0 is used for every target.
                int ColorMask = ReadRegister(NvGpuEngine3dReg.ColorMaskN + (ColorMaskCommon ? 0 : Index));

                //Each channel owns a 4-bit field, any non-zero value enables it.
                State.ColorMasks[Index].Red   = ((ColorMask >> 0)  & 0xf) != 0;
                State.ColorMasks[Index].Green = ((ColorMask >> 4)  & 0xf) != 0;
                State.ColorMasks[Index].Blue  = ((ColorMask >> 8)  & 0xf) != 0;
                State.ColorMasks[Index].Alpha = ((ColorMask >> 12) & 0xf) != 0;
            }
        }

        /// <summary>
        /// Copies the primitive restart enable flag into <paramref name="State"/>,
        /// and the restart index too when the feature is enabled.
        /// </summary>
        private void SetPrimitiveRestart(GalPipelineState State)
        {
            State.PrimitiveRestartEnabled = ReadRegisterBool(NvGpuEngine3dReg.PrimRestartEnable);

            if (State.PrimitiveRestartEnabled)
            {
                State.PrimitiveRestartIndex = (uint)ReadRegister(NvGpuEngine3dReg.PrimRestartIndex);
            }
        }

        /// <summary>
        /// Decodes the RTControl register and hands the resulting render target map to the renderer.
        /// Bits 0-3 hold the entry count, entry N is the 3-bit field starting at bit 4 + N * 3.
        /// A count of zero clears the map (null is passed).
        /// </summary>
        private void SetRenderTargets()
        {
            //NOTE(review): the sentence below was already cut short in the original source.
            //Commercial games do not seem to
            //bool SeparateFragData = ReadRegisterBool(NvGpuEngine3dReg.RTSeparateFragData);

            uint Control = (uint)(ReadRegister(NvGpuEngine3dReg.RTControl));

            uint Count = Control & 0xf;

            if (Count > 0)
            {
                int[] Map = new int[Count];

                for (int Index = 0; Index < Count; Index++)
                {
                    int Shift = 4 + Index * 3;

                    Map[Index] = (int)((Control >> Shift) & 7);
                }

                Gpu.Renderer.RenderTarget.SetMap(Map);
            }
            else
            {
                Gpu.Renderer.RenderTarget.SetMap(null);
            }
        }

        /// <summary>
        /// Uploads every texture used by the shaders identified by <paramref name="Keys"/>
        /// and binds them. The texture unit index is the position of the texture in the order
        /// the handles were found; entries that failed to upload (key 0) are skipped
        /// but still consume an index.
        /// </summary>
        private void UploadTextures(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
        {
            int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);

            List<(long, GalImage, GalTextureSampler)> UnboundTextures = new List<(long, GalImage, GalTextureSampler)>();

            for (int Index = 0; Index < Keys.Length; Index++)
            {
                foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetTextureUsage(Keys[Index]))
                {
                    long Position;

                    //The handle is read either from the constant buffer named by the
                    //declaration, or from the one selected by the TextureCbIndex register.
                    if (DeclInfo.IsCb)
                    {
                        Position = ConstBuffers[Index][DeclInfo.Cbuf].Position;
                    }
                    else
                    {
                        Position = ConstBuffers[Index][TextureCbIndex].Position;
                    }

                    int TextureHandle = Vmm.ReadInt32(Position + DeclInfo.Index * 4);

                    UnboundTextures.Add(UploadTexture(Vmm, TextureHandle));
                }
            }

            for (int Index = 0; Index < UnboundTextures.Count; Index++)
            {
                (long Key, GalImage Image, GalTextureSampler Sampler) = UnboundTextures[Index];

                if (Key == 0)
                {
                    continue;
                }

                Gpu.Renderer.Texture.Bind(Key, Index, Image);
                Gpu.Renderer.Texture.SetSampler(Sampler);
            }
        }

        /// <summary>
        /// Builds the image and sampler descriptions for a texture handle and sends the texture
        /// to the resource manager.
        /// </summary>
        /// <returns>
        /// The texture key (its physical address), image and sampler; the key is 0 with default
        /// image and sampler when the handle is 0 or the address can not be translated.
        /// </returns>
        private (long, GalImage, GalTextureSampler) UploadTexture(NvGpuVmm Vmm, int TextureHandle)
        {
            if (TextureHandle == 0)
            {
                //FIXME: Some games like puyo puyo will use handles with the value 0.
                //This is a bug, most likely caused by sync issues.
                return (0, default(GalImage), default(GalTextureSampler));
            }

            bool LinkedTsc = ReadRegisterBool(NvGpuEngine3dReg.LinkedTsc);

            //Low 20 bits select the texture header, high 12 bits the sampler,
            //unless the sampler index is linked to the header index.
            int TicIndex = (TextureHandle >> 0) & 0xfffff;

            int TscIndex = LinkedTsc ? TicIndex : (TextureHandle >> 20) & 0xfff;

            long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset);
            long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset);

            //Both pools use 0x20 bytes per entry.
            TicPosition += TicIndex * 0x20;
            TscPosition += TscIndex * 0x20;

            GalImage Image = TextureFactory.MakeTexture(Vmm, TicPosition);

            GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition);

            long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;

            //The low address bits are masked off depending on the memory layout.
            if (Image.Layout == GalMemoryLayout.BlockLinear)
            {
                Key &= ~0x1ffL;
            }
            else if (Image.Layout == GalMemoryLayout.Pitch)
            {
                Key &= ~0x1fL;
            }

            Key = Vmm.GetPhysicalAddress(Key);

            if (Key == -1)
            {
                //FIXME: Shouldn't ignore invalid addresses.
                return (0, default(GalImage), default(GalTextureSampler));
            }

            Gpu.ResourceManager.SendTexture(Vmm, Key, Image);

            return (Key, Image, Sampler);
        }

        /// <summary>
        /// For each shader stage, uploads the enabled constant buffers the shader uses
        /// (only when the backing memory was modified) and records their keys on
        /// <paramref name="State"/>.
        /// </summary>
        private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
        {
            for (int Stage = 0; Stage < Keys.Length; Stage++)
            {
                foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetConstBufferUsage(Keys[Stage]))
                {
                    ConstBuffer Cb = ConstBuffers[Stage][DeclInfo.Cbuf];

                    if (!Cb.Enabled)
                    {
                        continue;
                    }

                    long Key = Vmm.GetPhysicalAddress(Cb.Position);

                    if (Gpu.ResourceManager.MemoryRegionModified(Vmm, Key, Cb.Size, NvGpuBufferType.ConstBuffer))
                    {
                        //Prefer the host pointer when available, fall back to a managed copy.
                        if (Vmm.TryGetHostAddress(Cb.Position, Cb.Size, out IntPtr CbPtr))
                        {
                            Gpu.Renderer.Buffer.SetData(Key, Cb.Size, CbPtr);
                        }
                        else
                        {
                            Gpu.Renderer.Buffer.SetData(Key, Vmm.ReadBytes(Cb.Position, Cb.Size));
                        }
                    }

                    State.ConstBufferKeys[Stage][DeclInfo.Cbuf] = Key;
                }
            }
        }

        /// <summary>
        /// Uploads the index buffer (converting quads and quad strips to triangle indices)
        /// and the enabled vertex buffers, then fills <c>State.VertexBindings</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">The index entry size is larger than 4 bytes.</exception>
        private void UploadVertexArrays(NvGpuVmm Vmm, GalPipelineState State)
        {
            long IbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);

            long IboKey = Vmm.GetPhysicalAddress(IbPosition);

            int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
            int IndexCount    = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
            int PrimCtrl      = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);

            GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);

            GalIndexFormat IndexFormat = (GalIndexFormat)IndexEntryFmt;

            int IndexEntrySize = 1 << IndexEntryFmt;

            if (IndexEntrySize > 4)
            {
                throw new InvalidOperationException("Invalid index entry size \"" + IndexEntrySize + "\"!");
            }

            if (IndexCount != 0)
            {
                int IbSize = IndexCount * IndexEntrySize;

                bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize);

                bool UsesLegacyQuads =
                    PrimType == GalPrimitiveType.Quads ||
                    PrimType == GalPrimitiveType.QuadStrip;

                if (!IboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
                {
                    if (!UsesLegacyQuads)
                    {
                        if (Vmm.TryGetHostAddress(IbPosition, IbSize, out IntPtr IbPtr))
                        {
                            Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, IbPtr);
                        }
                        else
                        {
                            Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, Vmm.ReadBytes(IbPosition, IbSize));
                        }
                    }
                    else
                    {
                        byte[] Buffer = Vmm.ReadBytes(IbPosition, IbSize);

                        if (PrimType == GalPrimitiveType.Quads)
                        {
                            Buffer = QuadHelper.ConvertIbQuadsToTris(Buffer, IndexEntrySize, IndexCount);
                        }
                        else /* if (PrimType == GalPrimitiveType.QuadStrip) */
                        {
                            Buffer = QuadHelper.ConvertIbQuadStripToTris(Buffer, IndexEntrySize, IndexCount);
                        }

                        //Note: the original (unconverted) size is passed here,
                        //the same value that is given to IsIboCached above.
                        Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, Buffer);
                    }
                }

                if (!UsesLegacyQuads)
                {
                    Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat);
                }
                else
                {
                    if (PrimType == GalPrimitiveType.Quads)
                    {
                        Gpu.Renderer.Rasterizer.SetIndexArray(QuadHelper.ConvertIbSizeQuadsToTris(IbSize), IndexFormat);
                    }
                    else /* if (PrimType == GalPrimitiveType.QuadStrip) */
                    {
                        Gpu.Renderer.Rasterizer.SetIndexArray(QuadHelper.ConvertIbSizeQuadStripToTris(IbSize), IndexFormat);
                    }
                }
            }

            //Attributes are grouped by the vertex array (0-31) they read from.
            List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];

            for (int Attr = 0; Attr < 16; Attr++)
            {
                int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr);

                int ArrayIndex = Packed & 0x1f;

                if (Attribs[ArrayIndex] == null)
                {
                    Attribs[ArrayIndex] = new List<GalVertexAttrib>();
                }

                long VbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + ArrayIndex * 4);

                bool IsConst = ((Packed >> 6) & 1) != 0;

                int Offset = (Packed >> 7) & 0x3fff;

                GalVertexAttribSize Size = (GalVertexAttribSize)((Packed >> 21) & 0x3f);
                GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7);

                bool IsRgba = ((Packed >> 31) & 1) != 0;

                //Note: 16 is the maximum size of an attribute,
                //having a component size of 32-bits with 4 elements (a vec4).
                byte[] Data = Vmm.ReadBytes(VbPosition + Offset, 16);

                Attribs[ArrayIndex].Add(new GalVertexAttrib(Attr, IsConst, Offset, Data, Size, Type, IsRgba));
            }

            State.VertexBindings = new GalVertexBinding[32];

            for (int Index = 0; Index < 32; Index++)
            {
                if (Attribs[Index] == null)
                {
                    continue;
                }

                int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4);

                bool Enable = (Control & 0x1000) != 0;

                if (!Enable)
                {
                    continue;
                }

                long VbPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
                long VbEndPos   = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2);

                int VertexDivisor = ReadRegister(NvGpuEngine3dReg.VertexArrayNDivisor + Index * 4);

                bool Instanced = ReadRegisterBool(NvGpuEngine3dReg.VertexArrayNInstance + Index);

                int Stride = Control & 0xfff;

                //Instanced arrays advance by one stride every VertexDivisor instances.
                if (Instanced && VertexDivisor != 0)
                {
                    VbPosition += Stride * (CurrentInstance / VertexDivisor);
                }

                if (VbPosition > VbEndPos)
                {
                    //Instance is invalid, ignore the draw call
                    continue;
                }

                long VboKey = Vmm.GetPhysicalAddress(VbPosition);

                //The end address is inclusive.
                long VbSize = (VbEndPos - VbPosition) + 1;

                bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);

                if (!VboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
                {
                    if (Vmm.TryGetHostAddress(VbPosition, VbSize, out IntPtr VbPtr))
                    {
                        Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, VbPtr);
                    }
                    else
                    {
                        Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Vmm.ReadBytes(VbPosition, VbSize));
                    }
                }

                State.VertexBindings[Index].Enabled   = true;
                State.VertexBindings[Index].Stride    = Stride;
                State.VertexBindings[Index].VboKey    = VboKey;
                State.VertexBindings[Index].Instanced = Instanced;
                State.VertexBindings[Index].Divisor   = VertexDivisor;
                State.VertexBindings[Index].Attribs   = Attribs[Index].ToArray();
            }
        }

        /// <summary>
        /// Updates the instance counter, binds the pipeline state and render targets,
        /// then issues an indexed draw (when IndexBatchCount is non-zero) or an array draw.
        /// The index batch registers are cleared afterwards.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Both the "instance next" and "instance continue" bits are set.
        /// </exception>
        private void DispatchRender(NvGpuVmm Vmm, GalPipelineState State)
        {
            int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
            int PrimCtrl   = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);

            GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);

            bool InstanceNext = ((PrimCtrl >> 26) & 1) != 0;
            bool InstanceCont = ((PrimCtrl >> 27) & 1) != 0;

            if (InstanceNext && InstanceCont)
            {
                throw new InvalidOperationException("GPU tried to increase and reset instance count at the same time");
            }

            if (InstanceNext)
            {
                CurrentInstance++;
            }
            else if (!InstanceCont)
            {
                CurrentInstance = 0;
            }

            State.Instance = CurrentInstance;

            Gpu.Renderer.Pipeline.Bind(State);

            Gpu.Renderer.RenderTarget.Bind();

            if (IndexCount != 0)
            {
                int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
                int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase);

                long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);

                long IboKey = Vmm.GetPhysicalAddress(IndexPosition);

                //Quad primitive types were deprecated on OpenGL 3.x,
                //they are converted to a triangles index buffer on IB creation,
                //so we should use the triangles type here too.
                //Note: We assume that index first points to the first
                //vertex of a quad, if it points to the middle of a
                //quad (First % 4 != 0 for Quads) then it will not work properly.
                //The first index must be converted *before* the primitive type is replaced,
                //otherwise the Quads case can never be selected.
                if (PrimType == GalPrimitiveType.Quads)
                {
                    IndexFirst = QuadHelper.ConvertIbSizeQuadsToTris(IndexFirst);

                    PrimType = GalPrimitiveType.Triangles;
                }
                else if (PrimType == GalPrimitiveType.QuadStrip)
                {
                    IndexFirst = QuadHelper.ConvertIbSizeQuadStripToTris(IndexFirst);

                    PrimType = GalPrimitiveType.Triangles;
                }

                Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType);
            }
            else
            {
                int VertexFirst = ReadRegister(NvGpuEngine3dReg.VertexArrayFirst);
                int VertexCount = ReadRegister(NvGpuEngine3dReg.VertexArrayCount);

                Gpu.Renderer.Rasterizer.DrawArrays(VertexFirst, VertexCount, PrimType);
            }

            //Is the GPU really clearing those registers after draw?
            WriteRegister(NvGpuEngine3dReg.IndexBatchFirst, 0);
            WriteRegister(NvGpuEngine3dReg.IndexBatchCount, 0);
        }

        //Operation selected by the low 2 bits of the QueryControl register.
        private enum QueryMode
        {
            WriteSeq,
            Sync,
            WriteCounterAndTimestamp
        }

        /// <summary>
        /// Stores the method argument, then performs the query operation selected by the
        /// low 2 bits of QueryControl, writing the result at QueryAddress.
        /// The Sync mode currently does nothing.
        /// </summary>
        private void QueryControl(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            WriteRegister(MethCall);

            long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress);

            int Seq  = Registers[(int)NvGpuEngine3dReg.QuerySequence];
            int Ctrl = Registers[(int)NvGpuEngine3dReg.QueryControl];

            QueryMode Mode = (QueryMode)(Ctrl & 3);

            switch (Mode)
            {
                case QueryMode.WriteSeq: Vmm.WriteInt32(Position, Seq); break;

                case QueryMode.WriteCounterAndTimestamp:
                {
                    //TODO: Implement counters.
                    long Counter = 1;

                    long Timestamp = PerformanceCounter.ElapsedMilliseconds;

                    //NOTE(review): presumably converts milliseconds to GPU timer ticks
                    //(1 tick = 1.625 ns) - confirm against hardware documentation.
                    Timestamp = (long)(Timestamp * 615384.615385);

                    Vmm.WriteInt64(Position + 0, Counter);
                    Vmm.WriteInt64(Position + 8, Timestamp);

                    break;
                }
            }
        }

        /// <summary>
        /// Writes one 32-bit word to the current constant buffer at ConstBufferOffset,
        /// advances the offset by 4 and clears the constant buffer cache.
        /// </summary>
        private void CbData(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);

            int Offset = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset);

            Vmm.WriteInt32(Position + Offset, MethCall.Argument);

            WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Offset + 4);

            Gpu.ResourceManager.ClearPbCache(NvGpuBufferType.ConstBuffer);
        }

        /// <summary>
        /// Binds the constant buffer described by the ConstBufferAddress/ConstBufferSize
        /// registers to a slot of a shader stage. The stage comes from the method number,
        /// the slot (bits 4-8) and the enable flag (bit 0) from the argument.
        /// </summary>
        private void CbBind(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            //Bind methods start at 0x904 and are 8 registers apart, one per stage.
            int Stage = (MethCall.Method - 0x904) >> 3;

            int Index = MethCall.Argument;

            bool Enabled = (Index & 1) != 0;

            Index = (Index >> 4) & 0x1f;

            long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);

            long CbKey = Vmm.GetPhysicalAddress(Position);

            int Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);

            if (!Gpu.Renderer.Buffer.IsCached(CbKey, Size))
            {
                Gpu.Renderer.Buffer.Create(CbKey, Size);
            }

            ConstBuffer Cb = ConstBuffers[Stage][Index];

            if (Cb.Position != Position || Cb.Enabled != Enabled || Cb.Size != Size)
            {
                ConstBuffers[Stage][Index].Position = Position;
                ConstBuffers[Stage][Index].Enabled  = Enabled;
                ConstBuffers[Stage][Index].Size     = Size;
            }
        }

        //Returns the sign (-1, 0 or 1) of the float stored on the register.
        private float GetFlipSign(NvGpuEngine3dReg Reg)
        {
            return MathF.Sign(ReadRegisterFloat(Reg));
        }

        //Joins two consecutive registers into a 64-bit value,
        //the first register holds the high word, the second the low word.
        private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg)
        {
            return
                (long)Registers[(int)Reg + 0] << 32 |
                (uint)Registers[(int)Reg + 1];
        }

        //Stores the argument of a method call on the register with the same number.
        private void WriteRegister(GpuMethodCall MethCall)
        {
            Registers[MethCall.Method] = MethCall.Argument;
        }

        private int ReadRegister(NvGpuEngine3dReg Reg)
        {
            return Registers[(int)Reg];
        }

        //Reinterprets the register bits as a 32-bit float.
        private float ReadRegisterFloat(NvGpuEngine3dReg Reg)
        {
            return BitConverter.Int32BitsToSingle(ReadRegister(Reg));
        }

        //Only bit 0 of the register is considered.
        private bool ReadRegisterBool(NvGpuEngine3dReg Reg)
        {
            return (ReadRegister(Reg) & 1) != 0;
        }

        private void WriteRegister(NvGpuEngine3dReg Reg, int Value)
        {
            Registers[(int)Reg] = Value;
        }
    }
}

//File: Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs (new file, mode 100644, blob 30243c02)
namespace Ryujinx.Graphics.Graphics3d
{
    //Register (method) numbers of the 3D engine. Names with an "N" are the first
    //register of an array, the element offset is added by the code using them.
    enum NvGpuEngine3dReg
    {
        FrameBufferNAddress  = 0x200,
        FrameBufferNWidth    = 0x202,
        FrameBufferNHeight   = 0x203,
        FrameBufferNFormat   = 0x204,
        FrameBufferNBlockDim = 0x205,
        ViewportNScaleX      = 0x280,
        ViewportNScaleY      = 0x281,
        ViewportNScaleZ      = 0x282,
        ViewportNTranslateX  = 0x283,
        ViewportNTranslateY  = 0x284,
        ViewportNTranslateZ  = 0x285,
        ViewportNHoriz       = 0x300,
        ViewportNVert        = 0x301,
        DepthRangeNNear      = 0x302,
        DepthRangeNFar       = 0x303,
        VertexArrayFirst     = 0x35d,
        VertexArrayCount     = 0x35e,
        ClearNColor          = 0x360,
        ClearDepth           = 0x364,
        ClearStencil         = 0x368,
        StencilBackFuncRef   = 0x3d5,
        StencilBackMask      = 0x3d6,
        StencilBackFuncMask  = 0x3d7,
        ColorMaskCommon      = 0x3e4,
        RTSeparateFragData   = 0x3eb,
        ZetaAddress          = 0x3f8,
        ZetaFormat           = 0x3fa,
        ZetaBlockDimensions  = 0x3fb,
        ZetaLayerStride      = 0x3fc,
        VertexAttribNFormat  = 0x458,
        RTControl            = 0x487,
        ZetaHoriz            = 0x48a,
        ZetaVert             = 0x48b,
        ZetaArrayMode        = 0x48c,
        LinkedTsc            = 0x48d,
        DepthTestEnable      = 0x4b3,
        BlendIndependent     = 0x4b9,
        DepthWriteEnable     = 0x4ba,
        DepthTestFunction    = 0x4c3,
        BlendSeparateAlpha   = 0x4cf,
        BlendEquationRgb     = 0x4d0,
        BlendFuncSrcRgb      = 0x4d1,
        BlendFuncDstRgb      = 0x4d2,
        BlendEquationAlpha   = 0x4d3,
        BlendFuncSrcAlpha    = 0x4d4,
        BlendFuncDstAlpha    = 0x4d6,
        BlendEnable          = 0x4d7,
        IBlendNEnable        = 0x4d8,
        StencilEnable        = 0x4e0,
        StencilFrontOpFail   = 0x4e1,
        StencilFrontOpZFail  = 0x4e2,
        StencilFrontOpZPass  = 0x4e3,
        StencilFrontFuncFunc = 0x4e4,
        StencilFrontFuncRef  = 0x4e5,
        StencilFrontFuncMask = 0x4e6,
        StencilFrontMask     = 0x4e7,
        ScreenYControl       = 0x4eb,
        VertexArrayElemBase  = 0x50d,
        VertexArrayInstBase  = 0x50e,
        ZetaEnable           = 0x54e,
        TexHeaderPoolOffset  = 0x55d,
        TexSamplerPoolOffset = 0x557,
        StencilTwoSideEnable = 0x565,
        StencilBackOpFail    = 0x566,
        StencilBackOpZFail   = 0x567,
        StencilBackOpZPass   = 0x568,
        StencilBackFuncFunc  = 0x569,
        FrameBufferSrgb      = 0x56e,
        ShaderAddress        = 0x582,
        VertexBeginGl        = 0x586,
        PrimRestartEnable    = 0x591,
        PrimRestartIndex     = 0x592,
        IndexArrayAddress    = 0x5f2,
        IndexArrayEndAddr    = 0x5f4,
        IndexArrayFormat     = 0x5f6,
        IndexBatchFirst      = 0x5f7,
        IndexBatchCount      = 0x5f8,
        VertexArrayNInstance = 0x620,
        CullFaceEnable       = 0x646,
        FrontFace            = 0x647,
        CullFace             = 0x648,
        ColorMaskN           = 0x680,
        QueryAddress         = 0x6c0,
        QuerySequence        = 0x6c2,
        QueryControl         = 0x6c3,
        VertexArrayNControl  = 0x700,
        VertexArrayNAddress  = 0x701,
        VertexArrayNDivisor  = 0x703,
        IBlendNSeparateAlpha = 0x780,
        IBlendNEquationRgb   = 0x781,
        IBlendNFuncSrcRgb    = 0x782,
        IBlendNFuncDstRgb    = 0x783,
        IBlendNEquationAlpha = 0x784,
        IBlendNFuncSrcAlpha  = 0x785,
        IBlendNFuncDstAlpha  = 0x786,
        VertexArrayNEndAddr  = 0x7c0,
        ShaderNControl       = 0x800,
        ShaderNOffset        = 0x801,
        ShaderNMaxGprs       = 0x803,
        ShaderNType          = 0x804,
        ConstBufferSize      = 0x8e0,
        ConstBufferAddress   = 0x8e1,
        ConstBufferOffset    = 0x8e3,
        TextureCbIndex       = 0x982
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs (new file, mode 100644, blob d89059c0)
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Graphics3d
{
    //Engine that copies data between two GPU virtual addresses, either as a
    //flat run of bytes or as a 2D region with optional (de)swizzling.
    class NvGpuEngineM2mf : INvGpuEngine
    {
        public int[] Registers { get; private set; }

        private NvGpu Gpu;

        private Dictionary<int, NvGpuMethod> Methods;

        public NvGpuEngineM2mf(NvGpu Gpu)
        {
            this.Gpu = Gpu;

            Registers = new int[0x1d6];

            //Method 0xc0 starts the copy, every other method is a plain register write.
            Methods = new Dictionary<int, NvGpuMethod>
            {
                { 0xc0, Execute }
            };
        }

        //Runs the handler registered for the method,
        //or stores the argument on the register when there is none.
        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            if (!Methods.TryGetValue(MethCall.Method, out NvGpuMethod Handler))
            {
                WriteRegister(MethCall);

                return;
            }

            Handler(Vmm, MethCall);
        }

        //Performs the copy described by the registers. The argument holds the flags:
        //bit 7 = linear source, bit 8 = linear destination, bit 9 = 2D copy.
        private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            //TODO: Some registers and copy modes are still not implemented.
            //(SizeY, SizeZ and PosZ of both surfaces are not consumed yet.)
            int Flags = MethCall.Argument;

            bool SrcLinear = (Flags & 0x080) != 0;
            bool DstLinear = (Flags & 0x100) != 0;
            bool Copy2d    = (Flags & 0x200) != 0;

            long SrcPA = Vmm.GetPhysicalAddress(MakeInt64From2xInt32(NvGpuEngineM2mfReg.SrcAddress));
            long DstPA = Vmm.GetPhysicalAddress(MakeInt64From2xInt32(NvGpuEngineM2mfReg.DstAddress));

            int XCount = ReadRegister(NvGpuEngineM2mfReg.XCount);

            if (!Copy2d)
            {
                //Flat copy, XCount is used directly as the copy size.
                Vmm.Memory.CopyBytes(SrcPA, DstPA, XCount);

                return;
            }

            int YCount = ReadRegister(NvGpuEngineM2mfReg.YCount);

            int SrcPitch = ReadRegister(NvGpuEngineM2mfReg.SrcPitch);
            int DstPitch = ReadRegister(NvGpuEngineM2mfReg.DstPitch);

            //Bytes per element of each side, encoded (minus one) on the swizzle register.
            int Swizzle = ReadRegister(NvGpuEngineM2mfReg.Swizzle);

            int SrcCpp = ((Swizzle >> 20) & 7) + 1;
            int DstCpp = ((Swizzle >> 24) & 7) + 1;

            if (SrcLinear && DstLinear)
            {
                //The start position of linear surfaces is always zero,
                //so every row simply starts at Row * Pitch.
                for (int Row = 0; Row < YCount; Row++)
                {
                    long SrcRow = SrcPA + (uint)(Row * SrcPitch);
                    long DstRow = DstPA + (uint)(Row * DstPitch);

                    Vmm.Memory.CopyBytes(SrcRow, DstRow, XCount * SrcCpp);
                }

                return;
            }

            int SrcPosXY = ReadRegister(NvGpuEngineM2mfReg.SrcPosXY);
            int DstPosXY = ReadRegister(NvGpuEngineM2mfReg.DstPosXY);

            //X is on the low half and Y on the high half, ignored for linear surfaces.
            int SrcPosX = SrcLinear ? 0 : (SrcPosXY >> 0)  & 0xffff;
            int SrcPosY = SrcLinear ? 0 : (SrcPosXY >> 16) & 0xffff;

            int DstPosX = DstLinear ? 0 : (DstPosXY >> 0)  & 0xffff;
            int DstPosY = DstLinear ? 0 : (DstPosXY >> 16) & 0xffff;

            ISwizzle SrcSwizzle = SrcLinear
                ? (ISwizzle)new LinearSwizzle(SrcPitch, SrcCpp)
                : new BlockLinearSwizzle(
                    ReadRegister(NvGpuEngineM2mfReg.SrcSizeX),
                    SrcCpp,
                    1 << ((ReadRegister(NvGpuEngineM2mfReg.SrcBlkDim) >> 4) & 0xf));

            ISwizzle DstSwizzle = DstLinear
                ? (ISwizzle)new LinearSwizzle(DstPitch, DstCpp)
                : new BlockLinearSwizzle(
                    ReadRegister(NvGpuEngineM2mfReg.DstSizeX),
                    DstCpp,
                    1 << ((ReadRegister(NvGpuEngineM2mfReg.DstBlkDim) >> 4) & 0xf));

            //At least one side is block linear, copy element by element.
            for (int Y = 0; Y < YCount; Y++)
            {
                for (int X = 0; X < XCount; X++)
                {
                    long Src = SrcPA + (uint)SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y);
                    long Dst = DstPA + (uint)DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y);

                    Vmm.Memory.CopyBytes(Src, Dst, SrcCpp);
                }
            }
        }

        //Joins two consecutive registers into a 64-bit value (high word first).
        private long MakeInt64From2xInt32(NvGpuEngineM2mfReg Reg)
        {
            long High = Registers[(int)Reg + 0];
            uint Low  = (uint)Registers[(int)Reg + 1];

            return High << 32 | Low;
        }

        //Stores the argument of a method call on the register with the same number.
        private void WriteRegister(GpuMethodCall MethCall)
        {
            Registers[MethCall.Method] = MethCall.Argument;
        }

        private int ReadRegister(NvGpuEngineM2mfReg Reg)
        {
            return Registers[(int)Reg];
        }

        private void WriteRegister(NvGpuEngineM2mfReg Reg, int Value)
        {
            Registers[(int)Reg] = Value;
        }
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mfReg.cs (new file, mode 100644, blob 4bef8d9e)
namespace Ryujinx.Graphics.Graphics3d
{
    //Register numbers read by the M2mf (memory to memory copy) engine.
    enum NvGpuEngineM2mfReg
    {
        //The addresses take two registers each (high word, then low word).
        SrcAddress = 0x100,
        DstAddress = 0x102,

        SrcPitch   = 0x104,
        DstPitch   = 0x105,
        XCount     = 0x106,
        YCount     = 0x107,
        Swizzle    = 0x1c2,

        //Destination surface.
        DstBlkDim  = 0x1c3,
        DstSizeX   = 0x1c4,
        DstSizeY   = 0x1c5,
        DstSizeZ   = 0x1c6,
        DstPosZ    = 0x1c7,
        DstPosXY   = 0x1c8,

        //Source surface.
        SrcBlkDim  = 0x1ca,
        SrcSizeX   = 0x1cb,
        SrcSizeY   = 0x1cc,
        SrcSizeZ   = 0x1cd,
        SrcPosZ    = 0x1ce,
        SrcPosXY   = 0x1cf
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs (new file, mode 100644, blob 68155255)
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Graphics3d
{
    //Engine that writes data pushed through the command stream to GPU memory.
    //Method 0x6c sets up the transfer, method 0x6d pushes 4 bytes of data per call,
    //and the data is written out when the last call of the batch arrives.
    class NvGpuEngineP2mf : INvGpuEngine
    {
        public int[] Registers { get; private set; }

        private NvGpu Gpu;

        private Dictionary<int, NvGpuMethod> Methods;

        //State of the transfer in progress, filled by Execute.
        private int CopyStartX;
        private int CopyStartY;

        private int CopyWidth;
        private int CopyHeight;
        private int CopyGobBlockHeight;

        private long CopyAddress;

        private int CopyOffset;
        private int CopySize;

        private bool CopyLinear;

        //Staging buffer, null while no transfer is in progress.
        private byte[] Buffer;

        public NvGpuEngineP2mf(NvGpu Gpu)
        {
            this.Gpu = Gpu;

            Registers = new int[0x80];

            Methods = new Dictionary<int, NvGpuMethod>
            {
                { 0x6c, Execute  },
                { 0x6d, PushData }
            };
        }

        //Runs the handler registered for the method,
        //or stores the argument on the register when there is none.
        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            if (!Methods.TryGetValue(MethCall.Method, out NvGpuMethod Handler))
            {
                WriteRegister(MethCall);

                return;
            }

            Handler(Vmm, MethCall);
        }

        //Latches the destination description from the registers and allocates
        //a staging buffer of LineLengthIn * LineCount bytes.
        private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            //TODO: Some registers and copy modes are still not implemented.
            //(DstPitch is not consumed yet.)
            CopyLinear = (MethCall.Argument & 1) != 0;

            int BlockDim = ReadRegister(NvGpuEngineP2mfReg.DstBlockDim);

            CopyGobBlockHeight = 1 << ((BlockDim >> 4) & 0xf);

            CopyStartX = ReadRegister(NvGpuEngineP2mfReg.DstX);
            CopyStartY = ReadRegister(NvGpuEngineP2mfReg.DstY);

            CopyWidth  = ReadRegister(NvGpuEngineP2mfReg.DstWidth);
            CopyHeight = ReadRegister(NvGpuEngineP2mfReg.DstHeight);

            CopyAddress = MakeInt64From2xInt32(NvGpuEngineP2mfReg.DstAddress);

            CopyOffset = 0;
            CopySize   = ReadRegister(NvGpuEngineP2mfReg.LineLengthIn) *
                         ReadRegister(NvGpuEngineP2mfReg.LineCount);

            Buffer = new byte[CopySize];
        }

        //Appends the argument to the staging buffer (low byte first) and,
        //on the last call, writes the buffer to the destination.
        private void PushData(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            if (Buffer == null)
            {
                //No transfer was set up, the data is dropped.
                return;
            }

            int Word = MethCall.Argument;

            for (int ByteIndex = 0; ByteIndex < 4 && CopyOffset < CopySize; ByteIndex++)
            {
                Buffer[CopyOffset++] = (byte)(Word >> (ByteIndex * 8));
            }

            if (!MethCall.IsLastCall)
            {
                return;
            }

            if (CopyLinear)
            {
                Vmm.WriteBytes(CopyAddress, Buffer);
            }
            else
            {
                BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight);

                int SrcOffset = 0;

                //Byte by byte write, stops as soon as the staged data runs out.
                for (int Y = CopyStartY; Y < CopyHeight && SrcOffset < CopySize; Y++)
                {
                    for (int X = CopyStartX; X < CopyWidth && SrcOffset < CopySize; X++)
                    {
                        int DstOffset = Swizzle.GetSwizzleOffset(X, Y);

                        Vmm.WriteByte(CopyAddress + DstOffset, Buffer[SrcOffset++]);
                    }
                }
            }

            Buffer = null;
        }

        //Joins two consecutive registers into a 64-bit value (high word first).
        private long MakeInt64From2xInt32(NvGpuEngineP2mfReg Reg)
        {
            long High = Registers[(int)Reg + 0];
            uint Low  = (uint)Registers[(int)Reg + 1];

            return High << 32 | Low;
        }

        //Stores the argument of a method call on the register with the same number.
        private void WriteRegister(GpuMethodCall MethCall)
        {
            Registers[MethCall.Method] = MethCall.Argument;
        }

        private int ReadRegister(NvGpuEngineP2mfReg Reg)
        {
            return Registers[(int)Reg];
        }

        private void WriteRegister(NvGpuEngineP2mfReg Reg, int Value)
        {
            Registers[(int)Reg] = Value;
        }
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mfReg.cs (new file, mode 100644, blob ab3a304d)
namespace Ryujinx.Graphics.Graphics3d
{
    //Register numbers read by the P2mf (push buffer to memory) engine.
    enum NvGpuEngineP2mfReg
    {
        //Size of the incoming data.
        LineLengthIn = 0x60,
        LineCount    = 0x61,

        //Destination, the address takes two registers (high word, then low word).
        DstAddress   = 0x62,
        DstPitch     = 0x64,
        DstBlockDim  = 0x65,
        DstWidth     = 0x66,
        DstHeight    = 0x67,
        DstDepth     = 0x68,
        DstZ         = 0x69,
        DstX         = 0x6a,
        DstY         = 0x6b
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuFifo.cs (new file, mode 100644, blob f834ade7)
using Ryujinx.Graphics.Memory;

namespace Ryujinx.Graphics.Graphics3d
{
    //Routes method calls to the engine bound to their sub channel,
    //and handles macro upload, binding and execution for the 3D engine.
    class NvGpuFifo
    {
        private const int MacrosCount    = 0x80;
        private const int MacroIndexMask = MacrosCount - 1;

        //Note: The size of the macro memory is unknown, we just make
        //a guess here and use 256kb as the size. Increase if needed.
        private const int MmeWords = 256 * 256;

        private NvGpu Gpu;

        //Engine bound to each of the 8 sub channels.
        private NvGpuEngine[] SubChannels;

        //A macro bound to a slot: its start position on the macro memory, plus the
        //interpreter that runs it. A default (never bound) value has no interpreter,
        //so pushing arguments to it or executing it does nothing.
        private struct CachedMacro
        {
            public int Position { get; private set; }

            private bool ExecutionPending;
            private int  Argument;

            private MacroInterpreter Interpreter;

            public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
            {
                this.Position = Position;

                ExecutionPending = false;
                Argument         = 0;

                Interpreter = new MacroInterpreter(PFifo, Engine);
            }

            //Records the first argument and marks the macro as waiting to run.
            public void StartExecution(int Argument)
            {
                this.Argument = Argument;

                ExecutionPending = true;
            }

            //Runs the macro if a start was requested, at most once per request.
            public void Execute(NvGpuVmm Vmm, int[] Mme)
            {
                if (!ExecutionPending)
                {
                    return;
                }

                //Cleared before running the interpreter.
                ExecutionPending = false;

                Interpreter?.Execute(Vmm, Mme, Position, Argument);
            }

            //Queues an extra argument for the macro to fetch.
            public void PushArgument(int Argument)
            {
                Interpreter?.Fifo.Enqueue(Argument);
            }
        }

        private int CurrMacroPosition;
        private int CurrMacroBindIndex;

        private CachedMacro[] Macros;

        //Macro code memory, one instruction word per element.
        private int[] Mme;

        public NvGpuFifo(NvGpu Gpu)
        {
            this.Gpu = Gpu;

            SubChannels = new NvGpuEngine[8];

            Macros = new CachedMacro[MacrosCount];

            Mme = new int[MmeWords];
        }

        //Entry point: binds an engine to a sub channel, or forwards the
        //call to the engine bound to the sub channel of the call.
        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            if ((NvGpuFifoMeth)MethCall.Method == NvGpuFifoMeth.BindChannel)
            {
                SubChannels[MethCall.SubChannel] = (NvGpuEngine)MethCall.Argument;

                return;
            }

            //Calls to sub channels bound to any other engine are ignored.
            switch (SubChannels[MethCall.SubChannel])
            {
                case NvGpuEngine._2d:  Call2dMethod  (Vmm, MethCall); break;
                case NvGpuEngine._3d:  Call3dMethod  (Vmm, MethCall); break;
                case NvGpuEngine.P2mf: CallP2mfMethod(Vmm, MethCall); break;
                case NvGpuEngine.M2mf: CallM2mfMethod(Vmm, MethCall); break;
            }
        }

        private void Call2dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall) => Gpu.Engine2d.CallMethod(Vmm, MethCall);

        //Methods below 0x80 are macro management (or P2mf) methods,
        //0x80 to 0xdff are 3D engine registers, 0xe00 and up are macro calls.
        private void Call3dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
        {
            int Method = MethCall.Method;

            if (Method >= 0xe00)
            {
                //Even methods start a macro, odd methods push one more argument to it.
                ref CachedMacro Macro = ref Macros[(Method >> 1) & MacroIndexMask];

                if ((Method & 1) == 0)
                {
                    Macro.StartExecution(MethCall.Argument);
                }
                else
                {
                    Macro.PushArgument(MethCall.Argument);
                }

                //The macro only runs once all its arguments were received.
                if (MethCall.IsLastCall)
                {
                    Macro.Execute(Vmm, Mme);
                }

                return;
            }

            if (Method >= 0x80)
            {
                Gpu.Engine3d.CallMethod(Vmm, MethCall);

                return;
            }

            switch ((NvGpuFifoMeth)Method)
            {
                case NvGpuFifoMeth.SetMacroUploadAddress:
                    CurrMacroPosition = MethCall.Argument;
                    break;

                case NvGpuFifoMeth.SendMacroCodeData:
                    //Code words are stored sequentially from the upload address.
                    Mme[CurrMacroPosition++] = MethCall.Argument;
                    break;

                case NvGpuFifoMeth.SetMacroBindingIndex:
                    CurrMacroBindIndex = MethCall.Argument;
                    break;

                case NvGpuFifoMeth.BindMacro:
                    //The argument is the start position of the macro on the code memory.
                    Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, MethCall.Argument);
                    break;

                default:
                    CallP2mfMethod(Vmm, MethCall);
                    break;
            }
        }

        private void CallP2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall) => Gpu.EngineP2mf.CallMethod(Vmm, MethCall);

        private void CallM2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall) => Gpu.EngineM2mf.CallMethod(Vmm, MethCall);
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuFifoMeth.cs (new file, mode 100644, blob 9bf528b3)
namespace Ryujinx.Graphics.Graphics3d
{
    //Method numbers handled by NvGpuFifo itself rather than by an engine.
    enum NvGpuFifoMeth
    {
        //Binds the engine given on the argument to the sub channel of the call.
        BindChannel           = 0,

        //Macro code upload and binding.
        SetMacroUploadAddress = 0x45,
        SendMacroCodeData     = 0x46,
        SetMacroBindingIndex  = 0x47,
        BindMacro             = 0x48
    }
}
//(the previous file in this diff ends without a trailing newline)
//File: Ryujinx.Graphics/Graphics3d/NvGpuMethod.cs (new file, mode 100644, blob 8730d144)
using Ryujinx.Graphics.Memory;

namespace Ryujinx.Graphics.Graphics3d
{
    //Handler for a GPU method call: receives the GPU memory manager
    //and the call (method number, argument, ...) being processed.
    delegate void NvGpuMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
}
\ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs new file mode 100644 index 00000000..1efa0255 --- /dev/null +++ b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs @@ -0,0 +1,1384 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; + +namespace Ryujinx.Graphics.Texture +{ + public class ASTCDecoderException : Exception + { + public ASTCDecoderException(string ExMsg) : base(ExMsg) { } + } + + //https://github.com/GammaUNC/FasTC/blob/master/ASTCEncoder/src/Decompressor.cpp + public static class ASTCDecoder + { + struct TexelWeightParams + { + public int Width; + public int Height; + public bool DualPlane; + public int MaxWeight; + public bool Error; + public bool VoidExtentLDR; + public bool VoidExtentHDR; + + public int GetPackedBitSize() + { + // How many indices do we have? + int Indices = Height * Width; + + if (DualPlane) + { + Indices *= 2; + } + + IntegerEncoded IntEncoded = IntegerEncoded.CreateEncoding(MaxWeight); + + return IntEncoded.GetBitLength(Indices); + } + + public int GetNumWeightValues() + { + int Ret = Width * Height; + + if (DualPlane) + { + Ret *= 2; + } + + return Ret; + } + } + + public static byte[] DecodeToRGBA8888( + byte[] InputBuffer, + int BlockX, + int BlockY, + int BlockZ, + int X, + int Y, + int Z) + { + using (MemoryStream InputStream = new MemoryStream(InputBuffer)) + { + BinaryReader BinReader = new BinaryReader(InputStream); + + if (BlockX > 12 || BlockY > 12) + { + throw new ASTCDecoderException("Block size unsupported!"); + } + + if (BlockZ != 1 || Z != 1) + { + throw new ASTCDecoderException("3D compressed textures unsupported!"); + } + + using (MemoryStream OutputStream = new MemoryStream()) + { + int BlockIndex = 0; + + for (int j = 0; j < Y; j += BlockY) + { + for (int i = 0; i < X; i += BlockX) + { + int[] DecompressedData = new int[144]; + + 
DecompressBlock(BinReader.ReadBytes(0x10), DecompressedData, BlockX, BlockY); + + int DecompressedWidth = Math.Min(BlockX, X - i); + int DecompressedHeight = Math.Min(BlockY, Y - j); + int BaseOffsets = (j * X + i) * 4; + + for (int jj = 0; jj < DecompressedHeight; jj++) + { + OutputStream.Seek(BaseOffsets + jj * X * 4, SeekOrigin.Begin); + + byte[] OutputBuffer = new byte[DecompressedData.Length * sizeof(int)]; + Buffer.BlockCopy(DecompressedData, 0, OutputBuffer, 0, OutputBuffer.Length); + + OutputStream.Write(OutputBuffer, jj * BlockX * 4, DecompressedWidth * 4); + } + + BlockIndex++; + } + } + + return OutputStream.ToArray(); + } + } + } + + public static bool DecompressBlock( + byte[] InputBuffer, + int[] OutputBuffer, + int BlockWidth, + int BlockHeight) + { + BitArrayStream BitStream = new BitArrayStream(new BitArray(InputBuffer)); + TexelWeightParams TexelParams = DecodeBlockInfo(BitStream); + + if (TexelParams.Error) + { + throw new ASTCDecoderException("Invalid block mode"); + } + + if (TexelParams.VoidExtentLDR) + { + FillVoidExtentLDR(BitStream, OutputBuffer, BlockWidth, BlockHeight); + + return true; + } + + if (TexelParams.VoidExtentHDR) + { + throw new ASTCDecoderException("HDR void extent blocks are unsupported!"); + } + + if (TexelParams.Width > BlockWidth) + { + throw new ASTCDecoderException("Texel weight grid width should be smaller than block width"); + } + + if (TexelParams.Height > BlockHeight) + { + throw new ASTCDecoderException("Texel weight grid height should be smaller than block height"); + } + + // Read num partitions + int NumberPartitions = BitStream.ReadBits(2) + 1; + Debug.Assert(NumberPartitions <= 4); + + if (NumberPartitions == 4 && TexelParams.DualPlane) + { + throw new ASTCDecoderException("Dual plane mode is incompatible with four partition blocks"); + } + + // Based on the number of partitions, read the color endpoint mode for + // each partition. 
+ + // Determine partitions, partition index, and color endpoint modes + int PlaneIndices = -1; + int PartitionIndex; + uint[] ColorEndpointMode = { 0, 0, 0, 0 }; + + BitArrayStream ColorEndpointStream = new BitArrayStream(new BitArray(16 * 8)); + + // Read extra config data... + uint BaseColorEndpointMode = 0; + + if (NumberPartitions == 1) + { + ColorEndpointMode[0] = (uint)BitStream.ReadBits(4); + PartitionIndex = 0; + } + else + { + PartitionIndex = BitStream.ReadBits(10); + BaseColorEndpointMode = (uint)BitStream.ReadBits(6); + } + + // The low two bits of BaseColorEndpointMode decide whether extra mode bits are read after the color data. + uint BaseMode = (BaseColorEndpointMode & 3); + + // Remaining bits are color endpoint data... + int NumberWeightBits = TexelParams.GetPackedBitSize(); + int RemainingBits = 128 - NumberWeightBits - BitStream.Position; + + // Consider extra bits prior to texel data... + uint ExtraColorEndpointModeBits = 0; + + if (BaseMode != 0) + { + switch (NumberPartitions) + { + case 2: ExtraColorEndpointModeBits += 2; break; + case 3: ExtraColorEndpointModeBits += 5; break; + case 4: ExtraColorEndpointModeBits += 8; break; + default: Debug.Assert(false); break; + } + } + + RemainingBits -= (int)ExtraColorEndpointModeBits; + + // Do we have a dual plane situation? + int PlaneSelectorBits = 0; + + if (TexelParams.DualPlane) + { + PlaneSelectorBits = 2; + } + + RemainingBits -= PlaneSelectorBits; + + // Read color data... 
+ int ColorDataBits = RemainingBits; + + // Copy the color data into ColorEndpointStream in chunks of at most 8 bits. + while (RemainingBits > 0) + { + int NumberBits = Math.Min(RemainingBits, 8); + int Bits = BitStream.ReadBits(NumberBits); + ColorEndpointStream.WriteBits(Bits, NumberBits); + RemainingBits -= 8; + } + + // Read the plane selection bits + PlaneIndices = BitStream.ReadBits(PlaneSelectorBits); + + // Read the rest of the CEM + if (BaseMode != 0) + { + uint ExtraColorEndpointMode = (uint)BitStream.ReadBits((int)ExtraColorEndpointModeBits); + uint TempColorEndpointMode = (ExtraColorEndpointMode << 6) | BaseColorEndpointMode; + TempColorEndpointMode >>= 2; + + // One class-selector bit per partition, followed by a two-bit value per partition. + bool[] C = new bool[4]; + + for (int i = 0; i < NumberPartitions; i++) + { + C[i] = (TempColorEndpointMode & 1) != 0; + TempColorEndpointMode >>= 1; + } + + byte[] M = new byte[4]; + + for (int i = 0; i < NumberPartitions; i++) + { + M[i] = (byte)(TempColorEndpointMode & 3); + TempColorEndpointMode >>= 2; + Debug.Assert(M[i] <= 3); + } + + for (int i = 0; i < NumberPartitions; i++) + { + ColorEndpointMode[i] = BaseMode; + if (!(C[i])) ColorEndpointMode[i] -= 1; + ColorEndpointMode[i] <<= 2; + ColorEndpointMode[i] |= M[i]; + } + } + else if (NumberPartitions > 1) + { + // With BaseMode zero every partition shares the mode held in the upper four bits. + uint TempColorEndpointMode = BaseColorEndpointMode >> 2; + + for (uint i = 0; i < NumberPartitions; i++) + { + ColorEndpointMode[i] = TempColorEndpointMode; + } + } + + // Make sure everything up till here is sane. 
+ for (int i = 0; i < NumberPartitions; i++) + { + Debug.Assert(ColorEndpointMode[i] < 16); + } + Debug.Assert(BitStream.Position + TexelParams.GetPackedBitSize() == 128); + + // Decode both color data and texel weight data + int[] ColorValues = new int[32]; // Four values * two endpoints * four maximum partitions + DecodeColorValues(ColorValues, ColorEndpointStream.ToByteArray(), ColorEndpointMode, NumberPartitions, ColorDataBits); + + ASTCPixel[][] EndPoints = new ASTCPixel[4][]; + EndPoints[0] = new ASTCPixel[2]; + EndPoints[1] = new ASTCPixel[2]; + EndPoints[2] = new ASTCPixel[2]; + EndPoints[3] = new ASTCPixel[2]; + + int ColorValuesPosition = 0; + + for (int i = 0; i < NumberPartitions; i++) + { + ComputeEndpoints(EndPoints[i], ColorValues, ColorEndpointMode[i], ref ColorValuesPosition); + } + + // Read the texel weight data. + byte[] TexelWeightData = (byte[])InputBuffer.Clone(); + + // Reverse everything + for (int i = 0; i < 8; i++) + { + byte a = ReverseByte(TexelWeightData[i]); + byte b = ReverseByte(TexelWeightData[15 - i]); + + TexelWeightData[i] = b; + TexelWeightData[15 - i] = a; + } + + // Make sure that higher non-texel bits are set to zero + int ClearByteStart = (TexelParams.GetPackedBitSize() >> 3) + 1; + TexelWeightData[ClearByteStart - 1] &= (byte)((1 << (TexelParams.GetPackedBitSize() % 8)) - 1); + + // Zero every byte from ClearByteStart through the end of the 16-byte block. + int cLen = 16 - ClearByteStart; + for (int i = ClearByteStart; i < ClearByteStart + cLen; i++) TexelWeightData[i] = 0; + + List<IntegerEncoded> TexelWeightValues = new List<IntegerEncoded>(); + BitArrayStream WeightBitStream = new BitArrayStream(new BitArray(TexelWeightData)); + + IntegerEncoded.DecodeIntegerSequence(TexelWeightValues, WeightBitStream, TexelParams.MaxWeight, TexelParams.GetNumWeightValues()); + + // Blocks can be at most 12x12, so we can have as many as 144 weights + int[][] Weights = new int[2][]; + Weights[0] = new int[144]; + Weights[1] = new int[144]; + + UnquantizeTexelWeights(Weights, TexelWeightValues, TexelParams, 
BlockWidth, BlockHeight); + + // Now that we have endpoints and weights, we can interpolate and generate + // the proper decoding... + for (int j = 0; j < BlockHeight; j++) + { + for (int i = 0; i < BlockWidth; i++) + { + int Partition = Select2DPartition(PartitionIndex, i, j, NumberPartitions, ((BlockHeight * BlockWidth) < 32)); + Debug.Assert(Partition < NumberPartitions); + + ASTCPixel Pixel = new ASTCPixel(0, 0, 0, 0); + for (int Component = 0; Component < 4; Component++) + { + int Component0 = EndPoints[Partition][0].GetComponent(Component); + Component0 = BitArrayStream.Replicate(Component0, 8, 16); + int Component1 = EndPoints[Partition][1].GetComponent(Component); + Component1 = BitArrayStream.Replicate(Component1, 8, 16); + + int Plane = 0; + + if (TexelParams.DualPlane && (((PlaneIndices + 1) & 3) == Component)) + { + Plane = 1; + } + + // Blend the two 16-bit endpoints with a weight in [0, 64], rounding to nearest. + int Weight = Weights[Plane][j * BlockWidth + i]; + int FinalComponent = (Component0 * (64 - Weight) + Component1 * Weight + 32) / 64; + + if (FinalComponent == 65535) + { + Pixel.SetComponent(Component, 255); + } + else + { + double FinalComponentFloat = FinalComponent; + Pixel.SetComponent(Component, (int)(255.0 * (FinalComponentFloat / 65536.0) + 0.5)); + } + } + + OutputBuffer[j * BlockWidth + i] = Pixel.Pack(); + } + } + + return true; + } + + // 2D wrapper around SelectPartition with Z fixed to 0. + private static int Select2DPartition(int Seed, int X, int Y, int PartitionCount, bool IsSmallBlock) + { + return SelectPartition(Seed, X, Y, 0, PartitionCount, IsSmallBlock); + } + + // Returns the partition index for the texel at (X, Y, Z) derived from a hash of Seed; always 0 when PartitionCount is 1. + private static int SelectPartition(int Seed, int X, int Y, int Z, int PartitionCount, bool IsSmallBlock) + { + if (PartitionCount == 1) + { + return 0; + } + + if (IsSmallBlock) + { + X <<= 1; + Y <<= 1; + Z <<= 1; + } + + Seed += (PartitionCount - 1) * 1024; + + int RightNum = Hash52((uint)Seed); + byte Seed01 = (byte)(RightNum & 0xF); + byte Seed02 = (byte)((RightNum >> 4) & 0xF); + byte Seed03 = (byte)((RightNum >> 8) & 0xF); + byte Seed04 = (byte)((RightNum >> 12) & 0xF); + byte Seed05 = 
(byte)((RightNum >> 16) & 0xF); + byte Seed06 = (byte)((RightNum >> 20) & 0xF); + byte Seed07 = (byte)((RightNum >> 24) & 0xF); + byte Seed08 = (byte)((RightNum >> 28) & 0xF); + byte Seed09 = (byte)((RightNum >> 18) & 0xF); + byte Seed10 = (byte)((RightNum >> 22) & 0xF); + byte Seed11 = (byte)((RightNum >> 26) & 0xF); + byte Seed12 = (byte)(((RightNum >> 30) | (RightNum << 2)) & 0xF); + + // Square each 4-bit seed; the results are shifted down again further below. + Seed01 *= Seed01; Seed02 *= Seed02; + Seed03 *= Seed03; Seed04 *= Seed04; + Seed05 *= Seed05; Seed06 *= Seed06; + Seed07 *= Seed07; Seed08 *= Seed08; + Seed09 *= Seed09; Seed10 *= Seed10; + Seed11 *= Seed11; Seed12 *= Seed12; + + int SeedHash1, SeedHash2, SeedHash3; + + if ((Seed & 1) != 0) + { + SeedHash1 = (Seed & 2) != 0 ? 4 : 5; + SeedHash2 = (PartitionCount == 3) ? 6 : 5; + } + else + { + SeedHash1 = (PartitionCount == 3) ? 6 : 5; + SeedHash2 = (Seed & 2) != 0 ? 4 : 5; + } + + SeedHash3 = (Seed & 0x10) != 0 ? SeedHash1 : SeedHash2; + + Seed01 >>= SeedHash1; Seed02 >>= SeedHash2; Seed03 >>= SeedHash1; Seed04 >>= SeedHash2; + Seed05 >>= SeedHash1; Seed06 >>= SeedHash2; Seed07 >>= SeedHash1; Seed08 >>= SeedHash2; + Seed09 >>= SeedHash3; Seed10 >>= SeedHash3; Seed11 >>= SeedHash3; Seed12 >>= SeedHash3; + + int a = Seed01 * X + Seed02 * Y + Seed11 * Z + (RightNum >> 14); + int b = Seed03 * X + Seed04 * Y + Seed12 * Z + (RightNum >> 10); + int c = Seed05 * X + Seed06 * Y + Seed09 * Z + (RightNum >> 6); + int d = Seed07 * X + Seed08 * Y + Seed10 * Z + (RightNum >> 2); + + a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; + + // Zero the candidates for partitions that do not exist, then pick the first largest value. + if (PartitionCount < 4) d = 0; + if (PartitionCount < 3) c = 0; + + if (a >= b && a >= c && a >= d) return 0; + else if (b >= c && b >= d) return 1; + else if (c >= d) return 2; + return 3; + } + + // Mixes Val through a fixed sequence of shift, xor, add and subtract steps and returns the result as an int. + static int Hash52(uint Val) + { + Val ^= Val >> 15; Val -= Val << 17; Val += Val << 7; Val += Val << 4; + Val ^= Val >> 5; Val += Val << 16; Val ^= Val >> 7; Val ^= Val >> 3; + Val ^= Val << 6; Val ^= Val >> 17; + + return (int)Val; + } + + static void 
UnquantizeTexelWeights( + int[][] OutputBuffer, + List<IntegerEncoded> Weights, + TexelWeightParams TexelParams, + int BlockWidth, + int BlockHeight) + { + // Unquantize the decoded weights into one grid per plane; dual-plane input interleaves plane 0 and plane 1 values. + int WeightIndices = 0; + int[][] Unquantized = new int[2][]; + Unquantized[0] = new int[144]; + Unquantized[1] = new int[144]; + + for (int i = 0; i < Weights.Count; i++) + { + Unquantized[0][WeightIndices] = UnquantizeTexelWeight(Weights[i]); + + if (TexelParams.DualPlane) + { + i++; + + // Stop before indexing past the end of the list when the second-plane weight is missing. + if (i == Weights.Count) + { + break; + } + + Unquantized[1][WeightIndices] = UnquantizeTexelWeight(Weights[i]); + } + + if (++WeightIndices >= (TexelParams.Width * TexelParams.Height)) break; + } + + // Do infill if necessary (Section C.2.18) ... + int Ds = (1024 + (BlockWidth / 2)) / (BlockWidth - 1); + int Dt = (1024 + (BlockHeight / 2)) / (BlockHeight - 1); + + int PlaneScale = TexelParams.DualPlane ? 2 : 1; + + for (int Plane = 0; Plane < PlaneScale; Plane++) + { + for (int t = 0; t < BlockHeight; t++) + { + for (int s = 0; s < BlockWidth; s++) + { + int cs = Ds * s; + int ct = Dt * t; + + int gs = (cs * (TexelParams.Width - 1) + 32) >> 6; + int gt = (ct * (TexelParams.Height - 1) + 32) >> 6; + + // Integer grid cell (js, jt) and 4-bit fractional position (fs, ft) inside it. + int js = gs >> 4; + int fs = gs & 0xF; + + int jt = gt >> 4; + int ft = gt & 0x0F; + + // Bilinear weights of the four surrounding grid points; they sum to 16. + int w11 = (fs * ft + 8) >> 4; + int w10 = ft - w11; + int w01 = fs - w11; + int w00 = 16 - fs - ft + w11; + + int v0 = js + jt * TexelParams.Width; + + int p00 = 0; + int p01 = 0; + int p10 = 0; + int p11 = 0; + + // Neighbours that fall outside the grid contribute zero. + if (v0 < (TexelParams.Width * TexelParams.Height)) + { + p00 = Unquantized[Plane][v0]; + } + + if (v0 + 1 < (TexelParams.Width * TexelParams.Height)) + { + p01 = Unquantized[Plane][v0 + 1]; + } + + if (v0 + TexelParams.Width < (TexelParams.Width * TexelParams.Height)) + { + p10 = Unquantized[Plane][v0 + TexelParams.Width]; + } + + if (v0 + TexelParams.Width + 1 < (TexelParams.Width * TexelParams.Height)) + { + p11 = Unquantized[Plane][v0 + TexelParams.Width + 1]; + } + + OutputBuffer[Plane][t * BlockWidth + s] = (p00 * w00 + p01 * w01 + 
p10 * w10 + p11 * w11 + 8) >> 4; + } + } + } + } + + // Converts one encoded weight to an integer; the result is asserted to be below 64 and values above 32 are then incremented by one. + static int UnquantizeTexelWeight(IntegerEncoded IntEncoded) + { + int BitValue = IntEncoded.BitValue; + int BitLength = IntEncoded.NumberBits; + + // A is the lowest bit of BitValue replicated to 7 bits. + int A = BitArrayStream.Replicate(BitValue & 1, 1, 7); + int B = 0, C = 0, D = 0; + + int Result = 0; + + switch (IntEncoded.GetEncoding()) + { + case IntegerEncoded.EIntegerEncoding.JustBits: + Result = BitArrayStream.Replicate(BitValue, BitLength, 6); + break; + + case IntegerEncoded.EIntegerEncoding.Trit: + { + D = IntEncoded.TritValue; + Debug.Assert(D < 3); + + switch (BitLength) + { + case 0: + { + int[] Results = { 0, 32, 63 }; + Result = Results[D]; + + break; + } + + case 1: + { + C = 50; + break; + } + + case 2: + { + C = 23; + int b = (BitValue >> 1) & 1; + B = (b << 6) | (b << 2) | b; + + break; + } + + case 3: + { + C = 11; + int cb = (BitValue >> 1) & 3; + B = (cb << 5) | cb; + + break; + } + + default: + throw new ASTCDecoderException("Invalid trit encoding for texel weight"); + } + + break; + } + + case IntegerEncoded.EIntegerEncoding.Quint: + { + D = IntEncoded.QuintValue; + Debug.Assert(D < 5); + + switch (BitLength) + { + case 0: + { + int[] Results = { 0, 16, 32, 47, 63 }; + Result = Results[D]; + + break; + } + + case 1: + { + C = 28; + + break; + } + + case 2: + { + C = 13; + int b = (BitValue >> 1) & 1; + B = (b << 6) | (b << 1); + + break; + } + + default: + throw new ASTCDecoderException("Invalid quint encoding for texel weight"); + } + + break; + } + } + + if (IntEncoded.GetEncoding() != IntegerEncoded.EIntegerEncoding.JustBits && BitLength > 0) + { + // Decode the value... 
+ Result = D * C + B; + Result ^= A; + Result = (A & 0x20) | (Result >> 2); + } + + Debug.Assert(Result < 64); + + // Change from [0,63] to [0,64] + if (Result > 32) + { + Result += 1; + } + + return Result; + } + + // Returns b with its eight bits in reverse order. + static byte ReverseByte(byte b) + { + // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits + return (byte)((((b) * 0x80200802L) & 0x0884422110L) * 0x0101010101L >> 32); + } + + // Reads Number values from ColorValues as uint, starting at ColorValuesPosition and advancing it. + static uint[] ReadUintColorValues(int Number, int[] ColorValues, ref int ColorValuesPosition) + { + uint[] Ret = new uint[Number]; + + for (int i = 0; i < Number; i++) + { + Ret[i] = (uint)ColorValues[ColorValuesPosition++]; + } + + return Ret; + } + + // Reads Number values from ColorValues as int, starting at ColorValuesPosition and advancing it. + static int[] ReadIntColorValues(int Number, int[] ColorValues, ref int ColorValuesPosition) + { + int[] Ret = new int[Number]; + + for (int i = 0; i < Number; i++) + { + Ret[i] = ColorValues[ColorValuesPosition++]; + } + + return Ret; + } + + // Fills EndPoints[0] and EndPoints[1] for one partition from the values its ColorEndpointMode consumes. + // ASTCPixel arguments are ordered (A, R, G, B). Throws ASTCDecoderException for modes without a case here. + static void ComputeEndpoints( + ASTCPixel[] EndPoints, + int[] ColorValues, + uint ColorEndpointMode, + ref int ColorValuesPosition) + { + switch (ColorEndpointMode) + { + case 0: + { + uint[] Val = ReadUintColorValues(2, ColorValues, ref ColorValuesPosition); + + EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[0], (short)Val[0]); + EndPoints[1] = new ASTCPixel(0xFF, (short)Val[1], (short)Val[1], (short)Val[1]); + + break; + } + + + case 1: + { + uint[] Val = ReadUintColorValues(2, ColorValues, ref ColorValuesPosition); + int L0 = (int)((Val[0] >> 2) | (Val[1] & 0xC0)); + // Luminance base plus delta, clamped to 0xFF; Math.Max here would force L1 to at least 255. + int L1 = (int)Math.Min(L0 + (Val[1] & 0x3F), 0xFFU); + + EndPoints[0] = new ASTCPixel(0xFF, (short)L0, (short)L0, (short)L0); + EndPoints[1] = new ASTCPixel(0xFF, (short)L1, (short)L1, (short)L1); + + break; + } + + case 4: + { + uint[] Val = ReadUintColorValues(4, ColorValues, ref ColorValuesPosition); + + EndPoints[0] = new ASTCPixel((short)Val[2], (short)Val[0], (short)Val[0], (short)Val[0]); + EndPoints[1] = new ASTCPixel((short)Val[3], (short)Val[1], (short)Val[1], 
(short)Val[1]); + + break; + } + + case 5: + { + int[] Val = ReadIntColorValues(4, ColorValues, ref ColorValuesPosition); + + BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]); + BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]); + + EndPoints[0] = new ASTCPixel((short)Val[2], (short)Val[0], (short)Val[0], (short)Val[0]); + EndPoints[1] = new ASTCPixel((short)(Val[2] + Val[3]), (short)(Val[0] + Val[1]), (short)(Val[0] + Val[1]), (short)(Val[0] + Val[1])); + + EndPoints[0].ClampByte(); + EndPoints[1].ClampByte(); + + break; + } + + case 6: + { + // Endpoint 0 is the first three values scaled by Val[3] / 256; endpoint 1 is the unscaled values. + uint[] Val = ReadUintColorValues(4, ColorValues, ref ColorValuesPosition); + + EndPoints[0] = new ASTCPixel(0xFF, (short)(Val[0] * Val[3] >> 8), (short)(Val[1] * Val[3] >> 8), (short)(Val[2] * Val[3] >> 8)); + EndPoints[1] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[1], (short)Val[2]); + + break; + } + + case 8: + { + uint[] Val = ReadUintColorValues(6, ColorValues, ref ColorValuesPosition); + + // When the odd-indexed values sum lower than the even-indexed ones, the endpoints are swapped and passed through BlueContract. + if (Val[1] + Val[3] + Val[5] >= Val[0] + Val[2] + Val[4]) + { + EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]); + EndPoints[1] = new ASTCPixel(0xFF, (short)Val[1], (short)Val[3], (short)Val[5]); + } + else + { + EndPoints[0] = ASTCPixel.BlueContract(0xFF, (short)Val[1], (short)Val[3], (short)Val[5]); + EndPoints[1] = ASTCPixel.BlueContract(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]); + } + + break; + } + + case 9: + { + int[] Val = ReadIntColorValues(6, ColorValues, ref ColorValuesPosition); + + BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]); + BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]); + BitArrayStream.BitTransferSigned(ref Val[5], ref Val[4]); + + if (Val[1] + Val[3] + Val[5] >= 0) + { + EndPoints[0] = new ASTCPixel(0xFF, (short)Val[0], (short)Val[2], (short)Val[4]); + EndPoints[1] = new ASTCPixel(0xFF, (short)(Val[0] + Val[1]), (short)(Val[2] + Val[3]), (short)(Val[4] + Val[5])); + } + else + { + EndPoints[0] = ASTCPixel.BlueContract(0xFF, Val[0] + 
Val[1], Val[2] + Val[3], Val[4] + Val[5]); + EndPoints[1] = ASTCPixel.BlueContract(0xFF, Val[0], Val[2], Val[4]); + } + + EndPoints[0].ClampByte(); + EndPoints[1].ClampByte(); + + break; + } + + case 10: + { + // RGB of endpoint 0 is Val[0..2] scaled by Val[3] / 256; alpha comes from Val[4] and Val[5]. + uint[] Val = ReadUintColorValues(6, ColorValues, ref ColorValuesPosition); + + EndPoints[0] = new ASTCPixel((short)Val[4], (short)(Val[0] * Val[3] >> 8), (short)(Val[1] * Val[3] >> 8), (short)(Val[2] * Val[3] >> 8)); + EndPoints[1] = new ASTCPixel((short)Val[5], (short)Val[0], (short)Val[1], (short)Val[2]); + + break; + } + + case 12: + { + uint[] Val = ReadUintColorValues(8, ColorValues, ref ColorValuesPosition); + + if (Val[1] + Val[3] + Val[5] >= Val[0] + Val[2] + Val[4]) + { + EndPoints[0] = new ASTCPixel((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]); + EndPoints[1] = new ASTCPixel((short)Val[7], (short)Val[1], (short)Val[3], (short)Val[5]); + } + else + { + EndPoints[0] = ASTCPixel.BlueContract((short)Val[7], (short)Val[1], (short)Val[3], (short)Val[5]); + EndPoints[1] = ASTCPixel.BlueContract((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]); + } + + break; + } + + case 13: + { + int[] Val = ReadIntColorValues(8, ColorValues, ref ColorValuesPosition); + + BitArrayStream.BitTransferSigned(ref Val[1], ref Val[0]); + BitArrayStream.BitTransferSigned(ref Val[3], ref Val[2]); + BitArrayStream.BitTransferSigned(ref Val[5], ref Val[4]); + BitArrayStream.BitTransferSigned(ref Val[7], ref Val[6]); + + // A negative sum of the three color deltas selects the BlueContract path with the endpoints swapped. + if (Val[1] + Val[3] + Val[5] >= 0) + { + EndPoints[0] = new ASTCPixel((short)Val[6], (short)Val[0], (short)Val[2], (short)Val[4]); + EndPoints[1] = new ASTCPixel((short)(Val[7] + Val[6]), (short)(Val[0] + Val[1]), (short)(Val[2] + Val[3]), (short)(Val[4] + Val[5])); + } + else + { + EndPoints[0] = ASTCPixel.BlueContract(Val[6] + Val[7], Val[0] + Val[1], Val[2] + Val[3], Val[4] + Val[5]); + EndPoints[1] = ASTCPixel.BlueContract(Val[6], Val[0], Val[2], Val[4]); + } + + EndPoints[0].ClampByte(); + EndPoints[1].ClampByte(); + + break; + } + + 
default: + throw new ASTCDecoderException("Unsupported color endpoint mode (is it HDR?)"); + } + } + + // Picks the value range from the available bit count, decodes the integer sequence from InputData, then dequantizes each value into OutputValues. + static void DecodeColorValues( + int[] OutputValues, + byte[] InputData, + uint[] Modes, + int NumberPartitions, + int NumberBitsForColorData) + { + // First figure out how many color values we have + int NumberValues = 0; + + for (int i = 0; i < NumberPartitions; i++) + { + NumberValues += (int)((Modes[i] >> 2) + 1) << 1; + } + + // Then based on the number of values and the remaining number of bits, + // figure out the max value for each of them... + int Range = 256; + + while (--Range > 0) + { + IntegerEncoded IntEncoded = IntegerEncoded.CreateEncoding(Range); + int BitLength = IntEncoded.GetBitLength(NumberValues); + + if (BitLength <= NumberBitsForColorData) + { + // Find the smallest possible range that matches the given encoding + while (--Range > 0) + { + IntegerEncoded NewIntEncoded = IntegerEncoded.CreateEncoding(Range); + if (!NewIntEncoded.MatchesEncoding(IntEncoded)) + { + break; + } + } + + // Return to last matching range. + Range++; + break; + } + } + + // We now have enough to decode our integer sequence. + List<IntegerEncoded> IntegerEncodedSequence = new List<IntegerEncoded>(); + BitArrayStream ColorBitStream = new BitArrayStream(new BitArray(InputData)); + + IntegerEncoded.DecodeIntegerSequence(IntegerEncodedSequence, ColorBitStream, Range, NumberValues); + + // Once we have the decoded values, we need to dequantize them to the 0-255 range + // This procedure is outlined in ASTC spec C.2.13 + int OutputIndices = 0; + + foreach (IntegerEncoded IntEncoded in IntegerEncodedSequence) + { + int BitLength = IntEncoded.NumberBits; + int BitValue = IntEncoded.BitValue; + + Debug.Assert(BitLength >= 1); + + int A = 0, B = 0, C = 0, D = 0; + // A is just the lsb replicated 9 times. 
+ A = BitArrayStream.Replicate(BitValue & 1, 1, 9); + + // For trits and quints, C and B are chosen by bit length and combined with D after the switch. + switch (IntEncoded.GetEncoding()) + { + case IntegerEncoded.EIntegerEncoding.JustBits: + { + OutputValues[OutputIndices++] = BitArrayStream.Replicate(BitValue, BitLength, 8); + + break; + } + + case IntegerEncoded.EIntegerEncoding.Trit: + { + D = IntEncoded.TritValue; + + switch (BitLength) + { + case 1: + { + C = 204; + + break; + } + + case 2: + { + C = 93; + // B = b000b0bb0 + int b = (BitValue >> 1) & 1; + B = (b << 8) | (b << 4) | (b << 2) | (b << 1); + + break; + } + + case 3: + { + C = 44; + // B = cb000cbcb + int cb = (BitValue >> 1) & 3; + B = (cb << 7) | (cb << 2) | cb; + + break; + } + + + case 4: + { + C = 22; + // B = dcb000dcb + int dcb = (BitValue >> 1) & 7; + B = (dcb << 6) | dcb; + + break; + } + + case 5: + { + C = 11; + // B = edcb000ed + int edcb = (BitValue >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 2); + + break; + } + + case 6: + { + C = 5; + // B = fedcb000f + int fedcb = (BitValue >> 1) & 0x1F; + B = (fedcb << 4) | (fedcb >> 4); + + break; + } + + default: + throw new ASTCDecoderException("Unsupported trit encoding for color values!"); + } + + break; + } + + case IntegerEncoded.EIntegerEncoding.Quint: + { + D = IntEncoded.QuintValue; + + switch (BitLength) + { + case 1: + { + C = 113; + + break; + } + + case 2: + { + C = 54; + // B = b0000bb00 + int b = (BitValue >> 1) & 1; + B = (b << 8) | (b << 3) | (b << 2); + + break; + } + + case 3: + { + C = 26; + // B = cb0000cbc + int cb = (BitValue >> 1) & 3; + B = (cb << 7) | (cb << 1) | (cb >> 1); + + break; + } + + case 4: + { + C = 13; + // B = dcb0000dc + int dcb = (BitValue >> 1) & 7; + B = (dcb << 6) | (dcb >> 1); + + break; + } + + case 5: + { + C = 6; + // B = edcb0000e + int edcb = (BitValue >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 3); + + break; + } + + default: + throw new ASTCDecoderException("Unsupported quint encoding for color values!"); + } + break; + } + } + + if (IntEncoded.GetEncoding() != 
IntegerEncoded.EIntegerEncoding.JustBits) + { + int T = D * C + B; + T ^= A; + T = (A & 0x80) | (T >> 2); + + OutputValues[OutputIndices++] = T; + } + } + + // Make sure that each of our values is in the proper range... + for (int i = 0; i < NumberValues; i++) + { + Debug.Assert(OutputValues[i] <= 255); + } + } + + // Skips four 13-bit fields, reads four 16-bit channels, and fills the block with one packed color built from each channel's high byte. + static void FillVoidExtentLDR(BitArrayStream BitStream, int[] OutputBuffer, int BlockWidth, int BlockHeight) + { + // Don't actually care about the void extent, just read the bits... + for (int i = 0; i < 4; ++i) + { + BitStream.ReadBits(13); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + ushort R = (ushort)BitStream.ReadBits(16); + ushort G = (ushort)BitStream.ReadBits(16); + ushort B = (ushort)BitStream.ReadBits(16); + ushort A = (ushort)BitStream.ReadBits(16); + + int RGBA = (R >> 8) | (G & 0xFF00) | ((B) & 0xFF00) << 8 | ((A) & 0xFF00) << 16; + + for (int j = 0; j < BlockHeight; j++) + { + for (int i = 0; i < BlockWidth; i++) + { + OutputBuffer[j * BlockWidth + i] = RGBA; + } + } + } + + // Reads the 11-bit block mode from BitStream and returns the weight grid parameters; Error is set for reserved or invalid modes. + static TexelWeightParams DecodeBlockInfo(BitArrayStream BitStream) + { + TexelWeightParams TexelParams = new TexelWeightParams(); + + // Read the entire block mode all at once + ushort ModeBits = (ushort)BitStream.ReadBits(11); + + // Does this match the void extent block mode? + if ((ModeBits & 0x01FF) == 0x1FC) + { + if ((ModeBits & 0x200) != 0) + { + TexelParams.VoidExtentHDR = true; + } + else + { + TexelParams.VoidExtentLDR = true; + } + + // Next two bits must be one. + if ((ModeBits & 0x400) == 0 || BitStream.ReadBits(1) == 0) + { + TexelParams.Error = true; + } + + return TexelParams; + } + + // First check if the last four bits are zero + if ((ModeBits & 0xF) == 0) + { + TexelParams.Error = true; + return TexelParams; + } + + // If the last two bits are zero, then if bits + // [6-8] are all ones, this is also reserved. 
+ if ((ModeBits & 0x3) == 0 && (ModeBits & 0x1C0) == 0x1C0) + { + TexelParams.Error = true; + + return TexelParams; + } + + // Otherwise, there is no error... Figure out the layout + // of the block mode. Layout is determined by a number + // between 0 and 9 corresponding to table C.2.8 of the + // ASTC spec. + int Layout = 0; + + // Layouts 0-4 have at least one of the two lowest mode bits set; layouts 5-9 have both clear. + if ((ModeBits & 0x1) != 0 || (ModeBits & 0x2) != 0) + { + // layout is in [0-4] + if ((ModeBits & 0x8) != 0) + { + // layout is in [2-4] + if ((ModeBits & 0x4) != 0) + { + // layout is in [3-4] + if ((ModeBits & 0x100) != 0) + { + Layout = 4; + } + else + { + Layout = 3; + } + } + else + { + Layout = 2; + } + } + else + { + // layout is in [0-1] + if ((ModeBits & 0x4) != 0) + { + Layout = 1; + } + else + { + Layout = 0; + } + } + } + else + { + // layout is in [5-9] + if ((ModeBits & 0x100) != 0) + { + // layout is in [7-9] + if ((ModeBits & 0x80) != 0) + { + // layout is in [7-8] + Debug.Assert((ModeBits & 0x40) == 0); + + if ((ModeBits & 0x20) != 0) + { + Layout = 8; + } + else + { + Layout = 7; + } + } + else + { + Layout = 9; + } + } + else + { + // layout is in [5-6] + if ((ModeBits & 0x80) != 0) + { + Layout = 6; + } + else + { + Layout = 5; + } + } + } + + Debug.Assert(Layout < 10); + + // Determine R + // Bit 4 is the low bit of R; the upper two bits come from mode bits 0-1 (layouts 0-4) or bits 2-3 (layouts 5-9). + int R = (ModeBits >> 4) & 1; + if (Layout < 5) + { + R |= (ModeBits & 0x3) << 1; + } + else + { + R |= (ModeBits & 0xC) >> 1; + } + + Debug.Assert(2 <= R && R <= 7); + + // Determine width & height + switch (Layout) + { + case 0: + { + int A = (ModeBits >> 5) & 0x3; + int B = (ModeBits >> 7) & 0x3; + + TexelParams.Width = B + 4; + TexelParams.Height = A + 2; + + break; + } + + case 1: + { + int A = (ModeBits >> 5) & 0x3; + int B = (ModeBits >> 7) & 0x3; + + TexelParams.Width = B + 8; + TexelParams.Height = A + 2; + + break; + } + + case 2: + { + int A = (ModeBits >> 5) & 0x3; + int B = (ModeBits >> 7) & 0x3; + + TexelParams.Width = A + 2; + TexelParams.Height = B + 8; + + break; + } + + case 3: + { + int A = (ModeBits >> 5) & 0x3; + 
int B = (ModeBits >> 7) & 0x1; + + TexelParams.Width = A + 2; + TexelParams.Height = B + 6; + + break; + } + + case 4: + { + int A = (ModeBits >> 5) & 0x3; + int B = (ModeBits >> 7) & 0x1; + + TexelParams.Width = B + 2; + TexelParams.Height = A + 2; + + break; + } + + case 5: + { + int A = (ModeBits >> 5) & 0x3; + + TexelParams.Width = 12; + TexelParams.Height = A + 2; + + break; + } + + case 6: + { + int A = (ModeBits >> 5) & 0x3; + + TexelParams.Width = A + 2; + TexelParams.Height = 12; + + break; + } + + case 7: + { + TexelParams.Width = 6; + TexelParams.Height = 10; + + break; + } + + case 8: + { + TexelParams.Width = 10; + TexelParams.Height = 6; + break; + } + + case 9: + { + int A = (ModeBits >> 5) & 0x3; + int B = (ModeBits >> 9) & 0x3; + + TexelParams.Width = A + 6; + TexelParams.Height = B + 6; + + break; + } + + default: + //Don't know this layout... + TexelParams.Error = true; + break; + } + + // Determine whether or not we're using dual planes + // and/or high precision layouts. 
+ bool D = ((Layout != 9) && ((ModeBits & 0x400) != 0)); + bool H = (Layout != 9) && ((ModeBits & 0x200) != 0); + + if (H) + { + int[] MaxWeights = { 9, 11, 15, 19, 23, 31 }; + TexelParams.MaxWeight = MaxWeights[R - 2]; + } + else + { + int[] MaxWeights = { 1, 2, 3, 4, 5, 7 }; + TexelParams.MaxWeight = MaxWeights[R - 2]; + } + + TexelParams.DualPlane = D; + + return TexelParams; + } + } +} diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs b/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs new file mode 100644 index 00000000..c43eaf93 --- /dev/null +++ b/Ryujinx.Graphics/Graphics3d/Texture/ASTCPixel.cs @@ -0,0 +1,138 @@ +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Texture +{ + class ASTCPixel + { + public short R { get; set; } + public short G { get; set; } + public short B { get; set; } + public short A { get; set; } + + byte[] BitDepth = new byte[4]; + + public ASTCPixel(short _A, short _R, short _G, short _B) + { + A = _A; + R = _R; + G = _G; + B = _B; + + for (int i = 0; i < 4; i++) + BitDepth[i] = 8; + } + + public void ClampByte() + { + R = Math.Min(Math.Max(R, (short)0), (short)255); + G = Math.Min(Math.Max(G, (short)0), (short)255); + B = Math.Min(Math.Max(B, (short)0), (short)255); + A = Math.Min(Math.Max(A, (short)0), (short)255); + } + + public short GetComponent(int Index) + { + switch(Index) + { + case 0: return A; + case 1: return R; + case 2: return G; + case 3: return B; + } + + return 0; + } + + public void SetComponent(int Index, int Value) + { + switch (Index) + { + case 0: + A = (short)Value; + break; + case 1: + R = (short)Value; + break; + case 2: + G = (short)Value; + break; + case 3: + B = (short)Value; + break; + } + } + + public void ChangeBitDepth(byte[] Depth) + { + for(int i = 0; i< 4; i++) + { + int Value = ChangeBitDepth(GetComponent(i), BitDepth[i], Depth[i]); + + SetComponent(i, Value); + BitDepth[i] = Depth[i]; + } + } + + short ChangeBitDepth(short Value, byte OldDepth, byte NewDepth) + { + 
Debug.Assert(NewDepth <= 8); + Debug.Assert(OldDepth <= 8); + + if (OldDepth == NewDepth) + { + // Do nothing + return Value; + } + else if (OldDepth == 0 && NewDepth != 0) + { + return (short)((1 << NewDepth) - 1); + } + else if (NewDepth > OldDepth) + { + return (short)BitArrayStream.Replicate(Value, OldDepth, NewDepth); + } + else + { + // oldDepth > newDepth + if (NewDepth == 0) + { + return 0xFF; + } + else + { + byte BitsWasted = (byte)(OldDepth - NewDepth); + short TempValue = Value; + + TempValue = (short)((TempValue + (1 << (BitsWasted - 1))) >> BitsWasted); + TempValue = Math.Min(Math.Max((short)0, TempValue), (short)((1 << NewDepth) - 1)); + + return (byte)(TempValue); + } + } + } + + public int Pack() + { + ASTCPixel NewPixel = new ASTCPixel(A, R, G, B); + byte[] eightBitDepth = { 8, 8, 8, 8 }; + + NewPixel.ChangeBitDepth(eightBitDepth); + + return (byte)NewPixel.A << 24 | + (byte)NewPixel.B << 16 | + (byte)NewPixel.G << 8 | + (byte)NewPixel.R << 0; + } + + // Adds more precision to the blue channel as described + // in C.2.14 + public static ASTCPixel BlueContract(int a, int r, int g, int b) + { + return new ASTCPixel((short)(a), + (short)((r + b) >> 1), + (short)((g + b) >> 1), + (short)(b)); + } + } +} diff --git a/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs b/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs new file mode 100644 index 00000000..2a8ed091 --- /dev/null +++ b/Ryujinx.Graphics/Graphics3d/Texture/BitArrayStream.cs @@ -0,0 +1,121 @@ +using System; +using System.Collections; + +namespace Ryujinx.Graphics.Texture +{ + public class BitArrayStream + { + public BitArray BitsArray; + + public int Position { get; private set; } + + public BitArrayStream(BitArray BitArray) + { + BitsArray = BitArray; + Position = 0; + } + + public short ReadBits(int Length) + { + int RetValue = 0; + for (int i = Position; i < Position + Length; i++) + { + if (BitsArray[i]) + { + RetValue |= 1 << (i - Position); + } + } + + Position += Length; + 
return (short)RetValue; + } + + public int ReadBits(int Start, int End) + { + int RetValue = 0; + for (int i = Start; i <= End; i++) + { + if (BitsArray[i]) + { + RetValue |= 1 << (i - Start); + } + } + + return RetValue; + } + + public int ReadBit(int Index) + { + return Convert.ToInt32(BitsArray[Index]); + } + + public void WriteBits(int Value, int Length) + { + for (int i = Position; i < Position + Length; i++) + { + BitsArray[i] = ((Value >> (i - Position)) & 1) != 0; + } + + Position += Length; + } + + public byte[] ToByteArray() + { + byte[] RetArray = new byte[(BitsArray.Length + 7) / 8]; + BitsArray.CopyTo(RetArray, 0); + return RetArray; + } + + public static int Replicate(int Value, int NumberBits, int ToBit) + { + if (NumberBits == 0) return 0; + if (ToBit == 0) return 0; + + int TempValue = Value & ((1 << NumberBits) - 1); + int RetValue = TempValue; + int ResLength = NumberBits; + + while (ResLength < ToBit) + { + int Comp = 0; + if (NumberBits > ToBit - ResLength) + { + int NewShift = ToBit - ResLength; + Comp = NumberBits - NewShift; + NumberBits = NewShift; + } + RetValue <<= NumberBits; + RetValue |= TempValue >> Comp; + ResLength += NumberBits; + } + return RetValue; + } + + public static int PopCnt(int Number) + { + int Counter; + for (Counter = 0; Number != 0; Counter++) + { + Number &= Number - 1; + } + return Counter; + } + + public static void Swap<T>(ref T lhs, ref T rhs) + { + T Temp = lhs; + lhs = rhs; + rhs = Temp; + } + + // Transfers a bit as described in C.2.14 + public static void BitTransferSigned(ref int a, ref int b) + { + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3F; + if ((a & 0x20) != 0) a -= 0x40; + } + } +} diff --git a/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs new file mode 100644 index 00000000..9451291e --- /dev/null +++ b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs @@ -0,0 +1,59 @@ +using System; + +namespace 
Ryujinx.Graphics.Texture +{ + class BlockLinearSwizzle : ISwizzle + { + private int BhShift; + private int BppShift; + private int BhMask; + + private int XShift; + private int GobStride; + + public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16) + { + BhMask = (BlockHeight * 8) - 1; + + BhShift = CountLsbZeros(BlockHeight * 8); + BppShift = CountLsbZeros(Bpp); + + int WidthInGobs = (int)MathF.Ceiling(Width * Bpp / 64f); + + GobStride = 512 * BlockHeight * WidthInGobs; + + XShift = CountLsbZeros(512 * BlockHeight); + } + + private int CountLsbZeros(int Value) + { + int Count = 0; + + while (((Value >> Count) & 1) == 0) + { + Count++; + } + + return Count; + } + + public int GetSwizzleOffset(int X, int Y) + { + X <<= BppShift; + + int Position = (Y >> BhShift) * GobStride; + + Position += (X >> 6) << XShift; + + Position += ((Y & BhMask) >> 3) << 9; + + Position += ((X & 0x3f) >> 5) << 8; + Position += ((Y & 0x07) >> 1) << 6; + Position += ((X & 0x1f) >> 4) << 5; + Position += ((Y & 0x01) >> 0) << 4; + Position += ((X & 0x0f) >> 0) << 0; + + return Position; + } + } +}
// Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Translates a two-dimensional coordinate into a single integer offset.
    /// </summary>
    internal interface ISwizzle
    {
        /// <summary>
        /// Computes the offset that corresponds to the coordinate
        /// (<paramref name="X"/>, <paramref name="Y"/>).
        /// </summary>
        /// <param name="X">Horizontal coordinate.</param>
        /// <param name="Y">Vertical coordinate.</param>
        /// <returns>The offset assigned to that coordinate by the implementation.</returns>
        int GetSwizzleOffset(int X, int Y);
    }
}
using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using System;
using System.Collections;
using System.Collections.Generic;

// Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Format conversion tables and size/layout helpers for images, plus routines
    /// that read, write and copy texel data through an <see cref="ISwizzle"/>.
    /// </summary>
    public static class ImageUtils
    {
        [Flags]
        private enum TargetBuffer
        {
            Color   = 1 << 0,
            Depth   = 1 << 1,
            Stencil = 1 << 2,

            DepthStencil = Depth | Stencil
        }

        /// <summary>
        /// Per-format description: bytes per pixel (or per block for block formats),
        /// block dimensions and the kind of buffer the format targets.
        /// </summary>
        private struct ImageDescriptor
        {
            public int BytesPerPixel { get; }
            public int BlockWidth    { get; }
            public int BlockHeight   { get; }

            public TargetBuffer Target { get; }

            public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, TargetBuffer Target)
            {
                this.BytesPerPixel = BytesPerPixel;
                this.BlockWidth    = BlockWidth;
                this.BlockHeight   = BlockHeight;
                this.Target        = Target;
            }
        }

        // Short aliases for the component-type flags used in the tables below.
        private const GalImageFormat Snorm = GalImageFormat.Snorm;
        private const GalImageFormat Unorm = GalImageFormat.Unorm;
        private const GalImageFormat Sint  = GalImageFormat.Sint;
        private const GalImageFormat Uint  = GalImageFormat.Uint;
        private const GalImageFormat Float = GalImageFormat.Float;
        private const GalImageFormat Srgb  = GalImageFormat.Srgb;

        // Texture format -> image format combined with every component type accepted for it.
        private static readonly Dictionary<GalTextureFormat, GalImageFormat> s_TextureTable =
            new Dictionary<GalTextureFormat, GalImageFormat>
            {
                [GalTextureFormat.RGBA32]     = GalImageFormat.RGBA32    | Sint | Uint | Float,
                [GalTextureFormat.RGBA16]     = GalImageFormat.RGBA16    | Snorm | Unorm | Sint | Uint | Float,
                [GalTextureFormat.RG32]       = GalImageFormat.RG32      | Sint | Uint | Float,
                [GalTextureFormat.RGBA8]      = GalImageFormat.RGBA8     | Snorm | Unorm | Sint | Uint | Srgb,
                [GalTextureFormat.RGB10A2]    = GalImageFormat.RGB10A2   | Snorm | Unorm | Sint | Uint,
                [GalTextureFormat.RG8]        = GalImageFormat.RG8       | Snorm | Unorm | Sint | Uint,
                [GalTextureFormat.R16]        = GalImageFormat.R16       | Snorm | Unorm | Sint | Uint | Float,
                [GalTextureFormat.R8]         = GalImageFormat.R8        | Snorm | Unorm | Sint | Uint,
                [GalTextureFormat.RG16]       = GalImageFormat.RG16      | Snorm | Unorm | Float,
                [GalTextureFormat.R32]        = GalImageFormat.R32       | Sint | Uint | Float,
                [GalTextureFormat.RGBA4]      = GalImageFormat.RGBA4     | Unorm,
                [GalTextureFormat.RGB5A1]     = GalImageFormat.RGB5A1    | Unorm,
                [GalTextureFormat.RGB565]     = GalImageFormat.RGB565    | Unorm,
                [GalTextureFormat.R11G11B10F] = GalImageFormat.R11G11B10 | Float,
                [GalTextureFormat.D24S8]      = GalImageFormat.D24S8     | Unorm | Uint,
                [GalTextureFormat.D32F]       = GalImageFormat.D32       | Float,
                [GalTextureFormat.D32FX24S8]  = GalImageFormat.D32S8     | Float,
                [GalTextureFormat.D16]        = GalImageFormat.D16       | Unorm,

                // Compressed formats
                [GalTextureFormat.BptcSfloat]  = GalImageFormat.BptcSfloat  | Float,
                [GalTextureFormat.BptcUfloat]  = GalImageFormat.BptcUfloat  | Float,
                [GalTextureFormat.BptcUnorm]   = GalImageFormat.BptcUnorm   | Unorm | Srgb,
                [GalTextureFormat.BC1]         = GalImageFormat.BC1         | Unorm | Srgb,
                [GalTextureFormat.BC2]         = GalImageFormat.BC2         | Unorm | Srgb,
                [GalTextureFormat.BC3]         = GalImageFormat.BC3         | Unorm | Srgb,
                [GalTextureFormat.BC4]         = GalImageFormat.BC4         | Unorm | Snorm,
                [GalTextureFormat.BC5]         = GalImageFormat.BC5         | Unorm | Snorm,
                [GalTextureFormat.Astc2D4x4]   = GalImageFormat.Astc2D4x4   | Unorm | Srgb,
                [GalTextureFormat.Astc2D5x5]   = GalImageFormat.Astc2D5x5   | Unorm | Srgb,
                [GalTextureFormat.Astc2D6x6]   = GalImageFormat.Astc2D6x6   | Unorm | Srgb,
                [GalTextureFormat.Astc2D8x8]   = GalImageFormat.Astc2D8x8   | Unorm | Srgb,
                [GalTextureFormat.Astc2D10x10] = GalImageFormat.Astc2D10x10 | Unorm | Srgb,
                [GalTextureFormat.Astc2D12x12] = GalImageFormat.Astc2D12x12 | Unorm | Srgb,
                [GalTextureFormat.Astc2D5x4]   = GalImageFormat.Astc2D5x4   | Unorm | Srgb,
                [GalTextureFormat.Astc2D6x5]   = GalImageFormat.Astc2D6x5   | Unorm | Srgb,
                [GalTextureFormat.Astc2D8x6]   = GalImageFormat.Astc2D8x6   | Unorm | Srgb,
                [GalTextureFormat.Astc2D10x8]  = GalImageFormat.Astc2D10x8  | Unorm | Srgb,
                [GalTextureFormat.Astc2D12x10] = GalImageFormat.Astc2D12x10 | Unorm | Srgb,
                [GalTextureFormat.Astc2D8x5]   = GalImageFormat.Astc2D8x5   | Unorm | Srgb,
                [GalTextureFormat.Astc2D10x5]  = GalImageFormat.Astc2D10x5  | Unorm | Srgb,
                [GalTextureFormat.Astc2D10x6]  = GalImageFormat.Astc2D10x6  | Unorm | Srgb
            };

        // Image format (without type flags) -> descriptor.
        private static readonly Dictionary<GalImageFormat, ImageDescriptor> s_ImageTable =
            new Dictionary<GalImageFormat, ImageDescriptor>
            {
                [GalImageFormat.RGBA32]      = new ImageDescriptor(16, 1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGBA16]      = new ImageDescriptor(8,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RG32]        = new ImageDescriptor(8,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGBX8]       = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGBA8]       = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.BGRA8]       = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGB10A2]     = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.R32]         = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGBA4]       = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.BptcSfloat]  = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.BptcUfloat]  = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.BGR5A1]      = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGB5A1]      = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RGB565]      = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.BptcUnorm]   = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.RG16]        = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.RG8]         = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.R16]         = new ImageDescriptor(2,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.R8]          = new ImageDescriptor(1,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.R11G11B10]   = new ImageDescriptor(4,  1,  1,  TargetBuffer.Color),
                [GalImageFormat.BC1]         = new ImageDescriptor(8,  4,  4,  TargetBuffer.Color),
                [GalImageFormat.BC2]         = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.BC3]         = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.BC4]         = new ImageDescriptor(8,  4,  4,  TargetBuffer.Color),
                [GalImageFormat.BC5]         = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.Astc2D4x4]   = new ImageDescriptor(16, 4,  4,  TargetBuffer.Color),
                [GalImageFormat.Astc2D5x5]   = new ImageDescriptor(16, 5,  5,  TargetBuffer.Color),
                [GalImageFormat.Astc2D6x6]   = new ImageDescriptor(16, 6,  6,  TargetBuffer.Color),
                [GalImageFormat.Astc2D8x8]   = new ImageDescriptor(16, 8,  8,  TargetBuffer.Color),
                [GalImageFormat.Astc2D10x10] = new ImageDescriptor(16, 10, 10, TargetBuffer.Color),
                [GalImageFormat.Astc2D12x12] = new ImageDescriptor(16, 12, 12, TargetBuffer.Color),
                [GalImageFormat.Astc2D5x4]   = new ImageDescriptor(16, 5,  4,  TargetBuffer.Color),
                [GalImageFormat.Astc2D6x5]   = new ImageDescriptor(16, 6,  5,  TargetBuffer.Color),
                [GalImageFormat.Astc2D8x6]   = new ImageDescriptor(16, 8,  6,  TargetBuffer.Color),
                [GalImageFormat.Astc2D10x8]  = new ImageDescriptor(16, 10, 8,  TargetBuffer.Color),
                [GalImageFormat.Astc2D12x10] = new ImageDescriptor(16, 12, 10, TargetBuffer.Color),
                [GalImageFormat.Astc2D8x5]   = new ImageDescriptor(16, 8,  5,  TargetBuffer.Color),
                [GalImageFormat.Astc2D10x5]  = new ImageDescriptor(16, 10, 5,  TargetBuffer.Color),
                [GalImageFormat.Astc2D10x6]  = new ImageDescriptor(16, 10, 6,  TargetBuffer.Color),

                [GalImageFormat.D16]         = new ImageDescriptor(2,  1,  1,  TargetBuffer.Depth),
                [GalImageFormat.D24]         = new ImageDescriptor(4,  1,  1,  TargetBuffer.Depth),
                [GalImageFormat.D24S8]       = new ImageDescriptor(4,  1,  1,  TargetBuffer.DepthStencil),
                [GalImageFormat.D32]         = new ImageDescriptor(4,  1,  1,  TargetBuffer.Depth),
                [GalImageFormat.D32S8]       = new ImageDescriptor(8,  1,  1,  TargetBuffer.DepthStencil)
            };

        /// <summary>
        /// Combines a texture format with its component type into a <see cref="GalImageFormat"/>.
        /// </summary>
        /// <param name="Format">Texture format to look up in the texture table.</param>
        /// <param name="RType">Type of the red component; this is the type that gets used.</param>
        /// <param name="GType">Type of the green component.</param>
        /// <param name="BType">Type of the blue component.</param>
        /// <param name="AType">Type of the alpha component.</param>
        /// <param name="ConvSrgb">When true the sRGB type is used instead of <paramref name="RType"/>.</param>
        /// <returns>The pixel format bits combined with the selected type flag.</returns>
        /// <exception cref="NotImplementedException">
        /// The format is not in the table, a non-depth format has differing component types,
        /// or the selected type is not listed as valid for the format.
        /// </exception>
        public static GalImageFormat ConvertTexture(
            GalTextureFormat Format,
            GalTextureType   RType,
            GalTextureType   GType,
            GalTextureType   BType,
            GalTextureType   AType,
            bool             ConvSrgb)
        {
            if (!s_TextureTable.TryGetValue(Format, out GalImageFormat supported))
            {
                throw new NotImplementedException($"Format 0x{((int)Format):x} not implemented!");
            }

            bool uniformTypes = RType == GType && RType == BType && RType == AType;

            if (!HasDepth(supported) && !uniformTypes)
            {
                throw new NotImplementedException("Per component types are not implemented!");
            }

            GalImageFormat typeFlag = ConvSrgb ? Srgb : GetFormatType(RType);

            GalImageFormat result = (supported & GalImageFormat.FormatMask) | typeFlag;

            if (!supported.HasFlag(typeFlag))
            {
                throw new NotImplementedException($"Format \"{result}\" not implemented!");
            }

            return result;
        }

        /// <summary>Maps a color surface format to its image format and type.</summary>
        /// <exception cref="NotImplementedException">The surface format has no mapping.</exception>
        public static GalImageFormat ConvertSurface(GalSurfaceFormat Format)
        {
            switch (Format)
            {
                case GalSurfaceFormat.RGBA32Float:    return GalImageFormat.RGBA32    | Float;
                case GalSurfaceFormat.RGBA32Uint:     return GalImageFormat.RGBA32    | Uint;
                case GalSurfaceFormat.RGBA16Float:    return GalImageFormat.RGBA16    | Float;
                case GalSurfaceFormat.RGBA16Unorm:    return GalImageFormat.RGBA16    | Unorm;
                case GalSurfaceFormat.RG32Float:      return GalImageFormat.RG32      | Float;
                case GalSurfaceFormat.RG32Sint:       return GalImageFormat.RG32      | Sint;
                case GalSurfaceFormat.RG32Uint:       return GalImageFormat.RG32      | Uint;
                case GalSurfaceFormat.BGRA8Unorm:     return GalImageFormat.BGRA8     | Unorm;
                case GalSurfaceFormat.BGRA8Srgb:      return GalImageFormat.BGRA8     | Srgb;
                case GalSurfaceFormat.RGB10A2Unorm:   return GalImageFormat.RGB10A2   | Unorm;
                case GalSurfaceFormat.RGBA8Unorm:     return GalImageFormat.RGBA8     | Unorm;
                case GalSurfaceFormat.RGBA8Srgb:      return GalImageFormat.RGBA8     | Srgb;
                case GalSurfaceFormat.RGBA8Snorm:     return GalImageFormat.RGBA8     | Snorm;
                case GalSurfaceFormat.RG16Snorm:      return GalImageFormat.RG16      | Snorm;
                case GalSurfaceFormat.RG16Unorm:      return GalImageFormat.RG16      | Unorm;
                case GalSurfaceFormat.RG16Float:      return GalImageFormat.RG16      | Float;
                case GalSurfaceFormat.R11G11B10Float: return GalImageFormat.R11G11B10 | Float;
                case GalSurfaceFormat.R32Float:       return GalImageFormat.R32       | Float;
                case GalSurfaceFormat.R32Uint:        return GalImageFormat.R32       | Uint;
                case GalSurfaceFormat.RG8Unorm:       return GalImageFormat.RG8       | Unorm;
                case GalSurfaceFormat.RG8Snorm:       return GalImageFormat.RG8       | Snorm;
                case GalSurfaceFormat.R16Float:       return GalImageFormat.R16       | Float;
                case GalSurfaceFormat.R16Unorm:       return GalImageFormat.R16       | Unorm;
                case GalSurfaceFormat.R16Uint:        return GalImageFormat.R16       | Uint;
                case GalSurfaceFormat.R8Unorm:        return GalImageFormat.R8        | Unorm;
                case GalSurfaceFormat.R8Uint:         return GalImageFormat.R8        | Uint;
                case GalSurfaceFormat.B5G6R5Unorm:    return GalImageFormat.RGB565    | Unorm;
                case GalSurfaceFormat.BGR5A1Unorm:    return GalImageFormat.BGR5A1    | Unorm;
                case GalSurfaceFormat.RGBX8Unorm:     return GalImageFormat.RGBX8     | Unorm;

                default: throw new NotImplementedException(Format.ToString());
            }
        }

        /// <summary>Maps a depth/stencil ("zeta") format to its image format and type.</summary>
        /// <exception cref="NotImplementedException">The zeta format has no mapping.</exception>
        public static GalImageFormat ConvertZeta(GalZetaFormat Format)
        {
            switch (Format)
            {
                case GalZetaFormat.D32Float:      return GalImageFormat.D32   | Float;
                case GalZetaFormat.S8D24Unorm:    return GalImageFormat.D24S8 | Unorm;
                case GalZetaFormat.D16Unorm:      return GalImageFormat.D16   | Unorm;
                case GalZetaFormat.D24X8Unorm:    return GalImageFormat.D24   | Unorm;
                case GalZetaFormat.D24S8Unorm:    return GalImageFormat.D24S8 | Unorm;
                case GalZetaFormat.D32S8X24Float: return GalImageFormat.D32S8 | Float;

                default: throw new NotImplementedException(Format.ToString());
            }
        }

        /// <summary>
        /// Reads an image block by block into a newly allocated array, using the
        /// swizzle selected for the image to locate each block in memory.
        /// </summary>
        /// <param name="Memory">
        /// Either an <see cref="NvGpuVmm"/> (its <c>Memory</c> is used) or an object
        /// that is cast to <see cref="MemoryManager"/>.
        /// </param>
        /// <param name="Image">Image whose format, size and layout drive the read.</param>
        /// <param name="Position">Base address added to every swizzled offset.</param>
        /// <returns>The data with each output row padded to a multiple of 4 bytes.</returns>
        public static byte[] ReadTexture(IMemory Memory, GalImage Image, long Position)
        {
            MemoryManager cpuMemory = Memory is NvGpuVmm gpuVmm
                ? gpuVmm.Memory
                : (MemoryManager)Memory;

            ISwizzle swizzle = TextureHelper.GetSwizzle(Image);

            int blockBytes = GetImageDescriptor(Image.Format).BytesPerPixel;

            var sizeInBlocks = GetImageSizeInBlocks(Image);

            int columns = sizeInBlocks.Width;
            int rows    = sizeInBlocks.Height;

            // Note: Each row of the texture needs to be aligned to 4 bytes.
            int rowPitch = (columns * blockBytes + 3) & ~3;

            byte[] output = new byte[rows * rowPitch];

            for (int y = 0; y < rows; y++)
            {
                int rowStart = y * rowPitch;

                for (int x = 0; x < columns; x++)
                {
                    // The swizzle offset is reinterpreted as unsigned before widening.
                    long srcOffset = (uint)swizzle.GetSwizzleOffset(x, y);

                    cpuMemory.ReadBytes(Position + srcOffset, output, rowStart + x * blockBytes, blockBytes);
                }
            }

            return output;
        }

        /// <summary>
        /// Writes tightly packed image data to memory block by block, placing each
        /// block at the offset given by the swizzle selected for the image.
        /// </summary>
        /// <param name="Vmm">Its <c>Memory</c> receives the writes.</param>
        /// <param name="Image">Image whose format, size and layout drive the write.</param>
        /// <param name="Position">Base address added to every swizzled offset.</param>
        /// <param name="Data">Source bytes, consumed sequentially in row-major block order.</param>
        public static void WriteTexture(NvGpuVmm Vmm, GalImage Image, long Position, byte[] Data)
        {
            ISwizzle swizzle = TextureHelper.GetSwizzle(Image);

            int blockBytes = GetImageDescriptor(Image.Format).BytesPerPixel;

            var sizeInBlocks = ImageUtils.GetImageSizeInBlocks(Image);

            int columns = sizeInBlocks.Width;
            int rows    = sizeInBlocks.Height;

            int srcIndex = 0;

            for (int y = 0; y < rows; y++)
            {
                for (int x = 0; x < columns; x++, srcIndex += blockBytes)
                {
                    long dstOffset = (uint)swizzle.GetSwizzleOffset(x, y);

                    Vmm.Memory.WriteBytes(Position + dstOffset, Data, srcIndex, blockBytes);
                }
            }
        }

        /// <summary>
        /// Copies a rectangle of texels between two images, one texel at a time.
        /// </summary>
        /// <returns>
        /// False (and nothing is copied) when the two formats differ in bytes per pixel;
        /// true otherwise.
        /// </returns>
        public static bool CopyTexture(
            NvGpuVmm Vmm,
            GalImage SrcImage,
            GalImage DstImage,
            long     SrcAddress,
            long     DstAddress,
            int      SrcX,
            int      SrcY,
            int      DstX,
            int      DstY,
            int      Width,
            int      Height)
        {
            ISwizzle srcSwizzle = TextureHelper.GetSwizzle(SrcImage);
            ISwizzle dstSwizzle = TextureHelper.GetSwizzle(DstImage);

            int texelBytes = GetImageDescriptor(SrcImage.Format).BytesPerPixel;

            if (GetImageDescriptor(DstImage.Format).BytesPerPixel != texelBytes)
            {
                return false;
            }

            for (int y = 0; y < Height; y++)
            {
                for (int x = 0; x < Width; x++)
                {
                    long srcOffset = (uint)srcSwizzle.GetSwizzleOffset(SrcX + x, SrcY + y);
                    long dstOffset = (uint)dstSwizzle.GetSwizzleOffset(DstX + x, DstY + y);

                    byte[] texel = Vmm.ReadBytes(SrcAddress + srcOffset, texelBytes);

                    Vmm.WriteBytes(DstAddress + dstOffset, texel);
                }
            }

            return true;
        }

        /// <summary>Returns bytes-per-block times the image size measured in blocks.</summary>
        public static int GetSize(GalImage Image)
        {
            ImageDescriptor desc = GetImageDescriptor(Image.Format);

            int columns = DivRoundUp(Image.Width,  desc.BlockWidth);
            int rows    = DivRoundUp(Image.Height, desc.BlockHeight);

            return desc.BytesPerPixel * columns * rows;
        }

        /// <summary>
        /// Returns the number of bytes in one row of blocks for the given width,
        /// rounded up to a multiple of 32.
        /// </summary>
        public static int GetPitch(GalImageFormat Format, int Width)
        {
            ImageDescriptor desc = GetImageDescriptor(Format);

            int rowBytes = desc.BytesPerPixel * DivRoundUp(Width, desc.BlockWidth);

            return (rowBytes + 0x1f) & ~0x1f;
        }

        /// <summary>Block width recorded for the format.</summary>
        public static int GetBlockWidth(GalImageFormat Format) => GetImageDescriptor(Format).BlockWidth;

        /// <summary>Block height recorded for the format.</summary>
        public static int GetBlockHeight(GalImageFormat Format) => GetImageDescriptor(Format).BlockHeight;

        /// <summary>
        /// Rounds the image width up using a mask derived from the layout:
        /// <c>TileWidth * (64 / bytesPerPixel)</c> for block linear, <c>32 / bytesPerPixel</c> otherwise.
        /// </summary>
        public static int GetAlignedWidth(GalImage Image)
        {
            int bytesPerPixel = GetImageDescriptor(Image.Format).BytesPerPixel;

            int mask = Image.Layout == GalMemoryLayout.BlockLinear
                ? Image.TileWidth * (64 / bytesPerPixel) - 1
                : (32 / bytesPerPixel) - 1;

            return (Image.Width + mask) & ~mask;
        }

        /// <summary>Returns the image dimensions divided by the block size, rounded up.</summary>
        public static (int Width, int Height) GetImageSizeInBlocks(GalImage Image)
        {
            ImageDescriptor desc = GetImageDescriptor(Image.Format);

            int columns = DivRoundUp(Image.Width,  desc.BlockWidth);
            int rows    = DivRoundUp(Image.Height, desc.BlockHeight);

            return (columns, rows);
        }

        /// <summary>Bytes per pixel (or per block) recorded for the format.</summary>
        public static int GetBytesPerPixel(GalImageFormat Format) => GetImageDescriptor(Format).BytesPerPixel;

        private static int DivRoundUp(int LHS, int RHS) => (LHS + (RHS - 1)) / RHS;

        /// <summary>True when the format's descriptor targets the color buffer.</summary>
        public static bool HasColor(GalImageFormat Format) => TargetsAny(Format, TargetBuffer.Color);

        /// <summary>True when the format's descriptor targets the depth buffer.</summary>
        public static bool HasDepth(GalImageFormat Format) => TargetsAny(Format, TargetBuffer.Depth);

        /// <summary>True when the format's descriptor targets the stencil buffer.</summary>
        public static bool HasStencil(GalImageFormat Format) => TargetsAny(Format, TargetBuffer.Stencil);

        /// <summary>True when the block size of the format is anything other than 1x1.</summary>
        public static bool IsCompressed(GalImageFormat Format)
        {
            ImageDescriptor desc = GetImageDescriptor(Format);

            return (desc.BlockWidth | desc.BlockHeight) != 1;
        }

        // Shared implementation of the HasColor/HasDepth/HasStencil queries.
        private static bool TargetsAny(GalImageFormat format, TargetBuffer buffers)
        {
            return (GetImageDescriptor(format).Target & buffers) != 0;
        }

        // Looks up the descriptor after stripping the type flags from the format.
        private static ImageDescriptor GetImageDescriptor(GalImageFormat Format)
        {
            GalImageFormat pixelFormat = Format & GalImageFormat.FormatMask;

            if (!s_ImageTable.TryGetValue(pixelFormat, out ImageDescriptor desc))
            {
                throw new NotImplementedException($"Format \"{pixelFormat}\" not implemented!");
            }

            return desc;
        }

        // Translates a texture component type into the matching image format type flag.
        private static GalImageFormat GetFormatType(GalTextureType Type)
        {
            switch (Type)
            {
                case GalTextureType.Snorm: return Snorm;
                case GalTextureType.Unorm: return Unorm;
                case GalTextureType.Sint:  return Sint;
                case GalTextureType.Uint:  return Uint;
                case GalTextureType.Float: return Float;
            }

            throw new NotImplementedException(((int)Type).ToString());
        }
    }
}

// Ryujinx.Graphics/Graphics3d/Texture/IntegerEncoded.cs
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// One value of a bounded integer sequence: a plain-bits part plus an optional
    /// trit or quint part, together with the encoding that produced it.
    /// </summary>
    public struct IntegerEncoded
    {
        public enum EIntegerEncoding
        {
            JustBits,
            Quint,
            Trit
        }

        EIntegerEncoding Encoding;

        public int NumberBits { get; private set; }
        public int BitValue   { get; private set; }
        public int TritValue  { get; private set; }
        public int QuintValue { get; private set; }

        public IntegerEncoded(EIntegerEncoding _Encoding, int NumBits)
        {
            Encoding   = _Encoding;
            NumberBits = NumBits;
            BitValue   = 0;
            TritValue  = 0;
            QuintValue = 0;
        }

        /// <summary>True when both values use the same encoding kind and bit count.</summary>
        public bool MatchesEncoding(IntegerEncoded Other)
        {
            return Encoding == Other.Encoding && NumberBits == Other.NumberBits;
        }

        public EIntegerEncoding GetEncoding() => Encoding;

        /// <summary>
        /// Number of bits needed to store <paramref name="NumberVals"/> values with this encoding.
        /// </summary>
        public int GetBitLength(int NumberVals)
        {
            int plainBits = NumberBits * NumberVals;

            switch (Encoding)
            {
                // 8 extra bits per group of 5 values, rounded up.
                case EIntegerEncoding.Trit:  return plainBits + (NumberVals * 8 + 4) / 5;

                // 7 extra bits per group of 3 values, rounded up.
                case EIntegerEncoding.Quint: return plainBits + (NumberVals * 7 + 2) / 3;

                default: return plainBits;
            }
        }

        /// <summary>
        /// Finds the encoding for the largest value not above <paramref name="MaxVal"/>
        /// that has the form 2^n - 1, 3*2^n - 1 or 5*2^n - 1.
        /// </summary>
        /// <returns>The encoding found, or zero-bit <c>JustBits</c> when <paramref name="MaxVal"/> is not positive.</returns>
        public static IntegerEncoded CreateEncoding(int MaxVal)
        {
            for (int candidate = MaxVal; candidate > 0; candidate--)
            {
                int count = candidate + 1;

                // count is a power of two: plain bits.
                if ((count & (count - 1)) == 0)
                {
                    return new IntegerEncoded(EIntegerEncoding.JustBits, BitArrayStream.PopCnt(candidate));
                }

                // count is 3 * 2^n: trits.
                int third = count / 3;

                if (count % 3 == 0 && (third & (third - 1)) == 0)
                {
                    return new IntegerEncoded(EIntegerEncoding.Trit, BitArrayStream.PopCnt(third - 1));
                }

                // count is 5 * 2^n: quints.
                int fifth = count / 5;

                if (count % 5 == 0 && (fifth & (fifth - 1)) == 0)
                {
                    return new IntegerEncoded(EIntegerEncoding.Quint, BitArrayStream.PopCnt(fifth - 1));
                }

                // Not representable as a bounded integer sequence; try the next lower value.
            }

            return new IntegerEncoded(EIntegerEncoding.JustBits, 0);
        }

        // Extracts bits [low, high] (inclusive) of a non-negative value.
        private static int Bits(int value, int low, int high)
        {
            return (value >> low) & ((1 << (high - low + 1)) - 1);
        }

        // Extracts a single bit as 0 or 1.
        private static int Bit(int value, int index)
        {
            return (value >> index) & 1;
        }

        /// <summary>
        /// Reads one block of five trit-encoded values from the stream and appends them to the list.
        /// </summary>
        public static void DecodeTritBlock(
            BitArrayStream       BitStream,
            List<IntegerEncoded> ListIntegerEncoded,
            int                  NumberBitsPerValue)
        {
            // Implements the algorithm in section C.2.12.
            // The packed trit byte is interleaved with the plain-bit parts in chunks
            // of 2, 2, 1, 2 and 1 bits, each following one plain-bit value.
            int[] chunkWidth = { 2, 2, 1, 2, 1 };
            int[] chunkShift = { 0, 2, 4, 5, 7 };

            int[] plain = new int[5];
            int[] trits = new int[5];

            int packed = 0;

            for (int i = 0; i < 5; i++)
            {
                plain[i] = BitStream.ReadBits(NumberBitsPerValue);

                packed |= BitStream.ReadBits(chunkWidth[i]) << chunkShift[i];
            }

            int c;

            if (Bits(packed, 2, 4) == 7)
            {
                c = (Bits(packed, 5, 7) << 2) | Bits(packed, 0, 1);

                trits[4] = 2;
                trits[3] = 2;
            }
            else
            {
                c = Bits(packed, 0, 4);

                if (Bits(packed, 5, 6) == 3)
                {
                    trits[4] = 2;
                    trits[3] = Bit(packed, 7);
                }
                else
                {
                    trits[4] = Bit(packed, 7);
                    trits[3] = Bits(packed, 5, 6);
                }
            }

            if (Bits(c, 0, 1) == 3)
            {
                trits[2] = 2;
                trits[1] = Bit(c, 4);
                trits[0] = (Bit(c, 3) << 1) | (Bit(c, 2) & ~Bit(c, 3));
            }
            else if (Bits(c, 2, 3) == 3)
            {
                trits[2] = 2;
                trits[1] = 2;
                trits[0] = Bits(c, 0, 1);
            }
            else
            {
                trits[2] = Bit(c, 4);
                trits[1] = Bits(c, 2, 3);
                trits[0] = (Bit(c, 1) << 1) | (Bit(c, 0) & ~Bit(c, 1));
            }

            for (int i = 0; i < 5; i++)
            {
                IntegerEncoded value = new IntegerEncoded(EIntegerEncoding.Trit, NumberBitsPerValue);

                value.BitValue  = plain[i];
                value.TritValue = trits[i];

                ListIntegerEncoded.Add(value);
            }
        }

        /// <summary>
        /// Reads one block of three quint-encoded values from the stream and appends them to the list.
        /// </summary>
        public static void DecodeQuintBlock(
            BitArrayStream       BitStream,
            List<IntegerEncoded> ListIntegerEncoded,
            int                  NumberBitsPerValue)
        {
            // Implements the algorithm in section C.2.12.
            // The packed quint bits are interleaved with the plain-bit parts in chunks
            // of 3, 2 and 2 bits, each following one plain-bit value.
            int[] chunkWidth = { 3, 2, 2 };
            int[] chunkShift = { 0, 3, 5 };

            int[] plain  = new int[3];
            int[] quints = new int[3];

            int packed = 0;

            for (int i = 0; i < 3; i++)
            {
                plain[i] = BitStream.ReadBits(NumberBitsPerValue);

                packed |= BitStream.ReadBits(chunkWidth[i]) << chunkShift[i];
            }

            bool lowPairSet = Bits(packed, 1, 2) == 3;

            if (lowPairSet && Bits(packed, 5, 6) == 0)
            {
                quints[0] = 4;
                quints[1] = 4;
                quints[2] = (Bit(packed, 0) << 2) |
                            ((Bit(packed, 4) & ~Bit(packed, 0)) << 1) |
                            (Bit(packed, 3) & ~Bit(packed, 0));
            }
            else
            {
                int c;

                if (lowPairSet)
                {
                    quints[2] = 4;

                    c = (Bits(packed, 3, 4) << 3) | ((~Bits(packed, 5, 6) & 3) << 1) | Bit(packed, 0);
                }
                else
                {
                    quints[2] = Bits(packed, 5, 6);

                    c = Bits(packed, 0, 4);
                }

                if (Bits(c, 0, 2) == 5)
                {
                    quints[1] = 4;
                    quints[0] = Bits(c, 3, 4);
                }
                else
                {
                    quints[1] = Bits(c, 3, 4);
                    quints[0] = Bits(c, 0, 2);
                }
            }

            for (int i = 0; i < 3; i++)
            {
                IntegerEncoded value = new IntegerEncoded(EIntegerEncoding.Quint, NumberBitsPerValue);

                value.BitValue   = plain[i];
                value.QuintValue = quints[i];

                ListIntegerEncoded.Add(value);
            }
        }

        /// <summary>
        /// Decodes values from the stream until at least <paramref name="NumberValues"/> have been
        /// appended to the list. Trit and quint encodings decode whole blocks of 5 and 3 values,
        /// so more than <paramref name="NumberValues"/> entries may be appended.
        /// </summary>
        public static void DecodeIntegerSequence(
            List<IntegerEncoded> DecodeIntegerSequence,
            BitArrayStream       BitStream,
            int                  MaxRange,
            int                  NumberValues)
        {
            // Determine encoding parameters
            IntegerEncoded template = CreateEncoding(MaxRange);

            for (int decoded = 0; decoded < NumberValues; )
            {
                switch (template.GetEncoding())
                {
                    case EIntegerEncoding.Quint:
                        DecodeQuintBlock(BitStream, DecodeIntegerSequence, template.NumberBits);
                        decoded += 3;
                        break;

                    case EIntegerEncoding.Trit:
                        DecodeTritBlock(BitStream, DecodeIntegerSequence, template.NumberBits);
                        decoded += 5;
                        break;

                    case EIntegerEncoding.JustBits:
                        // The struct is copied into the list, so the template can be reused.
                        template.BitValue = BitStream.ReadBits(template.NumberBits);
                        DecodeIntegerSequence.Add(template);
                        decoded++;
                        break;
                }
            }
        }
    }
}

// Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Swizzle for a row-by-row layout: the offset is <c>X * Bpp + Y * Pitch</c>.
    /// </summary>
    class LinearSwizzle : ISwizzle
    {
        private readonly int _rowPitch;
        private readonly int _bytesPerPixel;

        /// <param name="Pitch">Value multiplied by the Y coordinate.</param>
        /// <param name="Bpp">Value multiplied by the X coordinate.</param>
        public LinearSwizzle(int Pitch, int Bpp)
        {
            _rowPitch      = Pitch;
            _bytesPerPixel = Bpp;
        }

        public int GetSwizzleOffset(int X, int Y) => X * _bytesPerPixel + Y * _rowPitch;
    }
}
// Patch entry: new file Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs (mode 100644, index 00000000..1f2d625e)
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using System;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Decodes texture and sampler descriptors read from GPU memory into
    /// <see cref="GalImage"/> and <see cref="GalTextureSampler"/> values.
    /// </summary>
    static class TextureFactory
    {
        // Both descriptor kinds are fetched as eight consecutive 32-bit words.
        private const int DescriptorWordCount = 8;

        /// <summary>
        /// Builds the image description for the texture descriptor located at <paramref name="TicPosition"/>.
        /// </summary>
        /// <param name="Vmm">Memory the descriptor words are read from</param>
        /// <param name="TicPosition">Position of the first descriptor word</param>
        /// <returns>The decoded image; its Pitch is only assigned when the layout is pitch</returns>
        public static GalImage MakeTexture(NvGpuVmm Vmm, long TicPosition)
        {
            int[] Words = ReadWords(Vmm, TicPosition, DescriptorWordCount);

            GalImageFormat ImageFormat = GetImageFormat(Words);

            // Word 0 carries one 3-bit source selector per component, starting at bit 19.
            GalTextureSource SourceX = (GalTextureSource)Bits(Words[0], 19, 7);
            GalTextureSource SourceY = (GalTextureSource)Bits(Words[0], 22, 7);
            GalTextureSource SourceZ = (GalTextureSource)Bits(Words[0], 25, 7);
            GalTextureSource SourceW = (GalTextureSource)Bits(Words[0], 28, 7);

            TextureSwizzle MemorySwizzle = (TextureSwizzle)Bits(Words[2], 21, 7);

            bool IsBlockLinear =
                MemorySwizzle == TextureSwizzle.BlockLinear ||
                MemorySwizzle == TextureSwizzle.BlockLinearColorKey;

            // Every swizzle mode that is not block linear is treated as pitch.
            GalMemoryLayout MemoryLayout = IsBlockLinear
                ? GalMemoryLayout.BlockLinear
                : GalMemoryLayout.Pitch;

            // Word 3 holds the block height and tile width as log2 values.
            int GobBlockHeight = 1 << Bits(Words[3], 3,  7);
            int TileWidth      = 1 << Bits(Words[3], 10, 7);

            // The low 16 bits of words 4 and 5 hold the size minus one.
            int ImageWidth  = Bits(Words[4], 0, 0xffff) + 1;
            int ImageHeight = Bits(Words[5], 0, 0xffff) + 1;

            GalImage Result = new GalImage(
                ImageWidth,
                ImageHeight,
                TileWidth,
                GobBlockHeight,
                MemoryLayout,
                ImageFormat,
                SourceX,
                SourceY,
                SourceZ,
                SourceW);

            if (MemoryLayout == GalMemoryLayout.Pitch)
            {
                // The pitch comes from the low 16 bits of word 3, scaled by 32.
                Result.Pitch = Bits(Words[3], 0, 0xffff) << 5;
            }

            return Result;
        }

        /// <summary>
        /// Builds the sampler description for the sampler descriptor located at <paramref name="TscPosition"/>.
        /// </summary>
        /// <param name="Gpu">Not used by this method</param>
        /// <param name="Vmm">Memory the descriptor words are read from</param>
        /// <param name="TscPosition">Position of the first descriptor word</param>
        /// <returns>The decoded sampler</returns>
        public static GalTextureSampler MakeSampler(NvGpu Gpu, NvGpuVmm Vmm, long TscPosition)
        {
            int[] Words = ReadWords(Vmm, TscPosition, DescriptorWordCount);

            // Word 0: three consecutive 3-bit wrap modes.
            GalTextureWrap WrapU = (GalTextureWrap)Bits(Words[0], 0, 7);
            GalTextureWrap WrapV = (GalTextureWrap)Bits(Words[0], 3, 7);
            GalTextureWrap WrapP = (GalTextureWrap)Bits(Words[0], 6, 7);

            // Word 1: 2-bit filter selectors at bits 0, 4 and 6.
            GalTextureFilter    Magnification = (GalTextureFilter)   Bits(Words[1], 0, 3);
            GalTextureFilter    Minification  = (GalTextureFilter)   Bits(Words[1], 4, 3);
            GalTextureMipFilter MipMapping    = (GalTextureMipFilter)Bits(Words[1], 6, 3);

            // Words 4 to 7 are reinterpreted bit-for-bit as the four border color floats.
            float Border0 = BitConverter.Int32BitsToSingle(Words[4]);
            float Border1 = BitConverter.Int32BitsToSingle(Words[5]);
            float Border2 = BitConverter.Int32BitsToSingle(Words[6]);
            float Border3 = BitConverter.Int32BitsToSingle(Words[7]);

            GalColorF Border = new GalColorF(Border0, Border1, Border2, Border3);

            return new GalTextureSampler(
                WrapU,
                WrapV,
                WrapP,
                Minification,
                Magnification,
                MipMapping,
                Border);
        }

        /// <summary>
        /// Resolves the image format from the format, component type and sRGB fields of a texture descriptor.
        /// </summary>
        /// <param name="Tic">Texture descriptor words</param>
        /// <returns>The format returned by <c>ImageUtils.ConvertTexture</c> for those fields</returns>
        private static GalImageFormat GetImageFormat(int[] Tic)
        {
            GalTextureFormat TextureFormat = (GalTextureFormat)Bits(Tic[0], 0, 0x7f);

            // One 3-bit component type each for R, G, B and A, starting at bit 7 of word 0.
            GalTextureType TypeR = (GalTextureType)Bits(Tic[0], 7,  7);
            GalTextureType TypeG = (GalTextureType)Bits(Tic[0], 10, 7);
            GalTextureType TypeB = (GalTextureType)Bits(Tic[0], 13, 7);
            GalTextureType TypeA = (GalTextureType)Bits(Tic[0], 16, 7);

            bool IsSrgb = Bits(Tic[4], 22, 1) != 0;

            return ImageUtils.ConvertTexture(TextureFormat, TypeR, TypeG, TypeB, TypeA, IsSrgb);
        }

        /// <summary>
        /// Reads <paramref name="Count"/> consecutive 32-bit words starting at <paramref name="Position"/>.
        /// </summary>
        /// <param name="Vmm">Memory to read from</param>
        /// <param name="Position">Position of the first word</param>
        /// <param name="Count">Number of words to read</param>
        /// <returns>The words, in the order they were read</returns>
        private static int[] ReadWords(NvGpuVmm Vmm, long Position, int Count)
        {
            int[] Result = new int[Count];

            for (int WordIndex = 0; WordIndex < Result.Length; WordIndex++)
            {
                // Each word is 4 bytes wide.
                Result[WordIndex] = Vmm.ReadInt32(Position + WordIndex * 4L);
            }

            return Result;
        }

        /// <summary>
        /// Shifts <paramref name="Word"/> right by <paramref name="Shift"/> bits and applies <paramref name="Mask"/>.
        /// </summary>
        private static int Bits(int Word, int Shift, int Mask)
        {
            return (Word >> Shift) & Mask;
        }
    }
}
// Patch entry: new file Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs (mode 100644, index 00000000..6ac91d8b)
using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Helpers for choosing a swizzle implementation for an image and for
    /// resolving an <see cref="IMemory"/> to a memory manager and position.
    /// </summary>
    static class TextureHelper
    {
        /// <summary>
        /// Creates the swizzle matching the memory layout of <paramref name="Image"/>.
        /// </summary>
        /// <param name="Image">Image whose format, size and layout fields are read</param>
        /// <returns>
        /// A <see cref="BlockLinearSwizzle"/> for block linear images,
        /// a <see cref="LinearSwizzle"/> built from the image pitch otherwise
        /// </returns>
        public static ISwizzle GetSwizzle(GalImage Image)
        {
            int PixelsPerBlock = ImageUtils.GetBlockWidth (Image.Format);
            int Bpp            = ImageUtils.GetBytesPerPixel(Image.Format);

            // Width in blocks, rounded up.
            int WidthInBlocks = (Image.Width + (PixelsPerBlock - 1)) / PixelsPerBlock;

            if (Image.Layout != GalMemoryLayout.BlockLinear)
            {
                return new LinearSwizzle(Image.Pitch, Bpp);
            }

            // Round the width up using a mask derived from the tile width.
            // NOTE(review): this assumes TileWidth * (64 / Bpp) is a power of two; confirm.
            int Mask = Image.TileWidth * (64 / Bpp) - 1;

            int AlignedWidth = (WidthInBlocks + Mask) & ~Mask;

            return new BlockLinearSwizzle(AlignedWidth, Bpp, Image.GobBlockHeight);
        }

        /// <summary>
        /// Resolves <paramref name="Memory"/> to a <see cref="MemoryManager"/> and a position within it.
        /// </summary>
        /// <param name="Memory">Either an <see cref="NvGpuVmm"/> or a <see cref="MemoryManager"/></param>
        /// <param name="Position">Position to resolve</param>
        /// <returns>
        /// For an <see cref="NvGpuVmm"/>, its Memory and the result of GetPhysicalAddress for the position;
        /// otherwise the argument cast to <see cref="MemoryManager"/> and the position unchanged
        /// </returns>
        public static (MemoryManager Memory, long Position) GetMemoryAndPosition(
            IMemory Memory,
            long    Position)
        {
            if (!(Memory is NvGpuVmm Vmm))
            {
                // Anything that is not the GPU memory wrapper is cast directly.
                return ((MemoryManager)Memory, Position);
            }

            return (Vmm.Memory, Vmm.GetPhysicalAddress(Position));
        }
    }
}

// Patch entry: new file Ryujinx.Graphics/Graphics3d/Texture/TextureSwizzle.cs (mode 100644, index 00000000..c67a5367)
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Swizzle modes of a texture. The numeric values are significant:
    /// TextureFactory casts a 3-bit field of the texture descriptor to this enum.
    /// </summary>
    public enum TextureSwizzle
    {
        _1dBuffer           = 0,
        PitchColorKey       = 1,
        Pitch               = 2,
        BlockLinear         = 3,
        BlockLinearColorKey = 4
    }
}
\ No newline at end of file |
