diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2018-05-07 15:53:23 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-05-07 15:53:23 -0300 |
| commit | 34037701c708cb70bbf44dea71ee0912f7b4102b (patch) | |
| tree | ca4cf2bde85dea48af12033b8d0446f17b611f4f /Ryujinx.Core/Gpu | |
| parent | 4419e8d6b43432eae94a3a9304f7df22b34738a8 (diff) | |
NvServices refactoring (#120)
* Initial implementation of NvMap/NvHostCtrl
* More work on NvHostCtrl
* Refactoring of nvservices, move GPU Vmm, make Vmm per-process, refactor most gpu devices, move Gpu to Core, fix CbBind
* Implement GetGpuTime, support CancelSynchronization, fix issue on InsertWaitingMutex, proper double buffering support (again, not working properly for commercial games, only hb)
* Try to fix perf regression reading/writing textures, moved syncpts and events to a UserCtx class, delete global state when the process exits, other minor tweaks
* Remove now unused code, add comment about probably wrong result codes
Diffstat (limited to 'Ryujinx.Core/Gpu')
23 files changed, 2463 insertions, 0 deletions
diff --git a/Ryujinx.Core/Gpu/BlockLinearSwizzle.cs b/Ryujinx.Core/Gpu/BlockLinearSwizzle.cs new file mode 100644 index 00000000..eb41ea30 --- /dev/null +++ b/Ryujinx.Core/Gpu/BlockLinearSwizzle.cs @@ -0,0 +1,57 @@ +namespace Ryujinx.Core.Gpu +{ + class BlockLinearSwizzle : ISwizzle + { + private int BhShift; + private int BppShift; + private int BhMask; + + private int XShift; + private int GobStride; + + public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16) + { + BhMask = (BlockHeight * 8) - 1; + + BhShift = CountLsbZeros(BlockHeight * 8); + BppShift = CountLsbZeros(Bpp); + + int WidthInGobs = Width * Bpp / 64; + + GobStride = 512 * BlockHeight * WidthInGobs; + + XShift = CountLsbZeros(512 * BlockHeight); + } + + private int CountLsbZeros(int Value) + { + int Count = 0; + + while (((Value >> Count) & 1) == 0) + { + Count++; + } + + return Count; + } + + public int GetSwizzleOffset(int X, int Y) + { + X <<= BppShift; + + int Position = (Y >> BhShift) * GobStride; + + Position += (X >> 6) << XShift; + + Position += ((Y & BhMask) >> 3) << 9; + + Position += ((X & 0x3f) >> 5) << 8; + Position += ((Y & 0x07) >> 1) << 6; + Position += ((X & 0x1f) >> 4) << 5; + Position += ((Y & 0x01) >> 0) << 4; + Position += ((X & 0x0f) >> 0) << 0; + + return Position; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/INvGpuEngine.cs b/Ryujinx.Core/Gpu/INvGpuEngine.cs new file mode 100644 index 00000000..865ea8ba --- /dev/null +++ b/Ryujinx.Core/Gpu/INvGpuEngine.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Core.Gpu +{ + interface INvGpuEngine + { + int[] Registers { get; } + + void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry); + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/ISwizzle.cs b/Ryujinx.Core/Gpu/ISwizzle.cs new file mode 100644 index 00000000..f475be6e --- /dev/null +++ b/Ryujinx.Core/Gpu/ISwizzle.cs @@ -0,0 +1,7 @@ +namespace Ryujinx.Core.Gpu +{ + interface ISwizzle + { + int GetSwizzleOffset(int X, int Y); + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/LinearSwizzle.cs b/Ryujinx.Core/Gpu/LinearSwizzle.cs new file mode 100644 index 00000000..5f8dfdde --- /dev/null +++ b/Ryujinx.Core/Gpu/LinearSwizzle.cs @@ -0,0 +1,19 @@ +namespace Ryujinx.Core.Gpu +{ + class LinearSwizzle : ISwizzle + { + private int Pitch; + private int Bpp; + + public LinearSwizzle(int Pitch, int Bpp) + { + this.Pitch = Pitch; + this.Bpp = Bpp; + } + + public int GetSwizzleOffset(int X, int Y) + { + return X * Bpp + Y * Pitch; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/MacroInterpreter.cs b/Ryujinx.Core/Gpu/MacroInterpreter.cs new file mode 100644 index 00000000..b799f98f --- /dev/null +++ b/Ryujinx.Core/Gpu/MacroInterpreter.cs @@ -0,0 +1,419 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Core.Gpu +{ + class MacroInterpreter + { + private enum AssignmentOperation + { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMaddr = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMaddr = 5, + MoveAndSetMaddrThenFetchAndSend = 6, + MoveAndSetMaddrThenSendHigh = 7 + } + + private enum AluOperation + { + AluReg = 0, + AddImmediate = 1, + BitfieldReplace = 2, + BitfieldExtractLslImm = 3, + BitfieldExtractLslReg = 4, + ReadImmediate = 5 + } + + private enum AluRegOperation + { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + BitwiseExclusiveOr = 8, + BitwiseOr = 9, + BitwiseAnd = 10, + BitwiseAndNot = 11, + BitwiseNotAnd = 12 + } + + private NvGpuFifo PFifo; + private INvGpuEngine Engine; + + public Queue<int> Fifo { get; private set; } + + private int[] Gprs; + + private int MethAddr; + private int MethIncr; + + private bool Carry; + + private int OpCode; + + private int PipeOp; + + private long Pc; + + public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine) + { + this.PFifo = PFifo; + this.Engine = Engine; + + Fifo = new Queue<int>(); + + Gprs = new int[8]; + } + + public void Execute(NvGpuVmm Vmm, long Position, int Param) + { + Reset(); + + Gprs[1] = Param; + + Pc = Position; + + FetchOpCode(Vmm); + + while (Step(Vmm)); + + //Due to the delay slot, we still need to execute + //one more instruction before we actually exit. + Step(Vmm); + } + + private void Reset() + { + for (int Index = 0; Index < Gprs.Length; Index++) + { + Gprs[Index] = 0; + } + + MethAddr = 0; + MethIncr = 0; + + Carry = false; + } + + private bool Step(NvGpuVmm Vmm) + { + long BaseAddr = Pc - 4; + + FetchOpCode(Vmm); + + if ((OpCode & 7) < 7) + { + //Operation produces a value. + AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7); + + int Result = GetAluResult(); + + switch (AsgOp) + { + //Fetch parameter and ignore result. + case AssignmentOperation.IgnoreAndFetch: + { + SetDstGpr(FetchParam()); + + break; + } + + //Move result. + case AssignmentOperation.Move: + { + SetDstGpr(Result); + + break; + } + + //Move result and use as Method Address. + case AssignmentOperation.MoveAndSetMaddr: + { + SetDstGpr(Result); + + SetMethAddr(Result); + + break; + } + + //Fetch parameter and send result. + case AssignmentOperation.FetchAndSend: + { + SetDstGpr(FetchParam()); + + Send(Vmm, Result); + + break; + } + + //Move and send result. + case AssignmentOperation.MoveAndSend: + { + SetDstGpr(Result); + + Send(Vmm, Result); + + break; + } + + //Fetch parameter and use result as Method Address. + case AssignmentOperation.FetchAndSetMaddr: + { + SetDstGpr(FetchParam()); + + SetMethAddr(Result); + + break; + } + + //Move result and use as Method Address, then fetch and send paramter. + case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend: + { + SetDstGpr(Result); + + SetMethAddr(Result); + + Send(Vmm, FetchParam()); + + break; + } + + //Move result and use as Method Address, then send bits 17:12 of result. + case AssignmentOperation.MoveAndSetMaddrThenSendHigh: + { + SetDstGpr(Result); + + SetMethAddr(Result); + + Send(Vmm, (Result >> 12) & 0x3f); + + break; + } + } + } + else + { + //Branch. + bool OnNotZero = ((OpCode >> 4) & 1) != 0; + + bool Taken = OnNotZero + ? GetGprA() != 0 + : GetGprA() == 0; + + if (Taken) + { + Pc = BaseAddr + (GetImm() << 2); + + bool NoDelays = (OpCode & 0x20) != 0; + + if (NoDelays) + { + FetchOpCode(Vmm); + } + + return true; + } + } + + bool Exit = (OpCode & 0x80) != 0; + + return !Exit; + } + + private void FetchOpCode(NvGpuVmm Vmm) + { + OpCode = PipeOp; + + PipeOp = Vmm.ReadInt32(Pc); + + Pc += 4; + } + + private int GetAluResult() + { + AluOperation Op = (AluOperation)(OpCode & 7); + + switch (Op) + { + case AluOperation.AluReg: + { + AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f); + + return GetAluResult(AluOp, GetGprA(), GetGprB()); + } + + case AluOperation.AddImmediate: + { + return GetGprA() + GetImm(); + } + + case AluOperation.BitfieldReplace: + case AluOperation.BitfieldExtractLslImm: + case AluOperation.BitfieldExtractLslReg: + { + int BfSrcBit = (OpCode >> 17) & 0x1f; + int BfSize = (OpCode >> 22) & 0x1f; + int BfDstBit = (OpCode >> 27) & 0x1f; + + int BfMask = (1 << BfSize) - 1; + + int Dst = GetGprA(); + int Src = GetGprB(); + + switch (Op) + { + case AluOperation.BitfieldReplace: + { + Src = (int)((uint)Src >> BfSrcBit) & BfMask; + + Dst &= ~(BfMask << BfDstBit); + + Dst |= Src << BfDstBit; + + return Dst; + } + + case AluOperation.BitfieldExtractLslImm: + { + Src = (int)((uint)Src >> Dst) & BfMask; + + return Src << BfDstBit; + } + + case AluOperation.BitfieldExtractLslReg: + { + Src = (int)((uint)Src >> BfSrcBit) & BfMask; + + return Src << Dst; + } + } + + break; + } + + case AluOperation.ReadImmediate: + { + return Read(GetGprA() + GetImm()); + } + } + + throw new ArgumentException(nameof(OpCode)); + } + + private int GetAluResult(AluRegOperation AluOp, int A, int B) + { + switch (AluOp) + { + case AluRegOperation.Add: + { + ulong Result = (ulong)A + (ulong)B; + + Carry = Result > 0xffffffff; + + return (int)Result; + } + + case AluRegOperation.AddWithCarry: + { + ulong Result = (ulong)A + (ulong)B + (Carry ? 1UL : 0UL); + + Carry = Result > 0xffffffff; + + return (int)Result; + } + + case AluRegOperation.Subtract: + { + ulong Result = (ulong)A - (ulong)B; + + Carry = Result < 0x100000000; + + return (int)Result; + } + + case AluRegOperation.SubtractWithBorrow: + { + ulong Result = (ulong)A - (ulong)B - (Carry ? 0UL : 1UL); + + Carry = Result < 0x100000000; + + return (int)Result; + } + + case AluRegOperation.BitwiseExclusiveOr: return A ^ B; + case AluRegOperation.BitwiseOr: return A | B; + case AluRegOperation.BitwiseAnd: return A & B; + case AluRegOperation.BitwiseAndNot: return A & ~B; + case AluRegOperation.BitwiseNotAnd: return ~(A & B); + } + + throw new ArgumentOutOfRangeException(nameof(AluOp)); + } + + private int GetImm() + { + //Note: The immediate is signed, the sign-extension is intended here. + return OpCode >> 14; + } + + private void SetMethAddr(int Value) + { + MethAddr = (Value >> 0) & 0xfff; + MethIncr = (Value >> 12) & 0x3f; + } + + private void SetDstGpr(int Value) + { + Gprs[(OpCode >> 8) & 7] = Value; + } + + private int GetGprA() + { + return GetGprValue((OpCode >> 11) & 7); + } + + private int GetGprB() + { + return GetGprValue((OpCode >> 14) & 7); + } + + private int GetGprValue(int Index) + { + return Index != 0 ? Gprs[Index] : 0; + } + + private int FetchParam() + { + int Value; + + //If we don't have any parameters in the FIFO, + //keep running the PFIFO engine until it writes the parameters. + while (!Fifo.TryDequeue(out Value)) + { + if (!PFifo.Step()) + { + return 0; + } + } + + return Value; + } + + private int Read(int Reg) + { + return Engine.Registers[Reg]; + } + + private void Send(NvGpuVmm Vmm, int Value) + { + NvGpuPBEntry PBEntry = new NvGpuPBEntry(MethAddr, 0, Value); + + Engine.CallMethod(Vmm, PBEntry); + + MethAddr += MethIncr; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpu.cs b/Ryujinx.Core/Gpu/NvGpu.cs new file mode 100644 index 00000000..71df76ff --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpu.cs @@ -0,0 +1,45 @@ +using Ryujinx.Graphics.Gal; +using System.Threading; + +namespace Ryujinx.Core.Gpu +{ + public class NvGpu + { + public IGalRenderer Renderer { get; private set; } + + public NvGpuFifo Fifo { get; private set; } + + public NvGpuEngine2d Engine2d { get; private set; } + public NvGpuEngine3d Engine3d { get; private set; } + + private Thread FifoProcessing; + + private bool KeepRunning; + + public NvGpu(IGalRenderer Renderer) + { + this.Renderer = Renderer; + + Fifo = new NvGpuFifo(this); + + Engine2d = new NvGpuEngine2d(this); + Engine3d = new NvGpuEngine3d(this); + + KeepRunning = true; + + FifoProcessing = new Thread(ProcessFifo); + + FifoProcessing.Start(); + } + + private void ProcessFifo() + { + while (KeepRunning) + { + Fifo.DispatchCalls(); + + Thread.Yield(); + } + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuEngine.cs b/Ryujinx.Core/Gpu/NvGpuEngine.cs new file mode 100644 index 00000000..ee0420f7 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuEngine.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Core.Gpu +{ + enum NvGpuEngine + { + _2d = 0x902d, + _3d = 0xb197, + Compute = 0xb1c0, + Kepler = 0xa140, + Dma = 0xb0b5 + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuEngine2d.cs b/Ryujinx.Core/Gpu/NvGpuEngine2d.cs new file mode 100644 index 00000000..88395b7a --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuEngine2d.cs @@ -0,0 +1,148 @@ +using Ryujinx.Graphics.Gal; +using System.Collections.Generic; + +namespace Ryujinx.Core.Gpu +{ + public class NvGpuEngine2d : INvGpuEngine + { + private enum CopyOperation + { + SrcCopyAnd, + RopAnd, + Blend, + SrcCopy, + Rop, + SrcCopyPremult, + BlendPremult + } + + public int[] Registers { get; private set; } + + private NvGpu Gpu; + + private Dictionary<int, NvGpuMethod> Methods; + + public NvGpuEngine2d(NvGpu Gpu) + { + this.Gpu = Gpu; + + Registers = new int[0xe00]; + + Methods = new Dictionary<int, NvGpuMethod>(); + + void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method) + { + while (Count-- > 0) + { + Methods.Add(Meth, Method); + + Meth += Stride; + } + } + + AddMethod(0xb5, 1, 1, TextureCopy); + } + + public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method)) + { + Method(Vmm, PBEntry); + } + else + { + WriteRegister(PBEntry); + } + } + + private void TextureCopy(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + CopyOperation Operation = (CopyOperation)ReadRegister(NvGpuEngine2dReg.CopyOperation); + + bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0; + int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth); + int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight); + + bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0; + int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth); + int DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight); + int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch); + int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions); + + TextureSwizzle DstSwizzle = DstLinear + ? TextureSwizzle.Pitch + : TextureSwizzle.BlockLinear; + + int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf); + + long Tag = Vmm.GetPhysicalAddress(MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress)); + + long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress); + long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress); + + bool IsFbTexture = Gpu.Engine3d.IsFrameBufferPosition(Tag); + + if (IsFbTexture && DstLinear) + { + DstSwizzle = TextureSwizzle.BlockLinear; + } + + Texture DstTexture = new Texture( + DstAddress, + DstWidth, + DstHeight, + DstBlockHeight, + DstBlockHeight, + DstSwizzle, + GalTextureFormat.A8B8G8R8); + + if (IsFbTexture) + { + Gpu.Renderer.GetFrameBufferData(Tag, (byte[] Buffer) => + { + CopyTexture(Vmm, DstTexture, Buffer); + }); + } + else + { + long Size = SrcWidth * SrcHeight * 4; + + byte[] Buffer = Vmm.ReadBytes(SrcAddress, Size); + + CopyTexture(Vmm, DstTexture, Buffer); + } + } + + private void CopyTexture(NvGpuVmm Vmm, Texture Texture, byte[] Buffer) + { + TextureWriter.Write(Vmm, Texture, Buffer); + } + + private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg) + { + return + (long)Registers[(int)Reg + 0] << 32 | + (uint)Registers[(int)Reg + 1]; + } + + private void WriteRegister(NvGpuPBEntry PBEntry) + { + int ArgsCount = PBEntry.Arguments.Count; + + if (ArgsCount > 0) + { + Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1]; + } + } + + private int ReadRegister(NvGpuEngine2dReg Reg) + { + return Registers[(int)Reg]; + } + + private void WriteRegister(NvGpuEngine2dReg Reg, int Value) + { + Registers[(int)Reg] = Value; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuEngine2dReg.cs b/Ryujinx.Core/Gpu/NvGpuEngine2dReg.cs new file mode 100644 index 00000000..b4abad00 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuEngine2dReg.cs @@ -0,0 +1,25 @@ +namespace Ryujinx.Core.Gpu +{ + enum NvGpuEngine2dReg + { + DstFormat = 0x80, + DstLinear = 0x81, + DstBlockDimensions = 0x82, + DstDepth = 0x83, + DstLayer = 0x84, + DstPitch = 0x85, + DstWidth = 0x86, + DstHeight = 0x87, + DstAddress = 0x88, + SrcFormat = 0x8c, + SrcLinear = 0x8d, + SrcBlockDimensions = 0x8e, + SrcDepth = 0x8f, + SrcLayer = 0x90, + SrcPitch = 0x91, + SrcWidth = 0x92, + SrcHeight = 0x93, + SrcAddress = 0x94, + CopyOperation = 0xab + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuEngine3d.cs b/Ryujinx.Core/Gpu/NvGpuEngine3d.cs new file mode 100644 index 00000000..b08b9496 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuEngine3d.cs @@ -0,0 +1,558 @@ +using Ryujinx.Graphics.Gal; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Core.Gpu +{ + public class NvGpuEngine3d : INvGpuEngine + { + public int[] Registers { get; private set; } + + private NvGpu Gpu; + + private Dictionary<int, NvGpuMethod> Methods; + + private struct ConstBuffer + { + public bool Enabled; + public long Position; + public int Size; + } + + private ConstBuffer[][] ConstBuffers; + + private HashSet<long> FrameBuffers; + + public NvGpuEngine3d(NvGpu Gpu) + { + this.Gpu = Gpu; + + Registers = new int[0xe00]; + + Methods = new Dictionary<int, NvGpuMethod>(); + + void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method) + { + while (Count-- > 0) + { + Methods.Add(Meth, Method); + + Meth += Stride; + } + } + + AddMethod(0x585, 1, 1, VertexEndGl); + AddMethod(0x674, 1, 1, ClearBuffers); + AddMethod(0x6c3, 1, 1, QueryControl); + AddMethod(0x8e4, 16, 1, CbData); + AddMethod(0x904, 5, 8, CbBind); + + ConstBuffers = new ConstBuffer[6][]; + + for (int Index = 0; Index < ConstBuffers.Length; Index++) + { + ConstBuffers[Index] = new ConstBuffer[18]; + } + + FrameBuffers = new HashSet<long>(); + } + + public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method)) + { + Method(Vmm, PBEntry); + } + else + { + WriteRegister(PBEntry); + } + } + + private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + SetFrameBuffer(Vmm, 0); + + long[] Tags = UploadShaders(Vmm); + + Gpu.Renderer.BindProgram(); + + SetAlphaBlending(); + + UploadTextures(Vmm, Tags); + UploadUniforms(Vmm); + UploadVertexArrays(Vmm); + } + + private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + int Arg0 = PBEntry.Arguments[0]; + + int FbIndex = (Arg0 >> 6) & 0xf; + + int Layer = (Arg0 >> 10) & 0x3ff; + + GalClearBufferFlags Flags = (GalClearBufferFlags)(Arg0 & 0x3f); + + SetFrameBuffer(Vmm, 0); + + //TODO: Enable this once the frame buffer problems are fixed. + //Gpu.Renderer.ClearBuffers(Layer, Flags); + } + + private void SetFrameBuffer(NvGpuVmm Vmm, int FbIndex) + { + long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.FrameBufferNAddress + FbIndex * 0x10); + + long PA = Vmm.GetPhysicalAddress(VA); + + FrameBuffers.Add(PA); + + int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10); + int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10); + + //Note: Using the Width/Height results seems to give incorrect results. + //Maybe the size of all frame buffers is hardcoded to screen size? This seems unlikely. + Gpu.Renderer.CreateFrameBuffer(PA, 1280, 720); + Gpu.Renderer.BindFrameBuffer(PA); + } + + private long[] UploadShaders(NvGpuVmm Vmm) + { + long[] Tags = new long[5]; + + long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); + + for (int Index = 0; Index < 6; Index++) + { + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10); + int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10); + + //Note: Vertex Program (B) is always enabled. + bool Enable = (Control & 1) != 0 || Index == 1; + + if (!Enable) + { + continue; + } + + long Tag = BasePosition + (uint)Offset; + + //TODO: Find a better way to calculate the size. + int Size = 0x20000; + + byte[] Code = Vmm.ReadBytes(Tag, Size); + + GalShaderType ShaderType = GetTypeFromProgram(Index); + + Tags[(int)ShaderType] = Tag; + + Gpu.Renderer.CreateShader(Tag, ShaderType, Code); + Gpu.Renderer.BindShader(Tag); + } + + int RawSX = ReadRegister(NvGpuEngine3dReg.ViewportScaleX); + int RawSY = ReadRegister(NvGpuEngine3dReg.ViewportScaleY); + + float SX = BitConverter.Int32BitsToSingle(RawSX); + float SY = BitConverter.Int32BitsToSingle(RawSY); + + float SignX = MathF.Sign(SX); + float SignY = MathF.Sign(SY); + + Gpu.Renderer.SetUniform2F(GalConsts.FlipUniformName, SignX, SignY); + + return Tags; + } + + private static GalShaderType GetTypeFromProgram(int Program) + { + switch (Program) + { + case 0: + case 1: return GalShaderType.Vertex; + case 2: return GalShaderType.TessControl; + case 3: return GalShaderType.TessEvaluation; + case 4: return GalShaderType.Geometry; + case 5: return GalShaderType.Fragment; + } + + throw new ArgumentOutOfRangeException(nameof(Program)); + } + + private void SetAlphaBlending() + { + //TODO: Support independent blend properly. + bool Enable = (ReadRegister(NvGpuEngine3dReg.IBlendNEnable) & 1) != 0; + + Gpu.Renderer.SetBlendEnable(Enable); + + bool BlendSeparateAlpha = (ReadRegister(NvGpuEngine3dReg.IBlendNSeparateAlpha) & 1) != 0; + + GalBlendEquation EquationRgb = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationRgb); + + GalBlendFactor FuncSrcRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb); + GalBlendFactor FuncDstRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb); + + if (BlendSeparateAlpha) + { + GalBlendEquation EquationAlpha = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationAlpha); + + GalBlendFactor FuncSrcAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha); + GalBlendFactor FuncDstAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha); + + Gpu.Renderer.SetBlendSeparate( + EquationRgb, + EquationAlpha, + FuncSrcRgb, + FuncDstRgb, + FuncSrcAlpha, + FuncDstAlpha); + } + else + { + Gpu.Renderer.SetBlend(EquationRgb, FuncSrcRgb, FuncDstRgb); + } + } + + private void UploadTextures(NvGpuVmm Vmm, long[] Tags) + { + long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); + + int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex); + + //Note: On the emulator renderer, Texture Unit 0 is + //reserved for drawing the frame buffer. + int TexIndex = 1; + + for (int Index = 0; Index < Tags.Length; Index++) + { + foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.GetTextureUsage(Tags[Index])) + { + long Position = ConstBuffers[Index][TextureCbIndex].Position; + + UploadTexture(Vmm, Position, TexIndex, DeclInfo.Index); + + Gpu.Renderer.SetUniform1(DeclInfo.Name, TexIndex); + + TexIndex++; + } + } + } + + private void UploadTexture(NvGpuVmm Vmm, long BasePosition, int TexIndex, int HndIndex) + { + long Position = BasePosition + HndIndex * 4; + + int TextureHandle = Vmm.ReadInt32(Position); + + int TicIndex = (TextureHandle >> 0) & 0xfffff; + int TscIndex = (TextureHandle >> 20) & 0xfff; + + long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset); + long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset); + + TicPosition += TicIndex * 0x20; + TscPosition += TscIndex * 0x20; + + GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition); + + long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; + + TextureAddress = Vmm.GetPhysicalAddress(TextureAddress); + + if (IsFrameBufferPosition(TextureAddress)) + { + //This texture is a frame buffer texture, + //we shouldn't read anything from memory and bind + //the frame buffer texture instead, since we're not + //really writing anything to memory. + Gpu.Renderer.BindFrameBufferTexture(TextureAddress, TexIndex, Sampler); + } + else + { + GalTexture Texture = TextureFactory.MakeTexture(Gpu, Vmm, TicPosition); + + Gpu.Renderer.SetTextureAndSampler(TexIndex, Texture, Sampler); + Gpu.Renderer.BindTexture(TexIndex); + } + } + + private void UploadUniforms(NvGpuVmm Vmm) + { + long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); + + for (int Index = 0; Index < 5; Index++) + { + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + (Index + 1) * 0x10); + int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + (Index + 1) * 0x10); + + //Note: Vertex Program (B) is always enabled. + bool Enable = (Control & 1) != 0 || Index == 0; + + if (!Enable) + { + continue; + } + + for (int Cbuf = 0; Cbuf < ConstBuffers.Length; Cbuf++) + { + ConstBuffer Cb = ConstBuffers[Index][Cbuf]; + + if (Cb.Enabled) + { + byte[] Data = Vmm.ReadBytes(Cb.Position, (uint)Cb.Size); + + Gpu.Renderer.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Data); + } + } + } + } + + private void UploadVertexArrays(NvGpuVmm Vmm) + { + long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress); + + int IndexSize = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat); + int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst); + int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount); + + GalIndexFormat IndexFormat = (GalIndexFormat)IndexSize; + + IndexSize = 1 << IndexSize; + + if (IndexSize > 4) + { + throw new InvalidOperationException(); + } + + if (IndexSize != 0) + { + int BufferSize = IndexCount * IndexSize; + + byte[] Data = Vmm.ReadBytes(IndexPosition, BufferSize); + + Gpu.Renderer.SetIndexArray(Data, IndexFormat); + } + + List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32]; + + for (int Attr = 0; Attr < 16; Attr++) + { + int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr); + + int ArrayIndex = Packed & 0x1f; + + if (Attribs[ArrayIndex] == null) + { + Attribs[ArrayIndex] = new List<GalVertexAttrib>(); + } + + Attribs[ArrayIndex].Add(new GalVertexAttrib( + Attr, + ((Packed >> 6) & 0x1) != 0, + (Packed >> 7) & 0x3fff, + (GalVertexAttribSize)((Packed >> 21) & 0x3f), + (GalVertexAttribType)((Packed >> 27) & 0x7), + ((Packed >> 31) & 0x1) != 0)); + } + + for (int Index = 0; Index < 32; Index++) + { + int VertexFirst = ReadRegister(NvGpuEngine3dReg.VertexArrayFirst); + int VertexCount = ReadRegister(NvGpuEngine3dReg.VertexArrayCount); + + int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4); + + bool Enable = (Control & 0x1000) != 0; + + long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4); + + if (!Enable) + { + continue; + } + + int Stride = Control & 0xfff; + + long Size = 0; + + if (IndexCount != 0) + { + Size = GetVertexCountFromIndexBuffer( + Vmm, + IndexPosition, + IndexCount, + IndexSize); + } + else + { + Size = VertexCount; + } + + //TODO: Support cases where the Stride is 0. + //In this case, we need to use the size of the attribute. + Size *= Stride; + + byte[] Data = Vmm.ReadBytes(VertexPosition, Size); + + GalVertexAttrib[] AttribArray = Attribs[Index]?.ToArray() ?? new GalVertexAttrib[0]; + + Gpu.Renderer.SetVertexArray(Index, Stride, Data, AttribArray); + + int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl); + + GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); + + if (IndexCount != 0) + { + Gpu.Renderer.DrawElements(Index, IndexFirst, PrimType); + } + else + { + Gpu.Renderer.DrawArrays(Index, VertexFirst, VertexCount, PrimType); + } + } + } + + private int GetVertexCountFromIndexBuffer( + NvGpuVmm Vmm, + long IndexPosition, + int IndexCount, + int IndexSize) + { + int MaxIndex = -1; + + if (IndexSize == 2) + { + while (IndexCount -- > 0) + { + ushort Value = Vmm.ReadUInt16(IndexPosition); + + IndexPosition += 2; + + if (MaxIndex < Value) + { + MaxIndex = Value; + } + } + } + else if (IndexSize == 1) + { + while (IndexCount -- > 0) + { + byte Value = Vmm.ReadByte(IndexPosition++); + + if (MaxIndex < Value) + { + MaxIndex = Value; + } + } + } + else if (IndexSize == 4) + { + while (IndexCount -- > 0) + { + uint Value = Vmm.ReadUInt32(IndexPosition); + + IndexPosition += 2; + + if (MaxIndex < Value) + { + MaxIndex = (int)Value; + } + } + } + else + { + throw new ArgumentOutOfRangeException(nameof(IndexSize)); + } + + return MaxIndex + 1; + } + + private void QueryControl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress); + + int Seq = Registers[(int)NvGpuEngine3dReg.QuerySequence]; + int Ctrl = Registers[(int)NvGpuEngine3dReg.QueryControl]; + + int Mode = Ctrl & 3; + + if (Mode == 0) + { + //Write mode. + Vmm.WriteInt32(Position, Seq); + } + + WriteRegister(PBEntry); + } + + private void CbData(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress); + + int Offset = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset); + + foreach (int Arg in PBEntry.Arguments) + { + Vmm.WriteInt32(Position + Offset, Arg); + + Offset += 4; + } + + WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Offset); + } + + private void CbBind(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + int Stage = (PBEntry.Method - 0x904) >> 3; + + int Index = PBEntry.Arguments[0]; + + bool Enabled = (Index & 1) != 0; + + Index = (Index >> 4) & 0x1f; + + long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress); + + ConstBuffers[Stage][Index].Position = Position; + ConstBuffers[Stage][Index].Enabled = Enabled; + + ConstBuffers[Stage][Index].Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize); + } + + private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg) + { + return + (long)Registers[(int)Reg + 0] << 32 | + (uint)Registers[(int)Reg + 1]; + } + + private void WriteRegister(NvGpuPBEntry PBEntry) + { + int ArgsCount = PBEntry.Arguments.Count; + + if (ArgsCount > 0) + { + Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1]; + } + } + + private int ReadRegister(NvGpuEngine3dReg Reg) + { + return Registers[(int)Reg]; + } + + private void WriteRegister(NvGpuEngine3dReg Reg, int Value) + { + Registers[(int)Reg] = Value; + } + + public bool IsFrameBufferPosition(long Position) + { + return FrameBuffers.Contains(Position); + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuEngine3dReg.cs b/Ryujinx.Core/Gpu/NvGpuEngine3dReg.cs new file mode 100644 index 00000000..823885ff --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuEngine3dReg.cs @@ -0,0 +1,61 @@ +namespace Ryujinx.Core.Gpu +{ + enum NvGpuEngine3dReg + { + FrameBufferNAddress = 0x200, + FrameBufferNWidth = 0x202, + FrameBufferNHeight = 0x203, + FrameBufferNFormat = 0x204, + ViewportScaleX = 0x280, + ViewportScaleY = 0x281, + ViewportScaleZ = 0x282, + ViewportTranslateX = 0x283, + ViewportTranslateY = 0x284, + ViewportTranslateZ = 0x285, + VertexArrayFirst = 0x35d, + VertexArrayCount = 0x35e, + VertexAttribNFormat = 0x458, + IBlendEnable = 0x4b9, + BlendSeparateAlpha = 0x4cf, + BlendEquationRgb = 0x4d0, + BlendFuncSrcRgb = 0x4d1, + BlendFuncDstRgb = 0x4d2, + BlendEquationAlpha = 0x4d3, + BlendFuncSrcAlpha = 0x4d4, + BlendFuncDstAlpha = 0x4d6, + BlendEnableMaster = 0x4d7, + IBlendNEnable = 0x4d8, + VertexArrayElemBase = 0x50d, + TexHeaderPoolOffset = 0x55d, + TexSamplerPoolOffset = 0x557, + ShaderAddress = 0x582, + VertexBeginGl = 0x586, + IndexArrayAddress = 0x5f2, + IndexArrayEndAddr = 0x5f4, + IndexArrayFormat = 0x5f6, + IndexBatchFirst = 0x5f7, + IndexBatchCount = 0x5f8, + QueryAddress = 0x6c0, + QuerySequence = 0x6c2, + QueryControl = 0x6c3, + VertexArrayNControl = 0x700, + VertexArrayNAddress = 0x701, + VertexArrayNDivisor = 0x703, + IBlendNSeparateAlpha = 0x780, + IBlendNEquationRgb = 0x781, + IBlendNFuncSrcRgb = 0x782, + IBlendNFuncDstRgb = 0x783, + IBlendNEquationAlpha = 0x784, + IBlendNFuncSrcAlpha = 0x785, + IBlendNFuncDstAlpha = 0x786, + VertexArrayNEndAddr = 0x7c0, + ShaderNControl = 0x800, + ShaderNOffset = 0x801, + ShaderNMaxGprs = 0x803, + ShaderNType = 0x804, + ConstBufferSize = 0x8e0, + ConstBufferAddress = 0x8e1, + ConstBufferOffset = 0x8e3, + TextureCbIndex = 0x982 + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuFifo.cs b/Ryujinx.Core/Gpu/NvGpuFifo.cs new file mode 100644 index 00000000..d0e6fc14 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuFifo.cs @@ -0,0 +1,174 @@ +using System.Collections.Concurrent; + +namespace Ryujinx.Core.Gpu +{ + public class NvGpuFifo + { + private const int MacrosCount = 0x80; + private const int MacroIndexMask = MacrosCount - 1; + + private NvGpu Gpu; + + private ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)> BufferQueue; + + private NvGpuEngine[] SubChannels; + + private struct CachedMacro + { + public long Position { get; private set; } + + private MacroInterpreter Interpreter; + + public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, long Position) + { + this.Position = Position; + + Interpreter = new MacroInterpreter(PFifo, Engine); + } + + public void PushParam(int Param) + { + Interpreter?.Fifo.Enqueue(Param); + } + + public void Execute(NvGpuVmm Vmm, int Param) + { + Interpreter?.Execute(Vmm, Position, Param); + } + } + + private long CurrMacroPosition; + private int CurrMacroBindIndex; + + private CachedMacro[] Macros; + + public NvGpuFifo(NvGpu Gpu) + { + this.Gpu = Gpu; + + BufferQueue = new ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)>(); + + SubChannels = new NvGpuEngine[8]; + + Macros = new CachedMacro[MacrosCount]; + } + + public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer) + { + foreach (NvGpuPBEntry PBEntry in Buffer) + { + BufferQueue.Enqueue((Vmm, PBEntry)); + } + } + + public void DispatchCalls() + { + while (Step()); + } + + public bool Step() + { + if (BufferQueue.TryDequeue(out (NvGpuVmm Vmm, NvGpuPBEntry PBEntry) Tuple)) + { + CallMethod(Tuple.Vmm, Tuple.PBEntry); + + return true; + } + + return false; + } + + private void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + if (PBEntry.Method < 0x80) + { + switch ((NvGpuFifoMeth)PBEntry.Method) + { + case NvGpuFifoMeth.BindChannel: + { + NvGpuEngine Engine = (NvGpuEngine)PBEntry.Arguments[0]; + + SubChannels[PBEntry.SubChannel] = Engine; + + break; + } + + case NvGpuFifoMeth.SetMacroUploadAddress: + { + CurrMacroPosition = (long)((ulong)PBEntry.Arguments[0] << 2); + + break; + } + + case NvGpuFifoMeth.SendMacroCodeData: + { + long Position = CurrMacroPosition; + + foreach (int Arg in PBEntry.Arguments) + { + Vmm.WriteInt32(Position, Arg); + + CurrMacroPosition += 4; + + Position += 4; + } + break; + } + + case NvGpuFifoMeth.SetMacroBindingIndex: + { + CurrMacroBindIndex = PBEntry.Arguments[0]; + + break; + } + + case NvGpuFifoMeth.BindMacro: + { + long Position = (long)((ulong)PBEntry.Arguments[0] << 2); + + Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, Position); + + break; + } + } + } + else + { + switch (SubChannels[PBEntry.SubChannel]) + { + case NvGpuEngine._2d: Call2dMethod(Vmm, PBEntry); break; + case NvGpuEngine._3d: Call3dMethod(Vmm, PBEntry); break; + } + } + } + + private void Call2dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + Gpu.Engine2d.CallMethod(Vmm, PBEntry); + } + + private void Call3dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) + { + if (PBEntry.Method < 0xe00) + { + Gpu.Engine3d.CallMethod(Vmm, PBEntry); + } + else + { + int MacroIndex = (PBEntry.Method >> 1) & MacroIndexMask; + + if ((PBEntry.Method & 1) != 0) + { + foreach (int Arg in PBEntry.Arguments) + { + Macros[MacroIndex].PushParam(Arg); + } + } + else + { + Macros[MacroIndex].Execute(Vmm, PBEntry.Arguments[0]); + } + } + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuFifoMeth.cs b/Ryujinx.Core/Gpu/NvGpuFifoMeth.cs new file mode 100644 index 00000000..78ec9080 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuFifoMeth.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Core.Gpu +{ + enum NvGpuFifoMeth + { + BindChannel = 0, + SetMacroUploadAddress = 0x45, + SendMacroCodeData = 0x46, + SetMacroBindingIndex = 0x47, + BindMacro = 0x48 + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuMethod.cs b/Ryujinx.Core/Gpu/NvGpuMethod.cs new file mode 100644 index 00000000..42e1b553 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuMethod.cs @@ -0,0 +1,4 @@ +namespace Ryujinx.Core.Gpu +{ + delegate void NvGpuMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry); +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuPBEntry.cs b/Ryujinx.Core/Gpu/NvGpuPBEntry.cs new file mode 100644 index 00000000..ebf35b9e --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuPBEntry.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.ObjectModel; + +namespace Ryujinx.Core.Gpu +{ + public struct NvGpuPBEntry + { + public int Method { get; private set; } + + public int SubChannel { get; private set; } + + private int[] m_Arguments; + + public ReadOnlyCollection<int> Arguments => Array.AsReadOnly(m_Arguments); + + public NvGpuPBEntry(int Method, int SubChannel, params int[] Arguments) + { + this.Method = Method; + this.SubChannel = SubChannel; + this.m_Arguments = Arguments; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuPushBuffer.cs b/Ryujinx.Core/Gpu/NvGpuPushBuffer.cs new file mode 100644 index 00000000..d5588655 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuPushBuffer.cs @@ -0,0 +1,101 @@ +using System.Collections.Generic; +using System.IO; + +namespace Ryujinx.Core.Gpu +{ + public static class NvGpuPushBuffer + { + private enum SubmissionMode + { + Incrementing = 1, + NonIncrementing = 3, + Immediate = 4, + IncrementOnce = 5 + } + + public static NvGpuPBEntry[] Decode(byte[] Data) + { + using (MemoryStream MS = new MemoryStream(Data)) + { + BinaryReader Reader = new BinaryReader(MS); + + List<NvGpuPBEntry> PushBuffer = new List<NvGpuPBEntry>(); + + bool CanRead() => MS.Position + 4 <= MS.Length; + + while (CanRead()) + { + int Packed = Reader.ReadInt32(); + + int Meth = (Packed >> 0) & 0x1fff; + int SubC = (Packed >> 13) & 7; + int Args = (Packed >> 16) & 0x1fff; + int Mode = (Packed >> 29) & 7; + + switch ((SubmissionMode)Mode) + { + case SubmissionMode.Incrementing: + { + for (int Index = 0; Index < Args && CanRead(); Index++, Meth++) + { + PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Reader.ReadInt32())); + } + + break; + } + + case SubmissionMode.NonIncrementing: + { + int[] Arguments = new int[Args]; + + for (int Index = 0; Index < Arguments.Length; Index++) + { + if (!CanRead()) + { + break; + } + + Arguments[Index] = Reader.ReadInt32(); + } + + PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Arguments)); + + break; + } + + case SubmissionMode.Immediate: + { + PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Args)); + + break; + } + + case SubmissionMode.IncrementOnce: + { + if (CanRead()) + { + PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Reader.ReadInt32())); + } + + if (CanRead() && Args > 1) + { + int[] Arguments = new int[Args - 1]; + + for (int Index = 0; Index < Arguments.Length && CanRead(); Index++) + { + Arguments[Index] = Reader.ReadInt32(); + } + + PushBuffer.Add(new NvGpuPBEntry(Meth + 1, SubC, Arguments)); + } + + break; + } + } + } + + return PushBuffer.ToArray(); + } + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/NvGpuVmm.cs b/Ryujinx.Core/Gpu/NvGpuVmm.cs new file mode 100644 index 00000000..cf94a6c0 --- /dev/null +++ b/Ryujinx.Core/Gpu/NvGpuVmm.cs @@ -0,0 +1,398 @@ +using ChocolArm64.Memory; +using System.Collections.Concurrent; + +namespace Ryujinx.Core.Gpu +{ + public class NvGpuVmm : IAMemory + { + public const long AddrSize = 1L << 40; + + private const int PTLvl0Bits = 14; + private const int PTLvl1Bits = 14; + private const int PTPageBits = 12; + + private const int PTLvl0Size = 1 << PTLvl0Bits; + private const int PTLvl1Size = 1 << PTLvl1Bits; + public const int PageSize = 1 << PTPageBits; + + private const int PTLvl0Mask = PTLvl0Size - 1; + private const int PTLvl1Mask = PTLvl1Size - 1; + public const int PageMask = PageSize - 1; + + private const int PTLvl0Bit = PTPageBits + PTLvl1Bits; + private const int PTLvl1Bit = PTPageBits; + + public AMemory Memory { get; private set; } + + private struct MappedMemory + { + public long Size; + + public MappedMemory(long Size) + { + this.Size = Size; + } + } + + private ConcurrentDictionary<long, MappedMemory> Maps; + + private const long PteUnmapped = -1; + private const long PteReserved = -2; + + private long[][] PageTable; + + public NvGpuVmm(AMemory Memory) + { + this.Memory = Memory; + + Maps = new ConcurrentDictionary<long, MappedMemory>(); + + PageTable = new long[PTLvl0Size][]; + } + + public long Map(long PA, long VA, long Size) + { + lock (PageTable) + { + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + if (GetPte(VA + Offset) != PteReserved) + { + return Map(PA, Size); + } + } + + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + SetPte(VA + Offset, PA + Offset); + } + } + + return VA; + } + + public long Map(long PA, long Size) + { + lock (PageTable) + { + long VA = GetFreePosition(Size); + + if (VA != -1) + { + MappedMemory Map = new MappedMemory(Size); + + Maps.AddOrUpdate(VA, Map, (Key, Old) => Map); + + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + SetPte(VA + Offset, PA + Offset); + } + } + + return VA; + } + } + + public bool Unmap(long VA) + { + if (Maps.TryRemove(VA, out MappedMemory Map)) + { + Free(VA, Map.Size); + + return true; + } + + return false; + } + + public long Reserve(long VA, long Size, long Align) + { + lock (PageTable) + { + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + if (IsPageInUse(VA + Offset)) + { + return Reserve(Size, Align); + } + } + + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + SetPte(VA + Offset, PteReserved); + } + } + + return VA; + } + + public long Reserve(long Size, long Align) + { + lock (PageTable) + { + long Position = GetFreePosition(Size, Align); + + if (Position != -1) + { + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + SetPte(Position + Offset, PteReserved); + } + } + + return Position; + } + } + + public void Free(long VA, long Size) + { + lock (PageTable) + { + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + SetPte(VA + Offset, PteUnmapped); + } + } + } + + private long GetFreePosition(long Size, long Align = 1) + { + long Position = 0; + long FreeSize = 0; + + if (Align < 1) + { + Align = 1; + } + + Align = (Align + PageMask) & ~PageMask; + + while (Position + FreeSize < AddrSize) + { + if (!IsPageInUse(Position + FreeSize)) + { + FreeSize += PageSize; + + if (FreeSize >= Size) + { + return Position; + } + } + else + { + Position += FreeSize + PageSize; + FreeSize = 0; + + long Remainder = Position % Align; + + if (Remainder != 0) + { + Position = (Position - Remainder) + Align; + } + } + } + + return -1; + } + + public long GetPhysicalAddress(long VA) + { + long BasePos = GetPte(VA); + + if (BasePos < 0) + { + return -1; + } + + return BasePos + (VA & PageMask); + } + + public bool IsRegionFree(long VA, long Size) + { + for (long Offset = 0; Offset < Size; Offset += PageSize) + { + if (IsPageInUse(VA + Offset)) + { + return false; + } + } + + return true; + } + + private bool IsPageInUse(long VA) + { + if (VA >> PTLvl0Bits + PTLvl1Bits + PTPageBits != 0) + { + return false; + } + + long L0 = (VA >> PTLvl0Bit) & PTLvl0Mask; + long L1 = (VA >> PTLvl1Bit) & PTLvl1Mask; + + if (PageTable[L0] == null) + { + return false; + } + + return PageTable[L0][L1] != PteUnmapped; + } + + private long GetPte(long Position) + { + long L0 = (Position >> PTLvl0Bit) & PTLvl0Mask; + long L1 = (Position >> PTLvl1Bit) & PTLvl1Mask; + + if (PageTable[L0] == null) + { + return -1; + } + + return PageTable[L0][L1]; + } + + private void SetPte(long Position, long TgtAddr) + { + long L0 = (Position >> PTLvl0Bit) & PTLvl0Mask; + long L1 = (Position >> PTLvl1Bit) & PTLvl1Mask; + + if (PageTable[L0] == null) + { + PageTable[L0] = new long[PTLvl1Size]; + + for (int Index = 0; Index < PTLvl1Size; Index++) + { + PageTable[L0][Index] = PteUnmapped; + } + } + + PageTable[L0][L1] = TgtAddr; + } + + public byte ReadByte(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadByte(Position); + } + + public ushort ReadUInt16(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadUInt16(Position); + } + + public uint ReadUInt32(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadUInt32(Position); + } + + public ulong ReadUInt64(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadUInt64(Position); + } + + public sbyte ReadSByte(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadSByte(Position); + } + + public short ReadInt16(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadInt16(Position); + } + + public int ReadInt32(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadInt32(Position); + } + + public long ReadInt64(long Position) + { + Position = GetPhysicalAddress(Position); + + return Memory.ReadInt64(Position); + } + + public byte[] ReadBytes(long Position, long Size) + { + Position = GetPhysicalAddress(Position); + + return AMemoryHelper.ReadBytes(Memory, Position, Size); + } + + public void WriteByte(long Position, byte Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteByte(Position, Value); + } + + public void WriteUInt16(long Position, ushort Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteUInt16(Position, Value); + } + + public void WriteUInt32(long Position, uint Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteUInt32(Position, Value); + } + + public void WriteUInt64(long Position, ulong Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteUInt64(Position, Value); + } + + public void WriteSByte(long Position, sbyte Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteSByte(Position, Value); + } + + public void WriteInt16(long Position, short Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteInt16(Position, Value); + } + + public void WriteInt32(long Position, int Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteInt32(Position, Value); + } + + public void WriteInt64(long Position, long Value) + { + Position = GetPhysicalAddress(Position); + + Memory.WriteInt64(Position, Value); + } + + public void WriteBytes(long Position, byte[] Data) + { + Position = GetPhysicalAddress(Position); + + AMemoryHelper.WriteBytes(Memory, Position, Data); + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/Texture.cs b/Ryujinx.Core/Gpu/Texture.cs new file mode 100644 index 00000000..39a35059 --- /dev/null +++ b/Ryujinx.Core/Gpu/Texture.cs @@ -0,0 +1,55 @@ +using Ryujinx.Graphics.Gal; + +namespace Ryujinx.Core.Gpu +{ + public struct Texture + { + public long Position { get; private set; } + + public int Width { get; private set; } + public int Height { get; private set; } + public int Pitch { get; private set; } + + public int BlockHeight { get; private set; } + + public TextureSwizzle Swizzle { get; private set; } + + public GalTextureFormat Format { get; private set; } + + public Texture( + long Position, + int Width, + int Height) + { + this.Position = Position; + this.Width = Width; + this.Height = Height; + + Pitch = 0; + + BlockHeight = 16; + + Swizzle = TextureSwizzle.BlockLinear; + + Format = GalTextureFormat.A8B8G8R8; + } + + public Texture( + long Position, + int Width, + int Height, + int Pitch, + int BlockHeight, + TextureSwizzle Swizzle, + GalTextureFormat Format) + { + this.Position = Position; + this.Width = Width; + this.Height = Height; + this.Pitch = Pitch; + this.BlockHeight = BlockHeight; + this.Swizzle = Swizzle; + this.Format = Format; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/TextureFactory.cs b/Ryujinx.Core/Gpu/TextureFactory.cs new file mode 100644 index 00000000..68b48a1f --- /dev/null +++ b/Ryujinx.Core/Gpu/TextureFactory.cs @@ -0,0 +1,83 @@ +using Ryujinx.Graphics.Gal; +using System; + +namespace Ryujinx.Core.Gpu +{ + static class TextureFactory + { + public static GalTexture MakeTexture(NvGpu Gpu, NvGpuVmm Vmm, long TicPosition) + { + int[] Tic = ReadWords(Vmm, TicPosition, 8); + + GalTextureFormat Format = (GalTextureFormat)(Tic[0] & 0x7f); + + long TextureAddress = (uint)Tic[1]; + + TextureAddress |= (long)((ushort)Tic[2]) << 32; + + TextureSwizzle Swizzle = (TextureSwizzle)((Tic[2] >> 21) & 7); + + int Pitch = (Tic[3] & 0xffff) << 5; + + int BlockHeightLog2 = (Tic[3] >> 3) & 7; + + int BlockHeight = 1 << BlockHeightLog2; + + int Width = (Tic[4] & 0xffff) + 1; + int Height = (Tic[5] & 0xffff) + 1; + + Texture Texture = new Texture( + TextureAddress, + Width, + Height, + Pitch, + BlockHeight, + Swizzle, + Format); + + byte[] Data = TextureReader.Read(Vmm, Texture); + + return new GalTexture(Data, Width, Height, Format); + } + + public static GalTextureSampler MakeSampler(NvGpu Gpu, NvGpuVmm Vmm, long TscPosition) + { + int[] Tsc = ReadWords(Vmm, TscPosition, 8); + + GalTextureWrap AddressU = (GalTextureWrap)((Tsc[0] >> 0) & 7); + GalTextureWrap AddressV = (GalTextureWrap)((Tsc[0] >> 3) & 7); + GalTextureWrap AddressP = (GalTextureWrap)((Tsc[0] >> 6) & 7); + + GalTextureFilter MagFilter = (GalTextureFilter) ((Tsc[1] >> 0) & 3); + GalTextureFilter MinFilter = (GalTextureFilter) ((Tsc[1] >> 4) & 3); + GalTextureMipFilter MipFilter = (GalTextureMipFilter)((Tsc[1] >> 6) & 3); + + GalColorF BorderColor = new GalColorF( + BitConverter.Int32BitsToSingle(Tsc[4]), + BitConverter.Int32BitsToSingle(Tsc[5]), + BitConverter.Int32BitsToSingle(Tsc[6]), + BitConverter.Int32BitsToSingle(Tsc[7])); + + return new GalTextureSampler( + AddressU, + AddressV, + AddressP, + MinFilter, + MagFilter, + MipFilter, + BorderColor); + } + + private static int[] ReadWords(NvGpuVmm Vmm, long Position, int Count) + { + int[] Words = new int[Count]; + + for (int Index = 0; Index < Count; Index++, Position += 4) + { + Words[Index] = Vmm.ReadInt32(Position); + } + + return Words; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/TextureHelper.cs b/Ryujinx.Core/Gpu/TextureHelper.cs new file mode 100644 index 00000000..f0ebc1f0 --- /dev/null +++ b/Ryujinx.Core/Gpu/TextureHelper.cs @@ -0,0 +1,36 @@ +using ChocolArm64.Memory; +using System; + +namespace Ryujinx.Core.Gpu +{ + static class TextureHelper + { + public static ISwizzle GetSwizzle(Texture Texture, int Width, int Bpp) + { + switch (Texture.Swizzle) + { + case TextureSwizzle.Pitch: + case TextureSwizzle.PitchColorKey: + return new LinearSwizzle(Texture.Pitch, Bpp); + + case TextureSwizzle.BlockLinear: + case TextureSwizzle.BlockLinearColorKey: + return new BlockLinearSwizzle(Width, Bpp, Texture.BlockHeight); + } + + throw new NotImplementedException(Texture.Swizzle.ToString()); + } + + public static (AMemory Memory, long Position) GetMemoryAndPosition( + IAMemory Memory, + long Position) + { + if (Memory is NvGpuVmm Vmm) + { + return (Vmm.Memory, Vmm.GetPhysicalAddress(Position)); + } + + return ((AMemory)Memory, Position); + } + } +} diff --git a/Ryujinx.Core/Gpu/TextureReader.cs b/Ryujinx.Core/Gpu/TextureReader.cs new file mode 100644 index 00000000..f3e41046 --- /dev/null +++ b/Ryujinx.Core/Gpu/TextureReader.cs @@ -0,0 +1,160 @@ +using ChocolArm64.Memory; +using Ryujinx.Graphics.Gal; +using System; + +namespace Ryujinx.Core.Gpu +{ + public static class TextureReader + { + public static byte[] Read(IAMemory Memory, Texture Texture) + { + switch (Texture.Format) + { + case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); + case GalTextureFormat.A1B5G5R5: return Read2Bpp (Memory, Texture); + case GalTextureFormat.B5G6R5: return Read2Bpp (Memory, Texture); + case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC2: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.BC3: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC5: return Read16Bpt4x4(Memory, Texture); + } + + throw new NotImplementedException(Texture.Format.ToString()); + } + + private unsafe static byte[] Read2Bpp(IAMemory Memory, Texture Texture) + { + int Width = Texture.Width; + int Height = Texture.Height; + + byte[] Output = new byte[Width * Height * 2]; + + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2); + + (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( + Memory, + Texture.Position); + + fixed (byte* BuffPtr = Output) + { + long OutOffs = 0; + + for (int Y = 0; Y < Height; Y++) + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + + short Pixel = CpuMem.ReadInt16Unchecked(Position + Offset); + + *(short*)(BuffPtr + OutOffs) = Pixel; + + OutOffs += 2; + } + } + + return Output; + } + + private unsafe static byte[] Read4Bpp(IAMemory Memory, Texture Texture) + { + int Width = Texture.Width; + int Height = Texture.Height; + + byte[] Output = new byte[Width * Height * 4]; + + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4); + + (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( + Memory, + Texture.Position); + + fixed (byte* BuffPtr = Output) + { + long OutOffs = 0; + + for (int Y = 0; Y < Height; Y++) + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + + int Pixel = CpuMem.ReadInt32Unchecked(Position + Offset); + + *(int*)(BuffPtr + OutOffs) = Pixel; + + OutOffs += 4; + } + } + + return Output; + } + + private unsafe static byte[] Read8Bpt4x4(IAMemory Memory, Texture Texture) + { + int Width = (Texture.Width + 3) / 4; + int Height = (Texture.Height + 3) / 4; + + byte[] Output = new byte[Width * Height * 8]; + + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 8); + + (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( + Memory, + Texture.Position); + + fixed (byte* BuffPtr = Output) + { + long OutOffs = 0; + + for (int Y = 0; Y < Height; Y++) + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + + long Tile = CpuMem.ReadInt64Unchecked(Position + Offset); + + *(long*)(BuffPtr + OutOffs) = Tile; + + OutOffs += 8; + } + } + + return Output; + } + + private unsafe static byte[] Read16Bpt4x4(IAMemory Memory, Texture Texture) + { + int Width = (Texture.Width + 3) / 4; + int Height = (Texture.Height + 3) / 4; + + byte[] Output = new byte[Width * Height * 16]; + + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 16); + + (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( + Memory, + Texture.Position); + + fixed (byte* BuffPtr = Output) + { + long OutOffs = 0; + + for (int Y = 0; Y < Height; Y++) + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + + long Tile0 = CpuMem.ReadInt64Unchecked(Position + Offset + 0); + long Tile1 = CpuMem.ReadInt64Unchecked(Position + Offset + 8); + + *(long*)(BuffPtr + OutOffs + 0) = Tile0; + *(long*)(BuffPtr + OutOffs + 8) = Tile1; + + OutOffs += 16; + } + } + + return Output; + } + } +} diff --git a/Ryujinx.Core/Gpu/TextureSwizzle.cs b/Ryujinx.Core/Gpu/TextureSwizzle.cs new file mode 100644 index 00000000..3214f45f --- /dev/null +++ b/Ryujinx.Core/Gpu/TextureSwizzle.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Core.Gpu +{ + public enum TextureSwizzle + { + _1dBuffer = 0, + PitchColorKey = 1, + Pitch = 2, + BlockLinear = 3, + BlockLinearColorKey = 4 + } +}
\ No newline at end of file diff --git a/Ryujinx.Core/Gpu/TextureWriter.cs b/Ryujinx.Core/Gpu/TextureWriter.cs new file mode 100644 index 00000000..125bb8c4 --- /dev/null +++ b/Ryujinx.Core/Gpu/TextureWriter.cs @@ -0,0 +1,48 @@ +using ChocolArm64.Memory; +using Ryujinx.Graphics.Gal; +using System; + +namespace Ryujinx.Core.Gpu +{ + public static class TextureWriter + { + public static void Write(IAMemory Memory, Texture Texture, byte[] Data) + { + switch (Texture.Format) + { + case GalTextureFormat.A8B8G8R8: Write4Bpp(Memory, Texture, Data); break; + + default: throw new NotImplementedException(Texture.Format.ToString()); + } + } + + private unsafe static void Write4Bpp(IAMemory Memory, Texture Texture, byte[] Data) + { + int Width = Texture.Width; + int Height = Texture.Height; + + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4); + + (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( + Memory, + Texture.Position); + + fixed (byte* BuffPtr = Data) + { + long InOffs = 0; + + for (int Y = 0; Y < Height; Y++) + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + + int Pixel = *(int*)(BuffPtr + InOffs); + + CpuMem.WriteInt32Unchecked(Position + Offset, Pixel); + + InOffs += 4; + } + } + } + } +} |
