From 796e5d14b4fadc15439d273f8ff8f9e9afc4033a Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 2 Feb 2020 00:25:52 -0300 Subject: Use correct shader local memory size instead of a hardcoded size (#914) * Use correct shader local size instead of a hardcoded size * Remove unused uniform block * Update XML doc * Local memory size has 23 bits on maxwell * Generate compute QMD struct from nv open doc header * Remove dummy arrays when shared or local memory is not used, other improvements --- Ryujinx.Graphics.Gpu/Engine/Compute.cs | 40 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'Ryujinx.Graphics.Gpu/Engine/Compute.cs') diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs index d24d2d8d..9178cfb0 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs @@ -17,29 +17,31 @@ namespace Ryujinx.Graphics.Gpu.Engine /// Method call argument public void Dispatch(GpuState state, int argument) { - uint dispatchParamsAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress); + uint qmdAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress); - var dispatchParams = _context.MemoryAccessor.Read((ulong)dispatchParamsAddress << 8); + var qmd = _context.MemoryAccessor.Read((ulong)qmdAddress << 8); GpuVa shaderBaseAddress = state.Get(MethodOffset.ShaderBaseAddress); - ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)dispatchParams.ShaderOffset; + ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset; - // Note: A size of 0 is also invalid, the size must be at least 1. - int sharedMemorySize = Math.Clamp(dispatchParams.SharedMemorySize & 0xffff, 1, _context.Capabilities.MaximumComputeSharedMemorySize); + int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize; + + int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize); ComputeShader cs = ShaderCache.GetComputeShader( shaderGpuVa, - sharedMemorySize, - dispatchParams.UnpackBlockSizeX(), - dispatchParams.UnpackBlockSizeY(), - dispatchParams.UnpackBlockSizeZ()); + qmd.CtaThreadDimension0, + qmd.CtaThreadDimension1, + qmd.CtaThreadDimension2, + localMemorySize, + sharedMemorySize); _context.Renderer.Pipeline.SetProgram(cs.HostProgram); var samplerPool = state.Get(MethodOffset.SamplerPoolState); - TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex); + TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex); var texturePool = state.Get(MethodOffset.TexturePoolState); @@ -50,17 +52,19 @@ namespace Ryujinx.Graphics.Gpu.Engine ShaderProgramInfo info = cs.Shader.Program.Info; uint sbEnableMask = 0; - uint ubEnableMask = dispatchParams.UnpackUniformBuffersEnableMask(); + uint ubEnableMask = 0; - for (int index = 0; index < dispatchParams.UniformBuffers.Length; index++) + for (int index = 0; index < Constants.TotalCpUniformBuffers; index++) { - if ((ubEnableMask & (1 << index)) == 0) + if (!qmd.ConstantBufferValid(index)) { continue; } - ulong gpuVa = dispatchParams.UniformBuffers[index].PackAddress(); - ulong size = dispatchParams.UniformBuffers[index].UnpackSize(); + ubEnableMask |= 1u << index; + + ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32; + ulong size = (ulong)qmd.ConstantBufferSize(index); BufferManager.SetComputeUniformBuffer(index, gpuVa, size); } @@ -131,9 +135,9 @@ namespace Ryujinx.Graphics.Gpu.Engine TextureManager.CommitComputeBindings(); _context.Renderer.Pipeline.DispatchCompute( - dispatchParams.UnpackGridSizeX(), - dispatchParams.UnpackGridSizeY(), - dispatchParams.UnpackGridSizeZ()); + qmd.CtaRasterWidth, + qmd.CtaRasterHeight, + qmd.CtaRasterDepth); UpdateShaderState(state); } -- cgit v1.2.3