diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Shader')
39 files changed, 7452 insertions, 0 deletions
// File: src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs
// File: src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
// File: src/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using System;
using System.Linq;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Per-stage shader bindings, pre-built so they can be handed to the buffer and texture managers.
    /// </summary>
    internal class CachedShaderBindings
    {
        /// <summary>
        /// Texture bindings, indexed by stage.
        /// </summary>
        public TextureBindingInfo[][] TextureBindings { get; }

        /// <summary>
        /// Image bindings, indexed by stage.
        /// </summary>
        public TextureBindingInfo[][] ImageBindings { get; }

        /// <summary>
        /// Constant buffer descriptors, indexed by stage.
        /// </summary>
        public BufferDescriptor[][] ConstantBufferBindings { get; }

        /// <summary>
        /// Storage buffer descriptors, indexed by stage.
        /// </summary>
        public BufferDescriptor[][] StorageBufferBindings { get; }

        /// <summary>
        /// Highest texture binding number seen across all stages, or -1 when there is none.
        /// </summary>
        public int MaxTextureBinding { get; }

        /// <summary>
        /// Highest image binding number seen across all stages, or -1 when there is none.
        /// </summary>
        public int MaxImageBinding { get; }

        /// <summary>
        /// Builds the bindings collection from the cached stages of a program.
        /// </summary>
        /// <param name="isCompute">Whether the shader is for compute</param>
        /// <param name="stages">The stages used by the shader</param>
        public CachedShaderBindings(bool isCompute, CachedShaderStage[] stages)
        {
            int stageCount;
            int firstStage;

            if (isCompute)
            {
                stageCount = 1;
                firstStage = 0;
            }
            else
            {
                // Graphics programs are read starting at index 1 of the stages array.
                // NOTE(review): presumably index 0 holds a stage without bindings of its own - confirm with the caller.
                stageCount = Constants.ShaderStages;
                firstStage = 1;
            }

            TextureBindings = new TextureBindingInfo[stageCount][];
            ImageBindings = new TextureBindingInfo[stageCount][];
            ConstantBufferBindings = new BufferDescriptor[stageCount][];
            StorageBufferBindings = new BufferDescriptor[stageCount][];

            int highestTexture = -1;
            int highestImage = -1;

            for (int slot = 0; slot < stageCount; slot++)
            {
                CachedShaderStage stage = stages[slot + firstStage];

                if (stage != null)
                {
                    TextureBindings[slot] = stage.Info.Textures.Select(texture =>
                    {
                        highestTexture = Math.Max(highestTexture, texture.Binding);

                        return new TextureBindingInfo(
                            ShaderTexture.GetTarget(texture.Type),
                            texture.Binding,
                            texture.CbufSlot,
                            texture.HandleIndex,
                            texture.Flags);
                    }).ToArray();

                    ImageBindings[slot] = stage.Info.Images.Select(image =>
                    {
                        highestImage = Math.Max(highestImage, image.Binding);

                        return new TextureBindingInfo(
                            ShaderTexture.GetTarget(image.Type),
                            ShaderTexture.GetFormat(image.Format),
                            image.Binding,
                            image.CbufSlot,
                            image.HandleIndex,
                            image.Flags);
                    }).ToArray();

                    ConstantBufferBindings[slot] = stage.Info.CBuffers.ToArray();
                    StorageBufferBindings[slot] = stage.Info.SBuffers.ToArray();
                }
                else
                {
                    // Missing stages still get (empty) arrays so consumers never see null entries.
                    TextureBindings[slot] = Array.Empty<TextureBindingInfo>();
                    ImageBindings[slot] = Array.Empty<TextureBindingInfo>();
                    ConstantBufferBindings[slot] = Array.Empty<BufferDescriptor>();
                    StorageBufferBindings[slot] = Array.Empty<BufferDescriptor>();
                }
            }

            MaxTextureBinding = highestTexture;
            MaxImageBinding = highestImage;
        }
    }

    /// <summary>
    /// A host program together with the guest shader stages it was built from:
    /// several stages for graphics, or a single shader for compute.
    /// </summary>
    class CachedShaderProgram : IDisposable
    {
        /// <summary>
        /// Host shader program object.
        /// </summary>
        public IProgram HostProgram { get; }

        /// <summary>
        /// GPU state used to create this version of the shader.
        /// </summary>
        public ShaderSpecializationState SpecializationState { get; }

        /// <summary>
        /// Compiled shader for each shader stage.
        /// </summary>
        public CachedShaderStage[] Shaders { get; }

        /// <summary>
        /// Cached shader bindings, ready for placing into the bindings manager.
        /// </summary>
        public CachedShaderBindings Bindings { get; }

        /// <summary>
        /// Creates a new cached shader program.
        /// </summary>
        /// <param name="hostProgram">Host program with all the shader stages</param>
        /// <param name="specializationState">GPU state used to create this version of the shader</param>
        /// <param name="shaders">Shaders</param>
        public CachedShaderProgram(IProgram hostProgram, ShaderSpecializationState specializationState, params CachedShaderStage[] shaders)
        {
            HostProgram = hostProgram;
            SpecializationState = specializationState;
            Shaders = shaders;

            specializationState.Prepare(shaders);

            // A program made of exactly one stage is treated as a compute program.
            bool isCompute = shaders.Length == 1;

            Bindings = new CachedShaderBindings(isCompute, shaders);
        }

        /// <summary>
        /// Releases the host program.
        /// </summary>
        public void Dispose() => HostProgram.Dispose();
    }

    /// <summary>
    /// Holds the cached data of one shader stage.
    /// </summary>
    class CachedShaderStage
    {
        /// <summary>
        /// Shader program information.
        /// </summary>
        public ShaderProgramInfo Info { get; }

        /// <summary>
        /// Maxwell binary shader code.
        /// </summary>
        public byte[] Code { get; }

        /// <summary>
        /// Constant buffer 1 data accessed by the shader.
        /// </summary>
        public byte[] Cb1Data { get; }

        /// <summary>
        /// Creates a new holder for the given stage data.
        /// </summary>
        /// <param name="info">Shader program information</param>
        /// <param name="code">Maxwell binary shader code</param>
        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
        public CachedShaderStage(ShaderProgramInfo info, byte[] code, byte[] cb1Data)
        {
            (Info, Code, Cb1Data) = (info, code, cb1Data);
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
using Ryujinx.Graphics.Gpu.Shader.HashTable;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Hash table of cached compute programs, keyed by guest shader code.
    /// </summary>
    class ComputeShaderCacheHashTable
    {
        private readonly PartitionedHashTable<ShaderSpecializationList> _cache = new PartitionedHashTable<ShaderSpecializationList>();
        private readonly List<CachedShaderProgram> _shaderPrograms = new List<CachedShaderProgram>();

        /// <summary>
        /// Creates an empty compute shader cache hash table.
        /// </summary>
        public ComputeShaderCacheHashTable()
        {
        }

        /// <summary>
        /// Registers a program under the guest code of its first (only) stage.
        /// </summary>
        /// <param name="program">Program to be added</param>
        public void Add(CachedShaderProgram program)
        {
            byte[] guestCode = program.Shaders[0].Code;

            var specializations = _cache.GetOrAdd(guestCode, new ShaderSpecializationList());

            specializations.Add(program);
            _shaderPrograms.Add(program);
        }

        /// <summary>
        /// Looks up a cached program matching the guest code at <paramref name="gpuVa"/> and the current state.
        /// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="computeState">Compute state</param>
        /// <param name="gpuVa">GPU virtual address of the compute shader</param>
        /// <param name="program">Cached host program for the given state, if found</param>
        /// <param name="cachedGuestCode">Cached guest code, if any found</param>
        /// <returns>True if a cached host program was found, false otherwise</returns>
        public bool TryFind(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelComputeState computeState,
            ulong gpuVa,
            out CachedShaderProgram program,
            out byte[] cachedGuestCode)
        {
            var accessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);

            if (!_cache.TryFindItem(accessor, out var specializations, out cachedGuestCode))
            {
                // No entry for this guest code at all.
                program = null;

                return false;
            }

            return specializations.TryFindForCompute(channel, poolState, computeState, out program);
        }

        /// <summary>
        /// Enumerates every program that was added to the table, in insertion order.
        /// </summary>
        /// <returns>Programs added to the table</returns>
        public IEnumerable<CachedShaderProgram> GetPrograms()
        {
            foreach (CachedShaderProgram cached in _shaderPrograms)
            {
                yield return cached;
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
// File: src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using System;
using System.IO;
using System.IO.Compression;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Represents a background disk cache writer.
    /// </summary>
    class BackgroundDiskCacheWriter : IDisposable
    {
        /// <summary>
        /// Possible operation to do on the <see cref="_fileWriterWorkerQueue"/>.
        /// </summary>
        private enum CacheFileOperation
        {
            /// <summary>
            /// Operation to add a shader to the cache.
            /// </summary>
            AddShader
        }

        /// <summary>
        /// Represents an operation to perform on the <see cref="_fileWriterWorkerQueue"/>.
        /// </summary>
        private readonly struct CacheFileOperationTask
        {
            /// <summary>
            /// The type of operation to perform.
            /// </summary>
            public readonly CacheFileOperation Type;

            /// <summary>
            /// The data associated to this operation or null.
            /// </summary>
            public readonly object Data;

            /// <summary>
            /// Creates a new operation task.
            /// </summary>
            /// <param name="type">The type of operation to perform</param>
            /// <param name="data">The data associated to this operation or null</param>
            public CacheFileOperationTask(CacheFileOperation type, object data)
            {
                Type = type;
                Data = data;
            }
        }

        /// <summary>
        /// Background shader cache write information.
        /// </summary>
        private readonly struct AddShaderData
        {
            /// <summary>
            /// Cached shader program.
            /// </summary>
            public readonly CachedShaderProgram Program;

            /// <summary>
            /// Binary host code.
            /// </summary>
            public readonly byte[] HostCode;

            /// <summary>
            /// Creates a new background shader cache write information.
            /// </summary>
            /// <param name="program">Cached shader program</param>
            /// <param name="hostCode">Binary host code</param>
            public AddShaderData(CachedShaderProgram program, byte[] hostCode)
            {
                Program = program;
                HostCode = hostCode;
            }
        }

        private readonly GpuContext _context;
        private readonly DiskCacheHostStorage _hostStorage;
        private readonly AsyncWorkQueue<CacheFileOperationTask> _fileWriterWorkerQueue;

        /// <summary>
        /// Creates a new background disk cache writer.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="hostStorage">Disk cache host storage</param>
        public BackgroundDiskCacheWriter(GpuContext context, DiskCacheHostStorage hostStorage)
        {
            _context = context;
            _hostStorage = hostStorage;
            _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(ProcessTask, "GPU.BackgroundDiskCacheWriter");
        }

        /// <summary>
        /// Processes a shader cache background operation.
        /// </summary>
        /// <param name="task">Task to process</param>
        private void ProcessTask(CacheFileOperationTask task)
        {
            if (task.Type != CacheFileOperation.AddShader)
            {
                return;
            }

            AddShaderData data = (AddShaderData)task.Data;

            try
            {
                _hostStorage.AddShader(_context, data.Program, data.HostCode);
            }
            catch (Exception exception) when (exception is DiskCacheLoadException || exception is IOException)
            {
                // A failed write is logged and otherwise ignored; it does not propagate out of the task handler.
                Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {exception.Message}");
            }
        }

        /// <summary>
        /// Adds a shader program to be cached in the background.
        /// </summary>
        /// <param name="program">Shader program to cache</param>
        /// <param name="hostCode">Host binary code of the program</param>
        public void AddShader(CachedShaderProgram program, byte[] hostCode)
        {
            var payload = new AddShaderData(program, hostCode);

            _fileWriterWorkerQueue.Add(new CacheFileOperationTask(CacheFileOperation.AddShader, payload));
        }

        /// <summary>
        /// Disposes the writer and its work queue.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
        }

        /// <summary>
        /// Disposes the work queue when called from <see cref="Dispose()"/>.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="Dispose()"/></param>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _fileWriterWorkerQueue.Dispose();
            }
        }
    }

    /// <summary>
    /// Binary data serializer.
    /// </summary>
    struct BinarySerializer
    {
        private readonly Stream _stream;
        private Stream _activeStream;

        /// <summary>
        /// Creates a new binary serializer.
        /// </summary>
        /// <param name="stream">Stream to read from or write into</param>
        public BinarySerializer(Stream stream)
        {
            _stream = stream;
            _activeStream = stream;
        }

        /// <summary>
        /// Reads data from the stream.
        /// </summary>
        /// <typeparam name="T">Type of the data</typeparam>
        /// <param name="data">Data read</param>
        /// <exception cref="DiskCacheLoadException">The stream ended before the whole value could be read</exception>
        public void Read<T>(ref T data) where T : unmanaged
        {
            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));

            ReadFully(_activeStream, buffer);
        }

        /// <summary>
        /// Tries to read data from the stream.
        /// </summary>
        /// <typeparam name="T">Type of the data</typeparam>
        /// <param name="data">Data read</param>
        /// <returns>True if the read was successful, false otherwise</returns>
        public bool TryRead<T>(ref T data) where T : unmanaged
        {
            // Length is unknown on compressed streams.
            if (_activeStream == _stream)
            {
                int size = Unsafe.SizeOf<T>();
                if (_activeStream.Length - _activeStream.Position < size)
                {
                    return false;
                }
            }

            Read(ref data);
            return true;
        }

        /// <summary>
        /// Reads data prefixed with a magic and size from the stream.
        /// </summary>
        /// <typeparam name="T">Type of the data</typeparam>
        /// <param name="data">Data read</param>
        /// <param name="magic">Expected magic value, for validation</param>
        /// <exception cref="DiskCacheLoadException">
        /// The magic does not match, the stored size is negative or larger than <typeparamref name="T"/>,
        /// or the stream ended prematurely
        /// </exception>
        public void ReadWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
        {
            uint actualMagic = 0;
            int size = 0;
            Read(ref actualMagic);
            Read(ref size);

            if (actualMagic != magic)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidMagic);
            }

            // Structs are expected to expand but not shrink between versions.
            // A negative size can only come from a corrupted file and would make the slice below throw.
            if (size < 0 || size > Unsafe.SizeOf<T>())
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidLength);
            }

            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1)).Slice(0, size);

            ReadFully(_activeStream, buffer);
        }

        /// <summary>
        /// Writes data into the stream.
        /// </summary>
        /// <typeparam name="T">Type of the data</typeparam>
        /// <param name="data">Data to be written</param>
        public void Write<T>(ref T data) where T : unmanaged
        {
            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
            _activeStream.Write(buffer);
        }

        /// <summary>
        /// Writes data prefixed with a magic and size into the stream.
        /// </summary>
        /// <typeparam name="T">Type of the data</typeparam>
        /// <param name="data">Data to write</param>
        /// <param name="magic">Magic value to write</param>
        public void WriteWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
        {
            int size = Unsafe.SizeOf<T>();
            Write(ref magic);
            Write(ref size);
            Write(ref data);
        }

        /// <summary>
        /// Indicates that all data that will be read from the stream has been compressed.
        /// </summary>
        public void BeginCompression()
        {
            CompressionAlgorithm algorithm = CompressionAlgorithm.None;
            Read(ref algorithm);

            if (algorithm == CompressionAlgorithm.Deflate)
            {
                _activeStream = new DeflateStream(_stream, CompressionMode.Decompress, true);
            }
        }

        /// <summary>
        /// Indicates that all data that will be written into the stream should be compressed.
        /// </summary>
        /// <param name="algorithm">Compression algorithm that should be used</param>
        public void BeginCompression(CompressionAlgorithm algorithm)
        {
            Write(ref algorithm);

            if (algorithm == CompressionAlgorithm.Deflate)
            {
                _activeStream = new DeflateStream(_stream, CompressionLevel.SmallestSize, true);
            }
        }

        /// <summary>
        /// Indicates the end of a compressed chunk.
        /// </summary>
        /// <remarks>
        /// Any data written after this will not be compressed unless <see cref="BeginCompression(CompressionAlgorithm)"/> is called again.
        /// Any data read after this will be assumed to be uncompressed unless <see cref="BeginCompression"/> is called again.
        /// </remarks>
        public void EndCompression()
        {
            if (_activeStream != _stream)
            {
                _activeStream.Dispose();
                _activeStream = _stream;
            }
        }

        /// <summary>
        /// Reads compressed data from the stream.
        /// </summary>
        /// <remarks>
        /// <paramref name="data"/> must have the exact length of the uncompressed data,
        /// otherwise decompression will fail.
        /// Unknown algorithm identifiers are ignored and leave <paramref name="data"/> untouched.
        /// </remarks>
        /// <param name="stream">Stream to read from</param>
        /// <param name="data">Buffer to write the uncompressed data into</param>
        /// <exception cref="DiskCacheLoadException">The stream ended before <paramref name="data"/> could be filled</exception>
        public static void ReadCompressed(Stream stream, Span<byte> data)
        {
            int algorithmId = stream.ReadByte();

            // ReadByte returns -1 at end of stream; the algorithm prefix is always written, so its absence means truncation.
            if (algorithmId < 0)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            switch ((CompressionAlgorithm)algorithmId)
            {
                case CompressionAlgorithm.None:
                    // A single Read call may return fewer bytes than requested, so keep reading until full.
                    ReadFully(stream, data);
                    break;
                case CompressionAlgorithm.Deflate:
                    using (var deflateStream = new DeflateStream(stream, CompressionMode.Decompress, true))
                    {
                        ReadFully(deflateStream, data);
                    }
                    break;
            }
        }

        /// <summary>
        /// Compresses and writes the compressed data into the stream.
        /// </summary>
        /// <param name="stream">Stream to write into</param>
        /// <param name="data">Data to compress</param>
        /// <param name="algorithm">Compression algorithm to be used</param>
        public static void WriteCompressed(Stream stream, ReadOnlySpan<byte> data, CompressionAlgorithm algorithm)
        {
            stream.WriteByte((byte)algorithm);

            switch (algorithm)
            {
                case CompressionAlgorithm.None:
                    stream.Write(data);
                    break;
                case CompressionAlgorithm.Deflate:
                    // Disposing flushes the compressed data; the underlying stream is left open.
                    using (var deflateStream = new DeflateStream(stream, CompressionLevel.SmallestSize, true))
                    {
                        deflateStream.Write(data);
                    }
                    break;
            }
        }

        /// <summary>
        /// Fills the whole buffer from the stream, failing instead of spinning forever when the stream ends early.
        /// </summary>
        /// <param name="stream">Stream to read from</param>
        /// <param name="buffer">Buffer to fill completely</param>
        /// <exception cref="DiskCacheLoadException">The stream ended before the buffer was full</exception>
        private static void ReadFully(Stream stream, Span<byte> buffer)
        {
            for (int offset = 0; offset < buffer.Length;)
            {
                int read = stream.Read(buffer.Slice(offset));

                // Read returns 0 only at end of stream; without this check the loop would never terminate.
                if (read == 0)
                {
                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
                }

                offset += read;
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Identifies how cache data is compressed.
    /// </summary>
    enum CompressionAlgorithm : byte
    {
        /// <summary>
        /// Data is stored without any compression.
        /// </summary>
        None = 0,

        /// <summary>
        /// Data is compressed with Deflate (RFC 1951).
        /// </summary>
        Deflate = 1
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs
using Ryujinx.Common.Logging;
using System.IO;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Common disk cache utility methods.
    /// </summary>
    static class DiskCacheCommon
    {
        /// <summary>
        /// Opens a file for read or write.
        /// </summary>
        /// <remarks>
        /// Writable files are opened with <see cref="FileMode.OpenOrCreate"/> and read/write access;
        /// read-only files must already exist. In both cases other readers are allowed.
        /// </remarks>
        /// <param name="basePath">Base path of the file (should not include the file name)</param>
        /// <param name="fileName">Name of the file</param>
        /// <param name="writable">Indicates if the file will be read or written</param>
        /// <returns>File stream</returns>
        /// <exception cref="DiskCacheLoadException">The file could not be opened due to an I/O or permission error</exception>
        public static FileStream OpenFile(string basePath, string fileName, bool writable)
        {
            string fullPath = Path.Combine(basePath, fileName);

            FileMode mode = writable ? FileMode.OpenOrCreate : FileMode.Open;
            FileAccess access = writable ? FileAccess.ReadWrite : FileAccess.Read;

            try
            {
                return new FileStream(fullPath, mode, access, FileShare.Read);
            }
            catch (System.Exception exception) when (exception is IOException || exception is System.UnauthorizedAccessException)
            {
                // Permission failures surface as UnauthorizedAccessException, which is not an IOException;
                // both are reported to callers as the same "no access" cache error.
                Logger.Error?.Print(LogClass.Gpu, $"Could not access file \"{fullPath}\". {exception.Message}");

                throw new DiskCacheLoadException(DiskCacheLoadResult.NoAccess);
            }
        }

        /// <summary>
        /// Gets the compression algorithm that should be used when writing the disk cache.
        /// </summary>
        /// <returns>Compression algorithm</returns>
        public static CompressionAlgorithm GetCompressionAlgorithm()
        {
            return CompressionAlgorithm.Deflate;
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs new file mode 100644 index 00000000..17639ca1 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -0,0 +1,266 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// <summary> + /// Represents a GPU state and memory accessor. + /// </summary> + class DiskCacheGpuAccessor : GpuAccessorBase, IGpuAccessor + { + private readonly ReadOnlyMemory<byte> _data; + private readonly ReadOnlyMemory<byte> _cb1Data; + private readonly ShaderSpecializationState _oldSpecState; + private readonly ShaderSpecializationState _newSpecState; + private readonly int _stageIndex; + private readonly bool _isVulkan; + private readonly ResourceCounts _resourceCounts; + + /// <summary> + /// Creates a new instance of the cached GPU state accessor for shader translation. 
+ /// </summary> + /// <param name="context">GPU context</param> + /// <param name="data">The data of the shader</param> + /// <param name="cb1Data">The constant buffer 1 data of the shader</param> + /// <param name="oldSpecState">Shader specialization state of the cached shader</param> + /// <param name="newSpecState">Shader specialization state of the recompiled shader</param> + /// <param name="stageIndex">Shader stage index</param> + public DiskCacheGpuAccessor( + GpuContext context, + ReadOnlyMemory<byte> data, + ReadOnlyMemory<byte> cb1Data, + ShaderSpecializationState oldSpecState, + ShaderSpecializationState newSpecState, + ResourceCounts counts, + int stageIndex) : base(context, counts, stageIndex) + { + _data = data; + _cb1Data = cb1Data; + _oldSpecState = oldSpecState; + _newSpecState = newSpecState; + _stageIndex = stageIndex; + _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; + _resourceCounts = counts; + } + + /// <inheritdoc/> + public uint ConstantBuffer1Read(int offset) + { + if (offset + sizeof(uint) > _cb1Data.Length) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.InvalidCb1DataLength); + } + + return MemoryMarshal.Cast<byte, uint>(_cb1Data.Span.Slice(offset))[0]; + } + + /// <inheritdoc/> + public void Log(string message) + { + Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); + } + + /// <inheritdoc/> + public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize) + { + return MemoryMarshal.Cast<byte, ulong>(_data.Span.Slice((int)address)); + } + + /// <inheritdoc/> + public bool QueryAlphaToCoverageDitherEnable() + { + return _oldSpecState.GraphicsState.AlphaToCoverageEnable && _oldSpecState.GraphicsState.AlphaToCoverageDitherEnable; + } + + /// <inheritdoc/> + public AlphaTestOp QueryAlphaTestCompare() + { + if (!_isVulkan || !_oldSpecState.GraphicsState.AlphaTestEnable) + { + return AlphaTestOp.Always; + } + + return _oldSpecState.GraphicsState.AlphaTestCompare switch + { + CompareOp.Never or 
CompareOp.NeverGl => AlphaTestOp.Never, + CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less, + CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal, + CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual, + CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater, + CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual, + CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual, + _ => AlphaTestOp.Always + }; + } + + /// <inheritdoc/> + public float QueryAlphaTestReference() => _oldSpecState.GraphicsState.AlphaTestReference; + + /// <inheritdoc/> + public AttributeType QueryAttributeType(int location) + { + return _oldSpecState.GraphicsState.AttributeTypes[location]; + } + + /// <inheritdoc/> + public AttributeType QueryFragmentOutputType(int location) + { + return _oldSpecState.GraphicsState.FragmentOutputTypes[location]; + } + + /// <inheritdoc/> + public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX; + + /// <inheritdoc/> + public int QueryComputeLocalSizeY() => _oldSpecState.ComputeState.LocalSizeY; + + /// <inheritdoc/> + public int QueryComputeLocalSizeZ() => _oldSpecState.ComputeState.LocalSizeZ; + + /// <inheritdoc/> + public int QueryComputeLocalMemorySize() => _oldSpecState.ComputeState.LocalMemorySize; + + /// <inheritdoc/> + public int QueryComputeSharedMemorySize() => _oldSpecState.ComputeState.SharedMemorySize; + + /// <inheritdoc/> + public uint QueryConstantBufferUse() + { + _newSpecState.RecordConstantBufferUse(_stageIndex, _oldSpecState.ConstantBufferUse[_stageIndex]); + return _oldSpecState.ConstantBufferUse[_stageIndex]; + } + + /// <inheritdoc/> + public bool QueryHasConstantBufferDrawParameters() + { + return _oldSpecState.GraphicsState.HasConstantBufferDrawParameters; + } + + /// <inheritdoc/> + public bool QueryDualSourceBlendEnable() + { + return _oldSpecState.GraphicsState.DualSourceBlendEnable; + } + + /// <inheritdoc/> + public 
InputTopology QueryPrimitiveTopology() + { + _newSpecState.RecordPrimitiveTopology(); + return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode); + } + + /// <inheritdoc/> + public bool QueryProgramPointSize() + { + return _oldSpecState.GraphicsState.ProgramPointSizeEnable; + } + + /// <inheritdoc/> + public float QueryPointSize() + { + return _oldSpecState.GraphicsState.PointSize; + } + + /// <inheritdoc/> + public bool QueryTessCw() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackCw(); + } + + /// <inheritdoc/> + public TessPatchType QueryTessPatchType() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackPatchType(); + } + + /// <inheritdoc/> + public TessSpacing QueryTessSpacing() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackSpacing(); + } + + /// <inheritdoc/> + public TextureFormat QueryTextureFormat(int handle, int cbufSlot) + { + _newSpecState.RecordTextureFormat(_stageIndex, handle, cbufSlot); + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + return ConvertToTextureFormat(format, formatSrgb); + } + + /// <inheritdoc/> + public SamplerType QuerySamplerType(int handle, int cbufSlot) + { + _newSpecState.RecordTextureSamplerType(_stageIndex, handle, cbufSlot); + return _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot).ConvertSamplerType(); + } + + /// <inheritdoc/> + public bool QueryTextureCoordNormalized(int handle, int cbufSlot) + { + _newSpecState.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot); + return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + } + + /// <inheritdoc/> + public bool QueryTransformDepthMinusOneToOne() + { + return _oldSpecState.GraphicsState.DepthMode; + } + + /// <inheritdoc/> + public bool QueryTransformFeedbackEnabled() + { + return _oldSpecState.TransformFeedbackDescriptors != null; + } + + /// <inheritdoc/> + public ReadOnlySpan<byte> 
QueryTransformFeedbackVaryingLocations(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].AsSpan(); + } + + /// <inheritdoc/> + public int QueryTransformFeedbackStride(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].Stride; + } + + /// <inheritdoc/> + public bool QueryEarlyZForce() + { + _newSpecState.RecordEarlyZForce(); + return _oldSpecState.GraphicsState.EarlyZForce; + } + + /// <inheritdoc/> + public bool QueryHasUnalignedStorageBuffer() + { + return _oldSpecState.GraphicsState.HasUnalignedStorageBuffer || _oldSpecState.ComputeState.HasUnalignedStorageBuffer; + } + + /// <inheritdoc/> + public bool QueryViewportTransformDisable() + { + return _oldSpecState.GraphicsState.ViewportTransformDisable; + } + + /// <inheritdoc/> + public void RegisterTexture(int handle, int cbufSlot) + { + if (!_oldSpecState.TextureRegistered(_stageIndex, handle, cbufSlot)) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.MissingTextureDescriptor); + } + + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + TextureTarget target = _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot); + bool coordNormalized = _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + _newSpecState.RegisterTexture(_stageIndex, handle, cbufSlot, format, formatSrgb, target, coordNormalized); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs new file mode 100644 index 00000000..01034b49 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs @@ -0,0 +1,459 @@ +using Ryujinx.Common; +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// <summary> + /// On-disk shader cache storage for guest code. 
/// </summary>
class DiskCacheGuestStorage
{
    private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24);

    private const ushort VersionMajor = 1;
    private const ushort VersionMinor = 1;
    private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor;

    private const string TocFileName = "guest.toc";
    private const string DataFileName = "guest.data";

    private readonly string _basePath;

    /// <summary>
    /// TOC (Table of contents) file header.
    /// </summary>
    private struct TocHeader
    {
        /// <summary>
        /// Magic value, for validation and identification purposes.
        /// </summary>
        public uint Magic;

        /// <summary>
        /// File format version.
        /// </summary>
        public uint Version;

        /// <summary>
        /// Header padding.
        /// </summary>
        public uint Padding;

        /// <summary>
        /// Number of modifications to the file, also the shaders count.
        /// </summary>
        public uint ModificationsCount;

        /// <summary>
        /// Reserved space, to be used in the future. Write as zero.
        /// </summary>
        public ulong Reserved;

        /// <summary>
        /// Reserved space, to be used in the future. Write as zero.
        /// </summary>
        public ulong Reserved2;
    }

    /// <summary>
    /// TOC (Table of contents) file entry.
    /// </summary>
    private struct TocEntry
    {
        /// <summary>
        /// Offset of the data on the data file.
        /// </summary>
        public uint Offset;

        /// <summary>
        /// Code size.
        /// </summary>
        public uint CodeSize;

        /// <summary>
        /// Constant buffer 1 data size.
        /// </summary>
        public uint Cb1DataSize;

        /// <summary>
        /// Hash of the code and constant buffer data.
        /// </summary>
        public uint Hash;
    }

    /// <summary>
    /// TOC (Table of contents) memory cache entry.
    /// </summary>
    private struct TocMemoryEntry
    {
        /// <summary>
        /// Offset of the data on the data file.
        /// </summary>
        public uint Offset;

        /// <summary>
        /// Code size.
        /// </summary>
        public uint CodeSize;

        /// <summary>
        /// Constant buffer 1 data size.
        /// </summary>
        public uint Cb1DataSize;

        /// <summary>
        /// Index of the shader on the cache.
        /// </summary>
        public readonly int Index;

        /// <summary>
        /// Creates a new TOC memory entry.
        /// </summary>
        /// <param name="offset">Offset of the data on the data file</param>
        /// <param name="codeSize">Code size</param>
        /// <param name="cb1DataSize">Constant buffer 1 data size</param>
        /// <param name="index">Index of the shader on the cache</param>
        public TocMemoryEntry(uint offset, uint codeSize, uint cb1DataSize, int index)
        {
            Offset = offset;
            CodeSize = codeSize;
            Cb1DataSize = cb1DataSize;
            Index = index;
        }
    }

    // In-memory copy of the TOC, keyed by entry hash. Several entries can share one hash.
    private Dictionary<uint, List<TocMemoryEntry>> _toc;

    // Modifications count of the TOC file at the time _toc was last loaded or updated.
    private uint _tocModificationsCount;

    // Guest code and constant buffer 1 data already loaded, indexed by shader index.
    private (byte[], byte[])[] _cache;

    /// <summary>
    /// Creates a new disk cache guest storage.
    /// </summary>
    /// <param name="basePath">Base path of the disk shader cache</param>
    public DiskCacheGuestStorage(string basePath)
    {
        _basePath = basePath;
    }

    /// <summary>
    /// Checks if the TOC (table of contents) file for the guest cache exists.
    /// </summary>
    /// <returns>True if the file exists, false otherwise</returns>
    public bool TocFileExists()
    {
        return File.Exists(Path.Combine(_basePath, TocFileName));
    }

    /// <summary>
    /// Checks if the data file for the guest cache exists.
    /// </summary>
    /// <returns>True if the file exists, false otherwise</returns>
    public bool DataFileExists()
    {
        return File.Exists(Path.Combine(_basePath, DataFileName));
    }

    /// <summary>
    /// Opens the guest cache TOC (table of contents) file.
    /// </summary>
    /// <returns>File stream</returns>
    public Stream OpenTocFileStream()
    {
        return DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: false);
    }

    /// <summary>
    /// Opens the guest cache data file.
    /// </summary>
    /// <returns>File stream</returns>
    public Stream OpenDataFileStream()
    {
        return DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: false);
    }

    /// <summary>
    /// Clear all content from the guest cache files.
    /// </summary>
    public void ClearCache()
    {
        using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true);
        using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true);

        tocFileStream.SetLength(0);
        dataFileStream.SetLength(0);
    }

    /// <summary>
    /// Loads the guest cache from file or memory cache.
    /// </summary>
    /// <param name="tocFileStream">Guest TOC file stream</param>
    /// <param name="dataFileStream">Guest data file stream</param>
    /// <param name="index">Guest shader index</param>
    /// <returns>Guest code and constant buffer 1 data</returns>
    /// <exception cref="DiskCacheLoadException">
    /// Thrown when the index has no entry on the TOC file, or when the entry points outside of the data file.
    /// </exception>
    public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
    {
        int shadersCount = GetShadersCountFromLength(tocFileStream.Length);

        // The index is read from a cache file, so it must be validated before it is used
        // to size the memory cache or to seek on the TOC.
        if (index < 0 || index >= shadersCount)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        if (_cache == null || index >= _cache.Length)
        {
            // Grow instead of replacing, so that entries that were already loaded stay cached.
            Array.Resize(ref _cache, shadersCount);
        }

        (byte[] guestCode, byte[] cb1Data) = _cache[index];

        if (guestCode == null || cb1Data == null)
        {
            BinarySerializer tocReader = new BinarySerializer(tocFileStream);
            tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (long)index * Unsafe.SizeOf<TocEntry>(), SeekOrigin.Begin);

            TocEntry entry = new TocEntry();
            tocReader.Read(ref entry);

            ulong dataLength = (ulong)dataFileStream.Length;

            // Validate the entry before allocating anything from sizes stored on the file.
            // The constant buffer data is stored uncompressed, so it must fit on the file.
            if (entry.Offset >= dataLength || entry.Cb1DataSize > dataLength - entry.Offset)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            guestCode = new byte[entry.CodeSize];
            cb1Data = new byte[entry.Cb1DataSize];

            dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);

            if (!TryReadExact(dataFileStream, cb1Data))
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            BinarySerializer.ReadCompressed(dataFileStream, guestCode);

            _cache[index] = (guestCode, cb1Data);
        }

        return new GuestCodeAndCbData(guestCode, cb1Data);
    }

    /// <summary>
    /// Clears guest code memory cache, forcing future loads to be from file.
    /// </summary>
    public void ClearMemoryCache()
    {
        _cache = null;
    }

    /// <summary>
    /// Calculates the guest shaders count from the TOC file length.
    /// </summary>
    /// <param name="length">TOC file length</param>
    /// <returns>Shaders count</returns>
    private static int GetShadersCountFromLength(long length)
    {
        return (int)((length - Unsafe.SizeOf<TocHeader>()) / Unsafe.SizeOf<TocEntry>());
    }

    /// <summary>
    /// Fills a buffer from a stream, reading as many times as needed.
    /// </summary>
    /// <remarks>
    /// A single <see cref="Stream.Read(Span{byte})"/> call is allowed to return less data than requested.
    /// </remarks>
    /// <param name="stream">Stream to read from</param>
    /// <param name="buffer">Buffer to be completely filled</param>
    /// <returns>True if the buffer was filled, false if the stream ended first</returns>
    private static bool TryReadExact(Stream stream, Span<byte> buffer)
    {
        while (!buffer.IsEmpty)
        {
            int read = stream.Read(buffer);

            if (read <= 0)
            {
                return false;
            }

            buffer = buffer.Slice(read);
        }

        return true;
    }

    /// <summary>
    /// Adds a guest shader to the cache.
    /// </summary>
    /// <remarks>
    /// If the shader is already on the cache, the existing index will be returned and nothing will be written.
    /// </remarks>
    /// <param name="data">Guest code</param>
    /// <param name="cb1Data">Constant buffer 1 data accessed by the code</param>
    /// <returns>Index of the shader on the cache</returns>
    public int AddShader(ReadOnlySpan<byte> data, ReadOnlySpan<byte> cb1Data)
    {
        using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true);
        using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true);

        TocHeader header = new TocHeader();

        LoadOrCreateToc(tocFileStream, ref header);

        uint hash = CalcHash(data, cb1Data);

        if (_toc.TryGetValue(hash, out var list))
        {
            foreach (var entry in list)
            {
                if (data.Length != entry.CodeSize || cb1Data.Length != entry.Cb1DataSize)
                {
                    continue;
                }

                dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);
                byte[] cachedCb1Data = new byte[entry.Cb1DataSize];

                // A truncated or different constant buffer can't be a match,
                // so the code does not need to be decompressed in that case.
                if (!TryReadExact(dataFileStream, cachedCb1Data) || !cb1Data.SequenceEqual(cachedCb1Data))
                {
                    continue;
                }

                byte[] cachedCode = new byte[entry.CodeSize];
                BinarySerializer.ReadCompressed(dataFileStream, cachedCode);

                if (data.SequenceEqual(cachedCode))
                {
                    return entry.Index;
                }
            }
        }

        return WriteNewEntry(tocFileStream, dataFileStream, ref header, data, cb1Data, hash);
    }

    /// <summary>
    /// Loads the guest cache TOC file, or create a new one if not present.
    /// </summary>
    /// <param name="tocFileStream">Guest TOC file stream</param>
    /// <param name="header">Set to the TOC file header</param>
    private void LoadOrCreateToc(Stream tocFileStream, ref TocHeader header)
    {
        BinarySerializer reader = new BinarySerializer(tocFileStream);

        if (!reader.TryRead(ref header) || header.Magic != TocMagic || header.Version != VersionPacked)
        {
            CreateToc(tocFileStream, ref header);

            // The file was started over, whatever was indexed in memory no longer exists on it.
            _toc = null;
        }

        if (_toc == null || header.ModificationsCount != _tocModificationsCount)
        {
            if (!LoadTocEntries(tocFileStream, ref reader))
            {
                // The entries could only be partially read. Start the file over, and drop the
                // entries that were indexed so far, otherwise they would point to shader
                // indices that are not present on the new file.
                CreateToc(tocFileStream, ref header);
                _toc = new Dictionary<uint, List<TocMemoryEntry>>();
            }

            _tocModificationsCount = header.ModificationsCount;
        }
    }

    /// <summary>
    /// Creates a new guest cache TOC file.
    /// </summary>
    /// <param name="tocFileStream">Guest TOC file stream</param>
    /// <param name="header">Set to the TOC header</param>
    private void CreateToc(Stream tocFileStream, ref TocHeader header)
    {
        BinarySerializer writer = new BinarySerializer(tocFileStream);

        header.Magic = TocMagic;
        header.Version = VersionPacked;
        header.Padding = 0;
        header.ModificationsCount = 0;
        header.Reserved = 0;
        header.Reserved2 = 0;

        if (tocFileStream.Length > 0)
        {
            tocFileStream.Seek(0, SeekOrigin.Begin);
            tocFileStream.SetLength(0);
        }

        writer.Write(ref header);
    }

    /// <summary>
    /// Reads all the entries on the guest TOC file.
    /// </summary>
    /// <param name="tocFileStream">Guest TOC file stream</param>
    /// <param name="reader">TOC file reader</param>
    /// <returns>True if the operation was successful, false otherwise</returns>
    private bool LoadTocEntries(Stream tocFileStream, ref BinarySerializer reader)
    {
        _toc = new Dictionary<uint, List<TocMemoryEntry>>();

        TocEntry entry = new TocEntry();
        int index = 0;

        while (tocFileStream.Position < tocFileStream.Length)
        {
            if (!reader.TryRead(ref entry))
            {
                return false;
            }

            AddTocMemoryEntry(entry.Offset, entry.CodeSize, entry.Cb1DataSize, entry.Hash, index++);
        }

        return true;
    }

    /// <summary>
    /// Writes a new guest code entry into the file.
    /// </summary>
    /// <param name="tocFileStream">TOC file stream</param>
    /// <param name="dataFileStream">Data file stream</param>
    /// <param name="header">TOC header, to be updated with the new count</param>
    /// <param name="data">Guest code</param>
    /// <param name="cb1Data">Constant buffer 1 data accessed by the guest code</param>
    /// <param name="hash">Code and constant buffer data hash</param>
    /// <returns>Entry index</returns>
    private int WriteNewEntry(
        Stream tocFileStream,
        Stream dataFileStream,
        ref TocHeader header,
        ReadOnlySpan<byte> data,
        ReadOnlySpan<byte> cb1Data,
        uint hash)
    {
        BinarySerializer tocWriter = new BinarySerializer(tocFileStream);

        // The data is always appended. Offsets are stored as 32-bit values,
        // the checked conversion throws rather than storing a truncated offset.
        dataFileStream.Seek(0, SeekOrigin.End);
        uint dataOffset = checked((uint)dataFileStream.Position);
        uint codeSize = (uint)data.Length;
        uint cb1DataSize = (uint)cb1Data.Length;
        dataFileStream.Write(cb1Data);
        BinarySerializer.WriteCompressed(dataFileStream, data, DiskCacheCommon.GetCompressionAlgorithm());

        // Rewrite the header with the new modifications count, and keep the
        // in-memory count in sync so that the TOC is not reloaded on the next call.
        _tocModificationsCount = ++header.ModificationsCount;
        tocFileStream.Seek(0, SeekOrigin.Begin);
        tocWriter.Write(ref header);

        TocEntry entry = new TocEntry()
        {
            Offset = dataOffset,
            CodeSize = codeSize,
            Cb1DataSize = cb1DataSize,
            Hash = hash
        };

        // The index of the new entry is the amount of entries that are already on the file.
        tocFileStream.Seek(0, SeekOrigin.End);
        int index = (int)((tocFileStream.Position - Unsafe.SizeOf<TocHeader>()) / Unsafe.SizeOf<TocEntry>());

        tocWriter.Write(ref entry);

        AddTocMemoryEntry(dataOffset, codeSize, cb1DataSize, hash, index);

        return index;
    }

    /// <summary>
    /// Adds an entry to the memory TOC cache. This can be used to avoid reading the TOC file all the time.
    /// </summary>
    /// <param name="dataOffset">Offset of the code and constant buffer data in the data file</param>
    /// <param name="codeSize">Code size</param>
    /// <param name="cb1DataSize">Constant buffer 1 data size</param>
    /// <param name="hash">Code and constant buffer data hash</param>
    /// <param name="index">Index of the data on the cache</param>
    private void AddTocMemoryEntry(uint dataOffset, uint codeSize, uint cb1DataSize, uint hash, int index)
    {
        if (!_toc.TryGetValue(hash, out var list))
        {
            _toc.Add(hash, list = new List<TocMemoryEntry>());
        }

        list.Add(new TocMemoryEntry(dataOffset, codeSize, cb1DataSize, index));
    }

    /// <summary>
    /// Calculates the hash for a data pair.
    /// </summary>
    /// <param name="data">Data 1</param>
    /// <param name="data2">Data 2</param>
    /// <returns>Hash of both data</returns>
    private static uint CalcHash(ReadOnlySpan<byte> data, ReadOnlySpan<byte> data2)
    {
        // The result is stored on the TOC entries, so the formula must not change.
        return CalcHash(data2) * 23 ^ CalcHash(data);
    }

    /// <summary>
    /// Calculates the hash for data.
    /// </summary>
    /// <param name="data">Data to be hashed</param>
    /// <returns>Hash of the data</returns>
    private static uint CalcHash(ReadOnlySpan<byte> data)
    {
        return (uint)XXHash128.ComputeHash(data).Low;
    }
}
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs new file mode 100644 index 00000000..b182f299 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -0,0 +1,839 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.IO; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// <summary> + /// On-disk shader cache storage for host code. + /// </summary> + class DiskCacheHostStorage + { + private const uint TocsMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'S' << 24); + private const uint TochMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'H' << 24); + private const uint ShdiMagic = (byte)'S' | ((byte)'H' << 8) | ((byte)'D' << 16) | ((byte)'I' << 24); + private const uint BufdMagic = (byte)'B' | ((byte)'U' << 8) | ((byte)'F' << 16) | ((byte)'D' << 24); + private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24); + + private const ushort FileFormatVersionMajor = 1; + private const ushort FileFormatVersionMinor = 2; + private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; + private const uint CodeGenVersion = 4735; + + private const string SharedTocFileName = "shared.toc"; + private const string SharedDataFileName = "shared.data"; + + private readonly string _basePath; + + public bool CacheEnabled => !string.IsNullOrEmpty(_basePath); + + /// <summary> + /// TOC (Table of contents) file header. + /// </summary> + private struct TocHeader + { + /// <summary> + /// Magic value, for validation and identification. + /// </summary> + public uint Magic; + + /// <summary> + /// File format version. 
+ /// </summary> + public uint FormatVersion; + + /// <summary> + /// Generated shader code version. + /// </summary> + public uint CodeGenVersion; + + /// <summary> + /// Header padding. + /// </summary> + public uint Padding; + + /// <summary> + /// Timestamp of when the file was first created. + /// </summary> + public ulong Timestamp; + + /// <summary> + /// Reserved space, to be used in the future. Write as zero. + /// </summary> + public ulong Reserved; + } + + /// <summary> + /// Offset and size pair. + /// </summary> + private struct OffsetAndSize + { + /// <summary> + /// Offset. + /// </summary> + public ulong Offset; + + /// <summary> + /// Size of uncompressed data. + /// </summary> + public uint UncompressedSize; + + /// <summary> + /// Size of compressed data. + /// </summary> + public uint CompressedSize; + } + + /// <summary> + /// Per-stage data entry. + /// </summary> + private struct DataEntryPerStage + { + /// <summary> + /// Index of the guest code on the guest code cache TOC file. + /// </summary> + public int GuestCodeIndex; + } + + /// <summary> + /// Per-program data entry. + /// </summary> + private struct DataEntry + { + /// <summary> + /// Bit mask where each bit set is a used shader stage. Should be zero for compute shaders. + /// </summary> + public uint StagesBitMask; + } + + /// <summary> + /// Per-stage shader information, returned by the translator. + /// </summary> + private struct DataShaderInfo + { + /// <summary> + /// Total constant buffers used. + /// </summary> + public ushort CBuffersCount; + + /// <summary> + /// Total storage buffers used. + /// </summary> + public ushort SBuffersCount; + + /// <summary> + /// Total textures used. + /// </summary> + public ushort TexturesCount; + + /// <summary> + /// Total images used. + /// </summary> + public ushort ImagesCount; + + /// <summary> + /// Shader stage. 
+ /// </summary> + public ShaderStage Stage; + + /// <summary> + /// Indicates if the shader accesses the Instance ID built-in variable. + /// </summary> + public bool UsesInstanceId; + + /// <summary> + /// Indicates if the shader modifies the Layer built-in variable. + /// </summary> + public bool UsesRtLayer; + + /// <summary> + /// Bit mask with the clip distances written on the vertex stage. + /// </summary> + public byte ClipDistancesWritten; + + /// <summary> + /// Bit mask of the render target components written by the fragment stage. + /// </summary> + public int FragmentOutputMap; + + /// <summary> + /// Indicates if the vertex shader accesses draw parameters. + /// </summary> + public bool UsesDrawParameters; + } + + private readonly DiskCacheGuestStorage _guestStorage; + + /// <summary> + /// Creates a disk cache host storage. + /// </summary> + /// <param name="basePath">Base path of the shader cache</param> + public DiskCacheHostStorage(string basePath) + { + _basePath = basePath; + _guestStorage = new DiskCacheGuestStorage(basePath); + + if (CacheEnabled) + { + Directory.CreateDirectory(basePath); + } + } + + /// <summary> + /// Gets the total of host programs on the cache. + /// </summary> + /// <returns>Host programs count</returns> + public int GetProgramCount() + { + string tocFilePath = Path.Combine(_basePath, SharedTocFileName); + + if (!File.Exists(tocFilePath)) + { + return 0; + } + + return Math.Max((int)((new FileInfo(tocFilePath).Length - Unsafe.SizeOf<TocHeader>()) / sizeof(ulong)), 0); + } + + /// <summary> + /// Guest the name of the host program cache file, with extension. 
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>Name of the file, without extension</returns>
private static string GetHostFileName(GpuContext context)
{
    // The name has the form "api_vendor", both parts in lower case.
    string api = context.Capabilities.Api.ToString().ToLowerInvariant();
    string vendor = context.Capabilities.VendorName.ToLowerInvariant();

    return api + "_" + RemoveInvalidCharacters(vendor);
}

/// <summary>
/// Keeps only the text before the first space of a file name,
/// and strips from it every character that is not valid on a file name.
/// </summary>
/// <param name="fileName">File name</param>
/// <returns>Filtered file name</returns>
private static string RemoveInvalidCharacters(string fileName)
{
    int firstSpace = fileName.IndexOf(' ');
    string firstWord = firstSpace < 0 ? fileName : fileName.Substring(0, firstSpace);

    // Splitting on the invalid characters and joining the pieces back removes them.
    string[] validPieces = firstWord.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries);

    return string.Concat(validPieces);
}

/// <summary>
/// Gets the name of the host TOC file, that is the host file name with the ".toc" extension.
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>File name</returns>
private static string GetHostTocFileName(GpuContext context)
{
    return $"{GetHostFileName(context)}.toc";
}

/// <summary>
/// Gets the name of the host data file, that is the host file name with the ".data" extension.
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>File name</returns>
private static string GetHostDataFileName(GpuContext context)
{
    return $"{GetHostFileName(context)}.data";
}

/// <summary>
/// Checks if a disk cache exists for the current application.
/// </summary>
/// <returns>True if a disk cache exists, false otherwise</returns>
public bool CacheExists()
{
    // The cache is only usable when the shared and the guest files are all present.
    return File.Exists(Path.Combine(_basePath, SharedTocFileName))
        && File.Exists(Path.Combine(_basePath, SharedDataFileName))
        && _guestStorage.TocFileExists()
        && _guestStorage.DataFileExists();
}

/// <summary>
/// Loads all shaders from the cache.
/// </summary>
/// <remarks>
/// Programs are queued on the loader with their host code while it is available on the host cache.
/// Once a program has no usable host code, that program and all the following ones are queued as guest programs.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="loader">Parallel disk cache loader</param>
/// <exception cref="DiskCacheLoadException">Thrown when the shared cache is corrupted or has a different format version</exception>
public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader)
{
    // Index that is tested to find out if the program has a fragment shader.
    const int FragmentStageIndex = 5;

    if (!CacheExists())
    {
        return;
    }

    // Opened by ReadHostCode on the first call, and closed here once the load ends.
    Stream hostTocFileStream = null;
    Stream hostDataFileStream = null;

    try
    {
        using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: false);
        using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: false);

        using var guestTocFileStream = _guestStorage.OpenTocFileStream();
        using var guestDataFileStream = _guestStorage.OpenDataFileStream();

        BinarySerializer tocReader = new BinarySerializer(tocFileStream);
        BinarySerializer dataReader = new BinarySerializer(dataFileStream);

        TocHeader header = new TocHeader();

        if (!tocReader.TryRead(ref header) || header.Magic != TocsMagic)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        if (header.FormatVersion != FileFormatVersionPacked)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.IncompatibleVersion);
        }

        // Host code is only used if it was produced by the current code generator version.
        bool loadHostCache = header.CodeGenVersion == CodeGenVersion;

        int programIndex = 0;

        DataEntry entry = new DataEntry();

        while (tocFileStream.Position < tocFileStream.Length && loader.Active)
        {
            // Each shared TOC entry is the offset of the program on the shared data file.
            ulong dataOffset = 0;
            tocReader.Read(ref dataOffset);

            if (dataOffset >= (ulong)dataFileStream.Length)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            dataFileStream.Seek((long)dataOffset, SeekOrigin.Begin);

            dataReader.BeginCompression();
            dataReader.Read(ref entry);
            uint stagesBitMask = entry.StagesBitMask;

            // Only the low 6 bits can be set on a valid mask.
            if ((stagesBitMask & ~0x3fu) != 0)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            // Compute programs are stored with an empty mask, and have a single shader at index 0.
            bool isCompute = stagesBitMask == 0;
            if (isCompute)
            {
                stagesBitMask = 1;
            }

            GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1];

            DataEntryPerStage stageEntry = new DataEntryPerStage();

            // One per-stage entry is stored for each bit set, from the lowest to the highest bit.
            while (stagesBitMask != 0)
            {
                int stageIndex = BitOperations.TrailingZeroCount(stagesBitMask);

                dataReader.Read(ref stageEntry);

                guestShaders[stageIndex] = _guestStorage.LoadShader(
                    guestTocFileStream,
                    guestDataFileStream,
                    stageEntry.GuestCodeIndex);

                stagesBitMask &= ~(1u << stageIndex);
            }

            ShaderSpecializationState specState = ShaderSpecializationState.Read(ref dataReader);
            dataReader.EndCompression();

            if (loadHostCache)
            {
                (byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode(
                    context,
                    ref hostTocFileStream,
                    ref hostDataFileStream,
                    guestShaders,
                    programIndex,
                    header.Timestamp);

                if (hostCode != null)
                {
                    bool hasFragmentShader = shaders.Length > FragmentStageIndex && shaders[FragmentStageIndex] != null;
                    int fragmentOutputMap = hasFragmentShader ? shaders[FragmentStageIndex].Info.FragmentOutputMap : -1;

                    ShaderInfo shaderInfo = specState.PipelineState.HasValue
                        ? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value, fromCache: true)
                        : new ShaderInfo(fragmentOutputMap, fromCache: true);

                    IProgram hostProgram;

                    if (context.Capabilities.Api == TargetApi.Vulkan)
                    {
                        ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode);

                        hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo);
                    }
                    else
                    {
                        hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo);
                    }

                    CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);

                    loader.QueueHostProgram(program, hostCode, programIndex, isCompute);
                }
                else
                {
                    // No usable host code, stop looking for it from this program onwards.
                    loadHostCache = false;
                }
            }

            if (!loadHostCache)
            {
                loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute);
            }

            loader.CheckCompilation();
            programIndex++;
        }
    }
    finally
    {
        _guestStorage.ClearMemoryCache();

        hostTocFileStream?.Dispose();
        hostDataFileStream?.Dispose();
    }
}

/// <summary>
/// Reads the host code for a given shader, if existent.
/// </summary>
/// <remarks>
/// The host files are opened and have their header validated on the first call, when both streams are null.
/// The caller owns the streams after that, and is responsible for disposing them.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="tocFileStream">Host TOC file stream, initialized if needed</param>
/// <param name="dataFileStream">Host data file stream, initialized if needed</param>
/// <param name="guestShaders">Guest shader code for each active stage</param>
/// <param name="programIndex">Index of the program on the cache</param>
/// <param name="expectedTimestamp">Timestamp of the shared cache file. The host file must be newer than it</param>
/// <returns>Host binary code, or null if not found</returns>
/// <exception cref="DiskCacheLoadException">Thrown when the TOC entry points outside of the host data file</exception>
private (byte[], CachedShaderStage[]) ReadHostCode(
    GpuContext context,
    ref Stream tocFileStream,
    ref Stream dataFileStream,
    GuestCodeAndCbData?[] guestShaders,
    int programIndex,
    ulong expectedTimestamp)
{
    if (tocFileStream == null && dataFileStream == null)
    {
        string tocFileName = GetHostTocFileName(context);
        string dataFileName = GetHostDataFileName(context);

        if (!File.Exists(Path.Combine(_basePath, tocFileName)) || !File.Exists(Path.Combine(_basePath, dataFileName)))
        {
            return (null, null);
        }

        tocFileStream = DiskCacheCommon.OpenFile(_basePath, tocFileName, writable: false);
        dataFileStream = DiskCacheCommon.OpenFile(_basePath, dataFileName, writable: false);

        BinarySerializer headerReader = new BinarySerializer(tocFileStream);

        TocHeader header = new TocHeader();

        // A host TOC that is truncated, is not a host TOC at all, or that is older
        // than the shared cache is handled like a host cache that does not exist.
        if (!headerReader.TryRead(ref header) || header.Magic != TochMagic || header.Timestamp < expectedTimestamp)
        {
            return (null, null);
        }
    }

    // Position of the entry for this program on the host TOC file.
    long tocOffset = Unsafe.SizeOf<TocHeader>() + (long)programIndex * Unsafe.SizeOf<OffsetAndSize>();
    if (tocOffset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
    {
        return (null, null);
    }

    // Note: The TOC position is deliberately not compared against the data file length,
    // both values belong to different files. The data offset read from the entry
    // is what gets validated against the data file, right below.
    tocFileStream.Seek(tocOffset, SeekOrigin.Begin);

    BinarySerializer tocReader = new BinarySerializer(tocFileStream);

    OffsetAndSize offsetAndSize = new OffsetAndSize();
    tocReader.Read(ref offsetAndSize);

    if (offsetAndSize.Offset >= (ulong)dataFileStream.Length)
    {
        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
    }

    dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);

    byte[] hostCode = new byte[offsetAndSize.UncompressedSize];

    BinarySerializer.ReadCompressed(dataFileStream, hostCode);

    CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
    BinarySerializer dataReader = new BinarySerializer(dataFileStream);

    // The shader information of all stages is stored right after the compressed host code.
    dataFileStream.Seek((long)(offsetAndSize.Offset + offsetAndSize.CompressedSize), SeekOrigin.Begin);

    dataReader.BeginCompression();

    for (int index = 0; index < guestShaders.Length; index++)
    {
        if (!guestShaders[index].HasValue)
        {
            continue;
        }

        GuestCodeAndCbData guestShader = guestShaders[index].Value;

        // No information is read for index 0, unless it is the only shader of the program.
        ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null;

        shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data);
    }

    dataReader.EndCompression();

    return (hostCode, shaders);
}

/// <summary>
/// Gets output streams for the disk cache, for faster batch writing.
/// </summary>
/// <param name="context">The GPU context, used to determine the host disk cache</param>
/// <returns>A collection of disk cache output streams</returns>
public DiskCacheOutputStreams GetOutputStreams(GpuContext context)
{
    var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
    var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

    var hostTocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
    var hostDataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

    return new DiskCacheOutputStreams(tocFileStream, dataFileStream, hostTocFileStream, hostDataFileStream);
}

/// <summary>
/// Adds a shader to the cache.
/// </summary>
/// <remarks>
/// When <paramref name="streams"/> is null, the cache files are opened for this call only and closed before returning,
/// even if writing fails. Streams that are passed in are left open.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="program">Cached program</param>
/// <param name="hostCode">Optional host binary code</param>
/// <param name="streams">Output streams to use</param>
public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlySpan<byte> hostCode, DiskCacheOutputStreams streams = null)
{
    // One bit per shader present on the program. Compute shaders are left out, so their mask is zero.
    uint stagesBitMask = 0;

    for (int index = 0; index < program.Shaders.Length; index++)
    {
        var shader = program.Shaders[index];
        if (shader == null || (shader.Info != null && shader.Info.Stage == ShaderStage.Compute))
        {
            continue;
        }

        stagesBitMask |= 1u << index;
    }

    bool ownsStreams = streams == null;

    ulong timestamp = (ulong)DateTime.UtcNow.Subtract(DateTime.UnixEpoch).TotalSeconds;

    var tocFileStream = streams != null ? streams.TocFileStream : DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);

    try
    {
        var dataFileStream = streams != null ? streams.DataFileStream : DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

        try
        {
            if (tocFileStream.Length == 0)
            {
                TocHeader header = new TocHeader();
                CreateToc(tocFileStream, ref header, TocsMagic, CodeGenVersion, timestamp);
            }

            tocFileStream.Seek(0, SeekOrigin.End);
            dataFileStream.Seek(0, SeekOrigin.End);

            BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
            BinarySerializer dataWriter = new BinarySerializer(dataFileStream);

            // The TOC entry of a program is the offset of its data on the data file.
            ulong dataOffset = (ulong)dataFileStream.Position;
            tocWriter.Write(ref dataOffset);

            DataEntry entry = new DataEntry();

            entry.StagesBitMask = stagesBitMask;

            dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
            dataWriter.Write(ref entry);

            DataEntryPerStage stageEntry = new DataEntryPerStage();

            for (int index = 0; index < program.Shaders.Length; index++)
            {
                var shader = program.Shaders[index];
                if (shader == null)
                {
                    continue;
                }

                // The guest code goes to the guest cache, only its index there is stored here.
                stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);

                dataWriter.Write(ref stageEntry);
            }

            program.SpecializationState.Write(ref dataWriter);
            dataWriter.EndCompression();
        }
        finally
        {
            if (ownsStreams)
            {
                dataFileStream.Dispose();
            }
        }
    }
    finally
    {
        if (ownsStreams)
        {
            tocFileStream.Dispose();
        }
    }

    if (hostCode.IsEmpty)
    {
        return;
    }

    WriteHostCode(context, hostCode, program.Shaders, streams, timestamp);
}

/// <summary>
/// Clears all content from the guest cache files.
/// </summary>
public void ClearGuestCache()
{
    _guestStorage.ClearCache();
}

/// <summary>
/// Clears all content from the shared cache files.
/// </summary>
public void ClearSharedCache()
{
    using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
    using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

    tocFileStream.SetLength(0);
    dataFileStream.SetLength(0);
}

/// <summary>
/// Deletes all content from the host cache files.
/// </summary>
/// <param name="context">GPU context</param>
public void ClearHostCache(GpuContext context)
{
    using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
    using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

    tocFileStream.SetLength(0);
    dataFileStream.SetLength(0);
}

/// <summary>
/// Writes the host binary code on the host cache.
/// </summary>
/// <remarks>
/// When <paramref name="streams"/> is null, the host cache files are opened for this call only and closed before returning,
/// even if writing fails. Streams that are passed in are left open.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="hostCode">Host binary code</param>
/// <param name="shaders">Shader stages to be added to the host cache</param>
/// <param name="streams">Output streams to use</param>
/// <param name="timestamp">File creation timestamp</param>
private void WriteHostCode(
    GpuContext context,
    ReadOnlySpan<byte> hostCode,
    CachedShaderStage[] shaders,
    DiskCacheOutputStreams streams,
    ulong timestamp)
{
    bool ownsStreams = streams == null;

    var tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);

    try
    {
        var dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

        try
        {
            if (tocFileStream.Length == 0)
            {
                TocHeader header = new TocHeader();
                CreateToc(tocFileStream, ref header, TochMagic, 0, timestamp);
            }

            tocFileStream.Seek(0, SeekOrigin.End);
            dataFileStream.Seek(0, SeekOrigin.End);

            BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
            BinarySerializer dataWriter = new BinarySerializer(dataFileStream);

            OffsetAndSize offsetAndSize = new OffsetAndSize();
            offsetAndSize.Offset = (ulong)dataFileStream.Position;
            offsetAndSize.UncompressedSize = (uint)hostCode.Length;

            long dataStartPosition = dataFileStream.Position;

            BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());

            // The compressed size is only known after the write, from how much the stream advanced.
            offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition);

            tocWriter.Write(ref offsetAndSize);

            // The information of each stage present follows the compressed host code on the data file.
            dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());

            for (int index = 0; index < shaders.Length; index++)
            {
                if (shaders[index] != null)
                {
                    WriteShaderProgramInfo(ref dataWriter, shaders[index].Info);
                }
            }

            dataWriter.EndCompression();
        }
        finally
        {
            if (ownsStreams)
            {
                dataFileStream.Dispose();
            }
        }
    }
    finally
    {
        if (ownsStreams)
        {
            tocFileStream.Dispose();
        }
    }
}

/// <summary>
/// Creates a TOC file for the host or shared cache.
/// </summary>
/// <remarks>
/// Any existing content of the file is discarded before the header is written.
/// </remarks>
/// <param name="tocFileStream">TOC file stream</param>
/// <param name="header">Set to the TOC file header</param>
/// <param name="magic">Magic value to be written</param>
/// <param name="codegenVersion">Shader codegen version. The shared file is created with the current version, the host file with zero</param>
/// <param name="timestamp">File creation timestamp</param>
private void CreateToc(Stream tocFileStream, ref TocHeader header, uint magic, uint codegenVersion, ulong timestamp)
{
    BinarySerializer writer = new BinarySerializer(tocFileStream);

    header.Magic = magic;
    header.FormatVersion = FileFormatVersionPacked;
    header.CodeGenVersion = codegenVersion;
    header.Padding = 0;
    header.Reserved = 0;
    header.Timestamp = timestamp;

    if (tocFileStream.Length > 0)
    {
        tocFileStream.Seek(0, SeekOrigin.Begin);
        tocFileStream.SetLength(0);
    }

    writer.Write(ref header);
}

/// <summary>
/// Reads the shader program info from the cache.
/// </summary>
        /// <param name="dataReader">Cache data reader</param>
        /// <returns>Shader program info</returns>
        private static ShaderProgramInfo ReadShaderProgramInfo(ref BinarySerializer dataReader)
        {
            // The header carries the element count of each descriptor table that follows it.
            DataShaderInfo header = new DataShaderInfo();

            dataReader.ReadWithMagicAndSize(ref header, ShdiMagic);

            var constantBuffers = new BufferDescriptor[header.CBuffersCount];
            var storageBuffers = new BufferDescriptor[header.SBuffersCount];
            var textureDescriptors = new TextureDescriptor[header.TexturesCount];
            var imageDescriptors = new TextureDescriptor[header.ImagesCount];

            // Tables are stored in this exact order: constant buffers, storage buffers, textures, images.
            for (int i = 0; i < constantBuffers.Length; i++)
            {
                dataReader.ReadWithMagicAndSize(ref constantBuffers[i], BufdMagic);
            }

            for (int i = 0; i < storageBuffers.Length; i++)
            {
                dataReader.ReadWithMagicAndSize(ref storageBuffers[i], BufdMagic);
            }

            for (int i = 0; i < textureDescriptors.Length; i++)
            {
                dataReader.ReadWithMagicAndSize(ref textureDescriptors[i], TexdMagic);
            }

            for (int i = 0; i < imageDescriptors.Length; i++)
            {
                dataReader.ReadWithMagicAndSize(ref imageDescriptors[i], TexdMagic);
            }

            ShaderProgramInfo info = new ShaderProgramInfo(
                constantBuffers,
                storageBuffers,
                textureDescriptors,
                imageDescriptors,
                ShaderIdentification.None,
                0,
                header.Stage,
                header.UsesInstanceId,
                header.UsesDrawParameters,
                header.UsesRtLayer,
                header.ClipDistancesWritten,
                header.FragmentOutputMap);

            return info;
        }

        /// <summary>
        /// Writes the shader program info into the cache.
/// </summary>
        /// <param name="dataWriter">Cache data writer</param>
        /// <param name="info">Program info; nothing is written when it is null</param>
        private static void WriteShaderProgramInfo(ref BinarySerializer dataWriter, ShaderProgramInfo info)
        {
            if (info == null)
            {
                return;
            }

            // Header with the element count of each descriptor table, followed by the tables themselves.
            DataShaderInfo header = new DataShaderInfo();

            header.CBuffersCount = (ushort)info.CBuffers.Count;
            header.SBuffersCount = (ushort)info.SBuffers.Count;
            header.TexturesCount = (ushort)info.Textures.Count;
            header.ImagesCount = (ushort)info.Images.Count;
            header.Stage = info.Stage;
            header.UsesInstanceId = info.UsesInstanceId;
            header.UsesDrawParameters = info.UsesDrawParameters;
            header.UsesRtLayer = info.UsesRtLayer;
            header.ClipDistancesWritten = info.ClipDistancesWritten;
            header.FragmentOutputMap = info.FragmentOutputMap;

            dataWriter.WriteWithMagicAndSize(ref header, ShdiMagic);

            // Each element is copied to a local first because it has to be passed by reference.
            for (int i = 0; i < info.CBuffers.Count; i++)
            {
                var constantBuffer = info.CBuffers[i];
                dataWriter.WriteWithMagicAndSize(ref constantBuffer, BufdMagic);
            }

            for (int i = 0; i < info.SBuffers.Count; i++)
            {
                var storageBuffer = info.SBuffers[i];
                dataWriter.WriteWithMagicAndSize(ref storageBuffer, BufdMagic);
            }

            for (int i = 0; i < info.Textures.Count; i++)
            {
                var texture = info.Textures[i];
                dataWriter.WriteWithMagicAndSize(ref texture, TexdMagic);
            }

            for (int i = 0; i < info.Images.Count; i++)
            {
                var image = info.Images[i];
                dataWriter.WriteWithMagicAndSize(ref image, TexdMagic);
            }
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
// new file mode 100644
// index 00000000..d6e23302
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
// @@ -0,0 +1,48 @@
using System;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Disk cache load exception.
/// </summary>
    class DiskCacheLoadException : Exception
    {
        /// <summary>
        /// Result of the cache load operation.
        /// Only the constructor taking a result code assigns it; the other constructors leave the enum default.
        /// </summary>
        public DiskCacheLoadResult Result { get; }

        /// <summary>
        /// Creates a new instance of the disk cache load exception, without message or result code.
        /// </summary>
        public DiskCacheLoadException() { }

        /// <summary>
        /// Creates a new instance of the disk cache load exception with a message.
        /// </summary>
        /// <param name="message">Exception message</param>
        public DiskCacheLoadException(string message)
            : base(message) { }

        /// <summary>
        /// Creates a new instance of the disk cache load exception with a message and an inner exception.
        /// </summary>
        /// <param name="message">Exception message</param>
        /// <param name="inner">Inner exception</param>
        public DiskCacheLoadException(string message, Exception inner)
            : base(message, inner) { }

        /// <summary>
        /// Creates a new instance of the disk cache load exception from a result code.
        /// The exception message is the text returned by <c>GetMessage</c> for that code.
        /// </summary>
        /// <param name="result">Result code</param>
        public DiskCacheLoadException(DiskCacheLoadResult result)
            : base(result.GetMessage())
        {
            Result = result;
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
// new file mode 100644
// index 00000000..b3ffa4a7
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
// @@ -0,0 +1,72 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Result of a shader cache load operation.
    /// </summary>
    enum DiskCacheLoadResult
    {
        /// <summary>
        /// No error.
        /// </summary>
        Success,

        /// <summary>
        /// File can't be accessed.
        /// </summary>
        NoAccess,

        /// <summary>
        /// The constant buffer 1 data length is too low for the translation of the guest shader.
        /// </summary>
        InvalidCb1DataLength,

        /// <summary>
        /// The cache is missing the descriptor of a texture used by the shader.
        /// </summary>
        MissingTextureDescriptor,

        /// <summary>
        /// File is corrupted.
        /// </summary>
        FileCorruptedGeneric,

        /// <summary>
        /// File is corrupted, detected by magic value check.
        /// </summary>
        FileCorruptedInvalidMagic,

        /// <summary>
        /// File is corrupted, detected by length check.
        /// </summary>
        FileCorruptedInvalidLength,

        /// <summary>
        /// File might be valid, but is incompatible with the current emulator version.
        /// </summary>
        IncompatibleVersion
    }

    /// <summary>
    /// Extension methods for <see cref="DiskCacheLoadResult"/>.
    /// </summary>
    static class DiskCacheLoadResultExtensions
    {
        /// <summary>
        /// Gets an error message from a result code.
        /// </summary>
        /// <param name="result">Result code</param>
        /// <returns>Error message; "Unknown error." for values that are not defined by the enum</returns>
        public static string GetMessage(this DiskCacheLoadResult result) => result switch
        {
            DiskCacheLoadResult.Success => "No error.",
            DiskCacheLoadResult.NoAccess => "Could not access the cache file.",
            DiskCacheLoadResult.InvalidCb1DataLength => "Constant buffer 1 data length is too low.",
            DiskCacheLoadResult.MissingTextureDescriptor => "Texture descriptor missing from the cache file.",
            DiskCacheLoadResult.FileCorruptedGeneric => "The cache file is corrupted.",
            DiskCacheLoadResult.FileCorruptedInvalidMagic => "Magic check failed, the cache file is corrupted.",
            DiskCacheLoadResult.FileCorruptedInvalidLength => "Length check failed, the cache file is corrupted.",
            DiskCacheLoadResult.IncompatibleVersion => "The version of the disk cache is not compatible with this version of the emulator.",
            _ => "Unknown error."
        };
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
// new file mode 100644
// index 00000000..1e0df264
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
// @@ -0,0 +1,57 @@
using System;
using System.IO;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Output streams for the disk shader cache.
    /// </summary>
    class DiskCacheOutputStreams : IDisposable
    {
        /// <summary>
        /// Shared table of contents (TOC) file stream.
        /// </summary>
        public readonly FileStream TocFileStream;

        /// <summary>
        /// Shared data file stream.
        /// </summary>
        public readonly FileStream DataFileStream;

        /// <summary>
        /// Host table of contents (TOC) file stream.
        /// </summary>
        public readonly FileStream HostTocFileStream;

        /// <summary>
        /// Host data file stream.
        /// </summary>
        public readonly FileStream HostDataFileStream;

        /// <summary>
        /// Creates a new instance of a disk cache output stream container.
        /// The container disposes all four streams when it is disposed.
        /// </summary>
        /// <param name="tocFileStream">Stream for the shared table of contents file</param>
        /// <param name="dataFileStream">Stream for the shared data file</param>
        /// <param name="hostTocFileStream">Stream for the host table of contents file</param>
        /// <param name="hostDataFileStream">Stream for the host data file</param>
        public DiskCacheOutputStreams(
            FileStream tocFileStream,
            FileStream dataFileStream,
            FileStream hostTocFileStream,
            FileStream hostDataFileStream)
        {
            TocFileStream = tocFileStream;
            DataFileStream = dataFileStream;
            HostTocFileStream = hostTocFileStream;
            HostDataFileStream = hostDataFileStream;
        }

        /// <summary>
        /// Disposes the output file streams.
        /// </summary>
        public void Dispose()
        {
            // Shared files first, then host files.
            FileStream[] ownedStreams = { TocFileStream, DataFileStream, HostTocFileStream, HostDataFileStream };

            foreach (FileStream stream in ownedStreams)
            {
                stream.Dispose();
            }
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
// new file mode 100644
// index 00000000..959d6e18
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
// @@ -0,0 +1,29 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Guest shader code and constant buffer data accessed by the shader.
    /// </summary>
    readonly struct GuestCodeAndCbData
    {
        /// <summary>
        /// Maxwell binary shader code.
        /// </summary>
        public byte[] Code { get; }

        /// <summary>
        /// Constant buffer 1 data accessed by the shader.
        /// </summary>
        public byte[] Cb1Data { get; }

        /// <summary>
        /// Creates a new instance of the guest shader code and constant buffer data.
        /// The arrays are stored as given, without being copied.
        /// </summary>
        /// <param name="code">Maxwell binary shader code</param>
        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
        public GuestCodeAndCbData(byte[] code, byte[] cb1Data)
        {
            Cb1Data = cb1Data;
            Code = code;
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
// new file mode 100644
// index 00000000..77fb3ca4
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
// @@ -0,0 +1,725 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Loads the shaders stored on the disk cache, translating guest code on worker threads
    /// and validating the host compilation results.
    /// </summary>
    class ParallelDiskCacheLoader
    {
        /// <summary>
        /// Number of guest translation worker threads, also used as the capacity of the translation queue.
        /// </summary>
        private const int ThreadCount = 8;

        private readonly GpuContext _context;
        private readonly ShaderCacheHashTable _graphicsCache;
        private readonly ComputeShaderCacheHashTable _computeCache;
        private readonly DiskCacheHostStorage _hostStorage;
        private readonly CancellationToken _cancellationToken;
        private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;

        /// <summary>
        /// Indicates if the cache should be loaded.
        /// </summary>
        public bool Active => !_cancellationToken.IsCancellationRequested;

        private bool _needsHostRegen;

        /// <summary>
        /// Number of shaders that failed to compile from the cache.
        /// </summary>
        public int ErrorCount { get; private set; }

        /// <summary>
        /// Program validation entry.
        /// </summary>
        private readonly struct ProgramEntry
        {
            /// <summary>
            /// Cached shader program.
            /// </summary>
            public readonly CachedShaderProgram CachedProgram;

            /// <summary>
            /// Optional binary code. If not null, it is used instead of the backend host binary.
            /// </summary>
            public readonly byte[] BinaryCode;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Indicates if the program is a host binary shader.
            /// </summary>
            public readonly bool IsBinary;

            /// <summary>
            /// Creates a new program validation entry.
            /// </summary>
            /// <param name="cachedProgram">Cached shader program</param>
            /// <param name="binaryCode">Optional binary code. If not null, it is used instead of the backend host binary</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            /// <param name="isBinary">Indicates if the program is a host binary shader</param>
            public ProgramEntry(
                CachedShaderProgram cachedProgram,
                byte[] binaryCode,
                int programIndex,
                bool isCompute,
                bool isBinary)
            {
                CachedProgram = cachedProgram;
                BinaryCode = binaryCode;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
                IsBinary = isBinary;
            }
        }

        /// <summary>
        /// Translated shader compilation entry.
        /// </summary>
        private readonly struct ProgramCompilation
        {
            /// <summary>
            /// Translated shader stages.
            /// </summary>
            public readonly ShaderProgram[] TranslatedStages;

            /// <summary>
            /// Cached shaders.
            /// </summary>
            public readonly CachedShaderStage[] Shaders;

            /// <summary>
            /// Specialization state.
            /// </summary>
            public readonly ShaderSpecializationState SpecializationState;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Creates a new translated shader compilation entry.
            /// </summary>
            /// <param name="translatedStages">Translated shader stages</param>
            /// <param name="shaders">Cached shaders</param>
            /// <param name="specState">Specialization state</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            public ProgramCompilation(
                ShaderProgram[] translatedStages,
                CachedShaderStage[] shaders,
                ShaderSpecializationState specState,
                int programIndex,
                bool isCompute)
            {
                TranslatedStages = translatedStages;
                Shaders = shaders;
                SpecializationState = specState;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
            }
        }

        /// <summary>
        /// Program translation entry.
        /// </summary>
        private readonly struct AsyncProgramTranslation
        {
            /// <summary>
            /// Guest code for each active stage.
            /// </summary>
            public readonly GuestCodeAndCbData?[] GuestShaders;

            /// <summary>
            /// Specialization state.
            /// </summary>
            public readonly ShaderSpecializationState SpecializationState;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Creates a new program translation entry.
            /// </summary>
            /// <param name="guestShaders">Guest code for each active stage</param>
            /// <param name="specState">Specialization state</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            public AsyncProgramTranslation(
                GuestCodeAndCbData?[] guestShaders,
                ShaderSpecializationState specState,
                int programIndex,
                bool isCompute)
            {
                GuestShaders = guestShaders;
                SpecializationState = specState;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
            }
        }

        // Programs submitted to the backend, waiting for their link status to be checked.
        private readonly Queue<ProgramEntry> _validationQueue;

        // Programs translated from guest code, waiting to be submitted to the backend.
        private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;

        // Guest programs waiting to be translated by the worker threads.
        private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;

        // Successfully compiled programs and their host binary, keyed by program index.
        private readonly SortedList<int, (CachedShaderProgram, byte[])> _programList;

        // Only assigned by the constructor.
        private readonly int _backendParallelCompileThreads;
        private int _compiledCount;
        private int _totalCount;

        /// <summary>
        /// Creates a new parallel disk cache loader.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="graphicsCache">Graphics shader cache</param>
        /// <param name="computeCache">Compute shader cache</param>
        /// <param name="hostStorage">Disk cache host storage</param>
        /// <param name="cancellationToken">Cancellation token</param>
        /// <param name="stateChangeCallback">Function to be called when there is a state change, reporting state, compiled and total shaders count</param>
        public ParallelDiskCacheLoader(
            GpuContext context,
            ShaderCacheHashTable graphicsCache,
            ComputeShaderCacheHashTable computeCache,
            DiskCacheHostStorage hostStorage,
            CancellationToken cancellationToken,
            Action<ShaderCacheState, int, int> stateChangeCallback)
        {
            _context = context;
            _graphicsCache = graphicsCache;
            _computeCache = computeCache;
            _hostStorage = hostStorage;
            _cancellationToken = cancellationToken;
            _stateChangeCallback = stateChangeCallback;
            _validationQueue = new Queue<ProgramEntry>();
            _compilationQueue = new ConcurrentQueue<ProgramCompilation>();
            _asyncTranslationQueue = new BlockingCollection<AsyncProgramTranslation>(ThreadCount);
            _programList = new SortedList<int, (CachedShaderProgram, byte[])>();
            _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code.
        }

        /// <summary>
        /// Loads all shaders from the cache.
/// </summary>
        public void LoadShaders()
        {
            Thread[] workThreads = new Thread[ThreadCount];

            for (int index = 0; index < ThreadCount; index++)
            {
                workThreads[index] = new Thread(ProcessAsyncQueue)
                {
                    Name = $"GPU.AsyncTranslationThread.{index}"
                };
            }

            int programCount = _hostStorage.GetProgramCount();

            _compiledCount = 0;
            _totalCount = programCount;

            _stateChangeCallback(ShaderCacheState.Start, 0, programCount);

            Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache...");

            for (int index = 0; index < ThreadCount; index++)
            {
                workThreads[index].Start(_cancellationToken);
            }

            try
            {
                _hostStorage.LoadShaders(_context, this);
            }
            catch (DiskCacheLoadException diskCacheLoadException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}");

                // If we can't even access the file, then we also can't rebuild.
                if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess)
                {
                    _needsHostRegen = true;
                }
            }
            catch (InvalidDataException invalidDataException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}");
                _needsHostRegen = true;
            }
            catch (IOException ioException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}");
                _needsHostRegen = true;
            }
            finally
            {
                // The workers block on the translation queue until adding is completed.
                // This must also happen when an unexpected exception escapes the load above,
                // otherwise the worker threads would never exit.
                _asyncTranslationQueue.CompleteAdding();

                for (int index = 0; index < ThreadCount; index++)
                {
                    workThreads[index].Join();
                }
            }

            CheckCompilationBlocking();

            if (_needsHostRegen && Active)
            {
                RebuildCacheFiles();
            }

            Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");

            _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount);
        }

        /// <summary>
        /// Rebuilds the shared and host cache files from the programs that were compiled successfully,
        /// or clears the guest cache too when no program could be compiled.
        /// </summary>
        private void RebuildCacheFiles()
        {
            // Rebuild both shared and host cache files.
            // Rebuilding shared is required because the shader information returned by the translator
            // might have changed, and so we have to reconstruct the file with the new information.
            try
            {
                _hostStorage.ClearSharedCache();
                _hostStorage.ClearHostCache(_context);

                if (_programList.Count != 0)
                {
                    Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders...");

                    using var streams = _hostStorage.GetOutputStreams(_context);

                    foreach (var kv in _programList)
                    {
                        if (!Active)
                        {
                            break;
                        }

                        (CachedShaderProgram program, byte[] binaryCode) = kv.Value;
                        _hostStorage.AddShader(_context, program, binaryCode, streams);
                    }

                    Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully.");
                }
                else
                {
                    _hostStorage.ClearGuestCache();

                    Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption.");
                }
            }
            catch (DiskCacheLoadException diskCacheLoadException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}");
            }
            catch (IOException ioException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. {ioException.Message}");
            }
        }

        /// <summary>
        /// Enqueues a host program for compilation.
        /// </summary>
        /// <param name="cachedProgram">Cached program</param>
        /// <param name="binaryCode">Host binary code</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute)
        {
            EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true));
        }

        /// <summary>
        /// Enqueues a guest program for compilation.
/// </summary>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
        {
            var translation = new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute);

            try
            {
                _asyncTranslationQueue.Add(translation, _cancellationToken);
            }
            catch (OperationCanceledException)
            {
                // Loading was cancelled, the program is dropped.
            }
        }

        /// <summary>
        /// Check the state of programs that have already been compiled,
        /// and add to the cache if the compilation was successful.
        /// </summary>
        public void CheckCompilation()
        {
            ProcessCompilationQueue();

            // Only programs at the front of the queue that already finished compiling are processed.
            // The first one that is still compiling stops the loop, which avoids blocking on it.
            while (_validationQueue.TryPeek(out ProgramEntry pending))
            {
                ProgramLinkStatus status = pending.CachedProgram.HostProgram.CheckProgramLink(false);

                if (status == ProgramLinkStatus.Incomplete)
                {
                    break;
                }

                ProcessCompiledProgram(ref pending, status);
                _validationQueue.Dequeue();
            }
        }

        /// <summary>
        /// Waits until all programs finishes compiling, then adds the ones
        /// with successful compilation to the cache.
        /// </summary>
        private void CheckCompilationBlocking()
        {
            ProcessCompilationQueue();

            while (_validationQueue.TryDequeue(out ProgramEntry pending))
            {
                // Stop as soon as loading is cancelled.
                if (!Active)
                {
                    break;
                }

                ProgramLinkStatus status = pending.CachedProgram.HostProgram.CheckProgramLink(true);

                ProcessCompiledProgram(ref pending, status, asyncCompile: false);
            }
        }

        /// <summary>
        /// Process a compiled program result.
/// </summary>
        /// <param name="entry">Compiled program entry</param>
        /// <param name="result">Compilation result</param>
        /// <param name="asyncCompile">For failed host compilations, indicates if a guest compilation should be done asynchronously</param>
        private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true)
        {
            CachedShaderProgram cachedProgram = entry.CachedProgram;

            if (result == ProgramLinkStatus.Success)
            {
                // Compilation successful, add to memory cache.
                if (entry.IsCompute)
                {
                    _computeCache.Add(cachedProgram);
                }
                else
                {
                    _graphicsCache.Add(cachedProgram);
                }

                // A program that did not come from a host binary means the host cache is out of date.
                if (!entry.IsBinary)
                {
                    _needsHostRegen = true;
                }

                // Fetch the binary code from the backend if it isn't already present.
                byte[] hostBinary = entry.BinaryCode ?? cachedProgram.HostProgram.GetBinary();

                _programList.Add(entry.ProgramIndex, (cachedProgram, hostBinary));
                SignalCompiled();

                return;
            }

            if (!entry.IsBinary)
            {
                // Failed to compile from both host and guest binary.
                ErrorCount++;
                SignalCompiled();

                return;
            }

            // If this is a host binary and compilation failed,
            // we still have a chance to recompile from the guest binary.
            CachedShaderStage[] stages = cachedProgram.Shaders;
            GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[stages.Length];

            for (int stageSlot = 0; stageSlot < stages.Length; stageSlot++)
            {
                CachedShaderStage stage = stages[stageSlot];

                if (stage != null)
                {
                    guestShaders[stageSlot] = new GuestCodeAndCbData(stage.Code, stage.Cb1Data);
                }
            }

            if (asyncCompile)
            {
                QueueGuestProgram(guestShaders, cachedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
            }
            else
            {
                RecompileFromGuestCode(guestShaders, cachedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
                ProcessCompilationQueue();
            }
        }

        /// <summary>
        /// Processes the queue of translated guest programs that should be compiled on the host.
/// </summary>
        private void ProcessCompilationQueue()
        {
            while (_compilationQueue.TryDequeue(out ProgramCompilation compilation))
            {
                // Stop as soon as loading is cancelled.
                if (!Active)
                {
                    break;
                }

                ShaderProgram[] stages = compilation.TranslatedStages;
                ShaderSource[] shaderSources = new ShaderSource[stages.Length];

                // Stays at -1 when the program has no fragment stage.
                int fragmentOutputMap = -1;

                for (int stageSlot = 0; stageSlot < stages.Length; stageSlot++)
                {
                    ShaderProgram stage = stages[stageSlot];
                    shaderSources[stageSlot] = CreateShaderSource(stage);

                    if (stage.Info.Stage == ShaderStage.Fragment)
                    {
                        fragmentOutputMap = stage.Info.FragmentOutputMap;
                    }
                }

                ShaderInfo shaderInfo;

                if (compilation.SpecializationState.PipelineState.HasValue)
                {
                    shaderInfo = new ShaderInfo(fragmentOutputMap, compilation.SpecializationState.PipelineState.Value, fromCache: true);
                }
                else
                {
                    shaderInfo = new ShaderInfo(fragmentOutputMap, fromCache: true);
                }

                IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, shaderInfo);
                CachedShaderProgram cachedProgram = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders);

                // Vulkan's binary code is the SPIR-V used for compilation, so it is ready immediately. Other APIs get this after compilation.
                byte[] binaryCode = null;

                if (_context.Capabilities.Api == TargetApi.Vulkan)
                {
                    binaryCode = ShaderBinarySerializer.Pack(shaderSources);
                }

                EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, compilation.ProgramIndex, compilation.IsCompute, isBinary: false));
            }
        }

        /// <summary>
        /// Enqueues a program for validation, which will check if the program was compiled successfully.
        /// </summary>
        /// <param name="newEntry">Program entry to be validated</param>
        private void EnqueueForValidation(ProgramEntry newEntry)
        {
            _validationQueue.Enqueue(newEntry);

            // Do not allow more than N shader compilation in-flight, where N is the maximum number of threads
            // the driver will be using for parallel compilation.
            // Submitting more seems to cause NVIDIA OpenGL driver to crash.
            if (_validationQueue.Count < _backendParallelCompileThreads)
            {
                return;
            }

            if (_validationQueue.TryDequeue(out ProgramEntry oldest))
            {
                ProgramLinkStatus status = oldest.CachedProgram.HostProgram.CheckProgramLink(true);

                ProcessCompiledProgram(ref oldest, status, asyncCompile: false);
            }
        }

        /// <summary>
        /// Processes the queue of programs that should be translated from guest code.
        /// </summary>
        /// <param name="state">Cancellation token</param>
        private void ProcessAsyncQueue(object state)
        {
            CancellationToken cancellationToken = (CancellationToken)state;

            try
            {
                foreach (AsyncProgramTranslation translation in _asyncTranslationQueue.GetConsumingEnumerable(cancellationToken))
                {
                    RecompileFromGuestCode(
                        translation.GuestShaders,
                        translation.SpecializationState,
                        translation.ProgramIndex,
                        translation.IsCompute);
                }
            }
            catch (OperationCanceledException)
            {
                // Loading was cancelled, the thread just exits.
            }
        }

        /// <summary>
        /// Recompiles a program from guest code.
        /// Any exception thrown by the translation is logged and counted as a failed program.
        /// </summary>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
        {
            try
            {
                if (!isCompute)
                {
                    RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex);
                }
                else
                {
                    RecompileComputeFromGuestCode(guestShaders, specState, programIndex);
                }
            }
            catch (Exception exception)
            {
                Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {exception.Message}");

                // NOTE(review): this method also runs on the translation worker threads, so this
                // increment is not synchronized with the other ErrorCount update - confirm whether that matters.
                ErrorCount++;
                SignalCompiled();
            }
        }

        /// <summary>
        /// Recompiles a graphics program from guest code.
/// </summary>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
        {
            ShaderSpecializationState newSpecState = new ShaderSpecializationState(
                ref specState.GraphicsState,
                specState.PipelineState,
                specState.TransformFeedbackDescriptors);

            ResourceCounts counts = new ResourceCounts();

            // Slot 0 holds the optional "vertex A" shader, slots 1 and up hold the regular stages.
            TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
            TranslatorContext nextStage = null;

            TargetApi api = _context.Capabilities.Api;

            // Decode from the last stage to the first, so that each stage can be linked to the one that follows it.
            for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
            {
                GuestCodeAndCbData? guestStage = guestShaders[stageIndex + 1];

                if (!guestStage.HasValue)
                {
                    continue;
                }

                GuestCodeAndCbData shader = guestStage.Value;

                DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, stageIndex);
                TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0);

                if (nextStage != null)
                {
                    currentStage.SetNextStage(nextStage);
                }

                if (stageIndex == 0 && guestShaders[0].HasValue)
                {
                    GuestCodeAndCbData vertexA = guestShaders[0].Value;

                    DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, vertexA.Code, vertexA.Cb1Data, specState, newSpecState, counts, 0);
                    translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
                }

                translatorContexts[stageIndex + 1] = currentStage;
                nextStage = currentStage;
            }

            if (!_context.Capabilities.SupportsGeometryShader)
            {
                ShaderCache.TryRemoveGeometryStage(translatorContexts);
            }

            CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
            List<ShaderProgram> translatedStages = new List<ShaderProgram>();

            TranslatorContext previousStage = null;

            for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
            {
                TranslatorContext currentStage = translatorContexts[stageIndex + 1];

                if (currentStage == null)
                {
                    // Stage index 3 is presumably the geometry stage slot (a geometry passthrough is generated for it) - TODO confirm.
                    if (previousStage != null &&
                        previousStage.LayerOutputWritten &&
                        stageIndex == 3 &&
                        !_context.Capabilities.SupportsLayerVertexTessellation)
                    {
                        translatedStages.Add(previousStage.GenerateGeometryPassthrough());
                    }

                    continue;
                }

                ShaderProgram program;

                byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
                byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data;

                if (stageIndex == 0 && guestShaders[0].HasValue)
                {
                    // The vertex A context is passed to the translation of the vertex stage.
                    program = currentStage.Translate(translatorContexts[0]);

                    byte[] guestCodeA = guestShaders[0].Value.Code;
                    byte[] cb1DataA = guestShaders[0].Value.Cb1Data;

                    shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA);
                    shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
                }
                else
                {
                    program = currentStage.Translate();

                    shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
                }

                if (program != null)
                {
                    translatedStages.Add(program);
                }

                previousStage = currentStage;
            }

            _compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false));
        }

        /// <summary>
        /// Recompiles a compute program from guest code.
+ /// </summary> + /// <param name="guestShaders">Guest code for each active stage</param> + /// <param name="specState">Specialization state</param> + /// <param name="programIndex">Program index</param> + private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex) + { + GuestCodeAndCbData shader = guestShaders[0].Value; + ResourceCounts counts = new ResourceCounts(); + ShaderSpecializationState newSpecState = new ShaderSpecializationState(ref specState.ComputeState); + DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); + + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0); + + ShaderProgram program = translatorContext.Translate(); + + CachedShaderStage[] shaders = new[] { new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data) }; + + _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true)); + } + + /// <summary> + /// Signals that compilation of a program has been finished successfully, + /// or that it failed and guest recompilation has also been attempted. + /// </summary> + private void SignalCompiled() + { + _stateChangeCallback(ShaderCacheState.Loading, ++_compiledCount, _totalCount); + } + } +}
// File: src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs
using Ryujinx.Common;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.IO;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Packs a set of shader binaries into a single byte array, and unpacks them again.
    /// </summary>
    /// <remarks>
    /// Layout written by <see cref="Pack"/>: the number of sources, followed, for each source,
    /// by its stage, the length of its binary code and the binary code itself.
    /// </remarks>
    static class ShaderBinarySerializer
    {
        /// <summary>
        /// Writes all the given shader sources into one byte array.
        /// </summary>
        /// <param name="sources">Shader sources to be packed</param>
        /// <returns>Packed data</returns>
        public static byte[] Pack(ShaderSource[] sources)
        {
            using MemoryStream output = MemoryStreamManager.Shared.GetStream();

            output.Write(sources.Length);

            for (int index = 0; index < sources.Length; index++)
            {
                ShaderSource entry = sources[index];
                var binary = entry.BinaryCode;

                output.Write((int)entry.Stage);
                output.Write(binary.Length);
                output.Write(binary);
            }

            return output.ToArray();
        }

        /// <summary>
        /// Reads back the shader sources from data produced by <see cref="Pack"/>.
        /// </summary>
        /// <param name="stages">Cached stages, used to find the bindings of each unpacked stage</param>
        /// <param name="code">Packed data</param>
        /// <returns>Unpacked shader sources, all created with the SPIR-V target language</returns>
        public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code)
        {
            using MemoryStream packed = new MemoryStream(code);
            using BinaryReader packedReader = new BinaryReader(packed);

            List<ShaderSource> sources = new List<ShaderSource>();

            for (int remaining = packedReader.ReadInt32(); remaining > 0; remaining--)
            {
                ShaderStage stage = (ShaderStage)packedReader.ReadInt32();
                int length = packedReader.ReadInt32();
                byte[] binary = packedReader.ReadBytes(length);

                ShaderBindings bindings = GetBindings(stages, stage);

                sources.Add(new ShaderSource(binary, bindings, stage, TargetLanguage.Spirv));
            }

            return sources.ToArray();
        }

        /// <summary>
        /// Gets the bindings of the first cached stage that has program info matching the given stage.
        /// </summary>
        /// <param name="stages">Cached stages to search</param>
        /// <param name="stage">Stage to look for</param>
        /// <returns>Bindings of the matching stage, or empty bindings if there is no match</returns>
        private static ShaderBindings GetBindings(CachedShaderStage[] stages, ShaderStage stage)
        {
            foreach (CachedShaderStage candidate in stages)
            {
                var info = candidate?.Info;

                // Entries without program info, or for another stage, are skipped.
                if (info == null || info.Stage != stage)
                {
                    continue;
                }

                return ShaderCache.GetBindings(info);
            }

            return new ShaderBindings(Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>());
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Represents a GPU state and memory accessor.
    /// </summary>
    class GpuAccessor : GpuAccessorBase, IGpuAccessor
    {
        private readonly GpuChannel _channel;
        private readonly GpuAccessorState _state;
        private readonly int _stageIndex;
        private readonly bool _compute;
        private readonly bool _isVulkan;

        /// <summary>
        /// Creates a new instance of the GPU state accessor for graphics shader translation.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="state">Current GPU state</param>
        /// <param name="stageIndex">Graphics shader stage index (0 = Vertex, 4 = Fragment)</param>
        public GpuAccessor(
            GpuContext context,
            GpuChannel channel,
            GpuAccessorState state,
            int stageIndex) : base(context, state.ResourceCounts, stageIndex)
        {
            _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
            _channel = channel;
            _state = state;
            _stageIndex = stageIndex;
        }

        /// <summary>
        /// Creates a new instance of the GPU state accessor for compute shader translation.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="state">Current GPU state</param>
        public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0)
        {
            // _stageIndex and _isVulkan are not assigned here, so they keep their default values (0 and false).
            _channel = channel;
            _state = state;
            _compute = true;
        }

        /// <inheritdoc/>
        public uint ConstantBuffer1Read(int offset)
        {
            ulong baseAddress = _compute
                ? _channel.BufferManager.GetComputeUniformBufferAddress(1)
                : _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1);

            return _channel.MemoryManager.Physical.Read<uint>(baseAddress + (ulong)offset);
        }

        /// <inheritdoc/>
        public void Log(string message)
        {
            Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");
        }

        /// <inheritdoc/>
        public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize)
        {
            // Read at least up to the next 0x1000 byte boundary after the address, or more if the caller requires it.
            int size = Math.Max(minimumSize, 0x1000 - (int)(address & 0xfff));
            return MemoryMarshal.Cast<byte, ulong>(_channel.MemoryManager.GetSpan(address, size));
        }

        /// <inheritdoc/>
        public bool QueryAlphaToCoverageDitherEnable()
        {
            return _state.GraphicsState.AlphaToCoverageEnable && _state.GraphicsState.AlphaToCoverageDitherEnable;
        }

        /// <inheritdoc/>
        public AlphaTestOp QueryAlphaTestCompare()
        {
            // The guest comparison is only reported on Vulkan with alpha test enabled; otherwise the test always passes.
            if (!_isVulkan || !_state.GraphicsState.AlphaTestEnable)
            {
                return AlphaTestOp.Always;
            }

            return _state.GraphicsState.AlphaTestCompare switch
            {
                CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
                CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
                CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
                CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
                CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
                CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
                CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
                _ => AlphaTestOp.Always
            };
        }

        /// <inheritdoc/>
        public float QueryAlphaTestReference()
        {
            return _state.GraphicsState.AlphaTestReference;
        }

        /// <inheritdoc/>
        public AttributeType QueryAttributeType(int location)
        {
            return _state.GraphicsState.AttributeTypes[location];
        }

        /// <inheritdoc/>
        public AttributeType QueryFragmentOutputType(int location)
        {
            return _state.GraphicsState.FragmentOutputTypes[location];
        }

        /// <inheritdoc/>
        public int QueryComputeLocalSizeX() => _state.ComputeState.LocalSizeX;

        /// <inheritdoc/>
        public int QueryComputeLocalSizeY() => _state.ComputeState.LocalSizeY;

        /// <inheritdoc/>
        public int QueryComputeLocalSizeZ() => _state.ComputeState.LocalSizeZ;

        /// <inheritdoc/>
        public int QueryComputeLocalMemorySize() => _state.ComputeState.LocalMemorySize;

        /// <inheritdoc/>
        public int QueryComputeSharedMemorySize() => _state.ComputeState.SharedMemorySize;

        /// <inheritdoc/>
        public uint QueryConstantBufferUse()
        {
            uint useMask = _compute
                ? _channel.BufferManager.GetComputeUniformBufferUseMask()
                : _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);

            // The queried mask is also recorded on the specialization state, when there is one.
            _state.SpecializationState?.RecordConstantBufferUse(_stageIndex, useMask);
            return useMask;
        }

        /// <inheritdoc/>
        public bool QueryHasConstantBufferDrawParameters()
        {
            return _state.GraphicsState.HasConstantBufferDrawParameters;
        }

        /// <inheritdoc/>
        public bool QueryHasUnalignedStorageBuffer()
        {
            return _state.GraphicsState.HasUnalignedStorageBuffer || _state.ComputeState.HasUnalignedStorageBuffer;
        }

        /// <inheritdoc/>
        public bool QueryDualSourceBlendEnable()
        {
            return _state.GraphicsState.DualSourceBlendEnable;
        }

        /// <inheritdoc/>
        public InputTopology QueryPrimitiveTopology()
        {
            _state.SpecializationState?.RecordPrimitiveTopology();
            return ConvertToInputTopology(_state.GraphicsState.Topology, _state.GraphicsState.TessellationMode);
        }

        /// <inheritdoc/>
        public bool QueryProgramPointSize()
        {
            return _state.GraphicsState.ProgramPointSizeEnable;
        }

        /// <inheritdoc/>
        public float QueryPointSize()
        {
            return _state.GraphicsState.PointSize;
        }

        /// <inheritdoc/>
        public bool QueryTessCw()
        {
            return _state.GraphicsState.TessellationMode.UnpackCw();
        }

        /// <inheritdoc/>
        public TessPatchType QueryTessPatchType()
        {
            return _state.GraphicsState.TessellationMode.UnpackPatchType();
        }

        /// <inheritdoc/>
        public TessSpacing QueryTessSpacing()
        {
            return _state.GraphicsState.TessellationMode.UnpackSpacing();
        }

        /// <inheritdoc/>
        public TextureFormat QueryTextureFormat(int handle, int cbufSlot)
        {
            _state.SpecializationState?.RecordTextureFormat(_stageIndex, handle, cbufSlot);
            var descriptor = GetTextureDescriptor(handle, cbufSlot);
            return ConvertToTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb());
        }

        /// <inheritdoc/>
        public SamplerType QuerySamplerType(int handle, int cbufSlot)
        {
            _state.SpecializationState?.RecordTextureSamplerType(_stageIndex, handle, cbufSlot);
            return GetTextureDescriptor(handle, cbufSlot).UnpackTextureTarget().ConvertSamplerType();
        }

        /// <inheritdoc/>
        public bool QueryTextureCoordNormalized(int handle, int cbufSlot)
        {
            _state.SpecializationState?.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot);
            return GetTextureDescriptor(handle, cbufSlot).UnpackTextureCoordNormalized();
        }

        /// <summary>
        /// Gets the texture descriptor for a given texture on the pool.
        /// </summary>
        /// <param name="handle">Index of the texture (this is the word offset of the handle in the constant buffer)</param>
        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
        /// <returns>Texture descriptor</returns>
        private Image.TextureDescriptor GetTextureDescriptor(int handle, int cbufSlot)
        {
            if (_compute)
            {
                return _channel.TextureManager.GetComputeTextureDescriptor(
                    _state.PoolState.TexturePoolGpuVa,
                    _state.PoolState.TextureBufferIndex,
                    _state.PoolState.TexturePoolMaximumId,
                    handle,
                    cbufSlot);
            }
            else
            {
                return _channel.TextureManager.GetGraphicsTextureDescriptor(
                    _state.PoolState.TexturePoolGpuVa,
                    _state.PoolState.TextureBufferIndex,
                    _state.PoolState.TexturePoolMaximumId,
                    _stageIndex,
                    handle,
                    cbufSlot);
            }
        }

        /// <inheritdoc/>
        public bool QueryTransformDepthMinusOneToOne()
        {
            return _state.GraphicsState.DepthMode;
        }

        /// <inheritdoc/>
        public bool QueryTransformFeedbackEnabled()
        {
            return _state.TransformFeedbackDescriptors != null;
        }

        /// <inheritdoc/>
        public ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex)
        {
            // The descriptors array is not null checked here; see QueryTransformFeedbackEnabled.
            return _state.TransformFeedbackDescriptors[bufferIndex].AsSpan();
        }

        /// <inheritdoc/>
        public int QueryTransformFeedbackStride(int bufferIndex)
        {
            return _state.TransformFeedbackDescriptors[bufferIndex].Stride;
        }

        /// <inheritdoc/>
        public bool QueryEarlyZForce()
        {
            _state.SpecializationState?.RecordEarlyZForce();
            return _state.GraphicsState.EarlyZForce;
        }

        /// <inheritdoc/>
        public bool QueryViewportTransformDisable()
        {
            return _state.GraphicsState.ViewportTransformDisable;
        }

        /// <inheritdoc/>
        public void RegisterTexture(int handle, int cbufSlot)
        {
            _state.SpecializationState?.RegisterTexture(_stageIndex, handle, cbufSlot, GetTextureDescriptor(handle, cbufSlot));
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// GPU accessor.
    /// </summary>
    class GpuAccessorBase
    {
        private readonly GpuContext _context;
        private readonly ResourceCounts _resourceCounts;
        private readonly int _stageIndex;

        /// <summary>
        /// Creates a new GPU accessor.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="resourceCounts">Counters used to hand out sequential bindings when the API is not Vulkan</param>
        /// <param name="stageIndex">Shader stage index, used to compute per stage bindings on Vulkan</param>
        public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex)
        {
            _context = context;
            _resourceCounts = resourceCounts;
            _stageIndex = stageIndex;
        }

        /// <summary>
        /// Gets the host binding for a constant buffer.
        /// </summary>
        /// <param name="index">Constant buffer index</param>
        /// <returns>On Vulkan, a binding computed from the stage and index; otherwise the next sequential binding</returns>
        public int QueryBindingConstantBuffer(int index)
        {
            if (_context.Capabilities.Api == TargetApi.Vulkan)
            {
                // We need to start counting from 1 since binding 0 is reserved for the support uniform buffer.
                return GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer") + 1;
            }
            else
            {
                return _resourceCounts.UniformBuffersCount++;
            }
        }

        /// <summary>
        /// Gets the host binding for a storage buffer.
        /// </summary>
        /// <param name="index">Storage buffer index</param>
        /// <returns>On Vulkan, a binding computed from the stage and index; otherwise the next sequential binding</returns>
        public int QueryBindingStorageBuffer(int index)
        {
            if (_context.Capabilities.Api == TargetApi.Vulkan)
            {
                return GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
            }
            else
            {
                return _resourceCounts.StorageBuffersCount++;
            }
        }

        /// <summary>
        /// Gets the host binding for a texture.
        /// </summary>
        /// <param name="index">Texture index</param>
        /// <param name="isBuffer">True for buffer textures, which on Vulkan are placed after the regular textures of the stage</param>
        /// <returns>On Vulkan, a binding computed from the stage and index; otherwise the next sequential binding</returns>
        public int QueryBindingTexture(int index, bool isBuffer)
        {
            if (_context.Capabilities.Api == TargetApi.Vulkan)
            {
                if (isBuffer)
                {
                    index += (int)_context.Capabilities.MaximumTexturesPerStage;
                }

                // Each stage gets twice the per stage maximum: regular textures first, buffer textures after.
                return GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture");
            }
            else
            {
                return _resourceCounts.TexturesCount++;
            }
        }

        /// <summary>
        /// Gets the host binding for an image.
        /// </summary>
        /// <param name="index">Image index</param>
        /// <param name="isBuffer">True for buffer images, which on Vulkan are placed after the regular images of the stage</param>
        /// <returns>On Vulkan, a binding computed from the stage and index; otherwise the next sequential binding</returns>
        public int QueryBindingImage(int index, bool isBuffer)
        {
            if (_context.Capabilities.Api == TargetApi.Vulkan)
            {
                if (isBuffer)
                {
                    index += (int)_context.Capabilities.MaximumImagesPerStage;
                }

                // Each stage gets twice the per stage maximum: regular images first, buffer images after.
                return GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image");
            }
            else
            {
                return _resourceCounts.ImagesCount++;
            }
        }

        /// <summary>
        /// Computes a binding from a resource index, placing the resources of each stage on their own range.
        /// </summary>
        /// <param name="index">Resource index inside the stage</param>
        /// <param name="maxPerStage">Number of bindings reserved for each stage</param>
        /// <param name="resourceName">Resource name used on the error message</param>
        /// <returns>Binding for the resource</returns>
        private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName)
        {
            // An out of range index is only logged; the binding is still computed and returned.
            if ((uint)index >= maxPerStage)
            {
                Logger.Error?.Print(LogClass.Gpu, $"{resourceName} index {index} exceeds per stage limit of {maxPerStage}.");
            }

            return GetStageIndex() * (int)maxPerStage + index;
        }

        /// <summary>
        /// Gets the remapped stage index used to compute the bindings.
        /// </summary>
        /// <returns>Remapped stage index</returns>
        private int GetStageIndex()
        {
            // This is just a simple remapping to ensure that most frequently used shader stages
            // have the lowest binding numbers.
            // This is useful because if we need to run on a system with a low limit on the bindings,
            // then we can still get most games working as the most common shaders will have low binding numbers.
            return _stageIndex switch
            {
                4 => 1, // Fragment
                3 => 2, // Geometry
                1 => 3, // Tessellation control
                2 => 4, // Tessellation evaluation
                _ => 0 // Vertex/Compute
            };
        }

        // The queries below return the matching host capability value unchanged.

        public int QueryHostGatherBiasPrecision() => _context.Capabilities.GatherBiasPrecision;

        public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;

        public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;

        public bool QueryHostHasVectorIndexingBug() => _context.Capabilities.HasVectorIndexingBug;

        public int QueryHostStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment;

        public bool QueryHostSupportsBgraFormat() => _context.Capabilities.SupportsBgraFormat;

        public bool QueryHostSupportsFragmentShaderInterlock() => _context.Capabilities.SupportsFragmentShaderInterlock;

        public bool QueryHostSupportsFragmentShaderOrderingIntel() => _context.Capabilities.SupportsFragmentShaderOrderingIntel;

        public bool QueryHostSupportsGeometryShader() => _context.Capabilities.SupportsGeometryShader;

        public bool QueryHostSupportsGeometryShaderPassthrough() => _context.Capabilities.SupportsGeometryShaderPassthrough;

        public bool QueryHostSupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted;

        public bool QueryHostSupportsLayerVertexTessellation() => _context.Capabilities.SupportsLayerVertexTessellation;

        public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;

        public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;

        public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;

        public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;

        public bool QueryHostSupportsViewportIndexVertexTessellation() => _context.Capabilities.SupportsViewportIndexVertexTessellation;

        public bool QueryHostSupportsViewportMask() => _context.Capabilities.SupportsViewportMask;

        /// <summary>
        /// Converts a packed Maxwell texture format to the shader translator texture format.
        /// </summary>
        /// <param name="format">Packed maxwell format</param>
        /// <param name="formatSrgb">Indicates if the format is sRGB</param>
        /// <returns>Shader translator texture format</returns>
        protected static TextureFormat ConvertToTextureFormat(uint format, bool formatSrgb)
        {
            if (!FormatTable.TryGetTextureFormat(format, formatSrgb, out FormatInfo formatInfo))
            {
                return TextureFormat.Unknown;
            }

            return formatInfo.Format switch
            {
                Format.R8Unorm => TextureFormat.R8Unorm,
                Format.R8Snorm => TextureFormat.R8Snorm,
                Format.R8Uint => TextureFormat.R8Uint,
                Format.R8Sint => TextureFormat.R8Sint,
                Format.R16Float => TextureFormat.R16Float,
                Format.R16Unorm => TextureFormat.R16Unorm,
                Format.R16Snorm => TextureFormat.R16Snorm,
                Format.R16Uint => TextureFormat.R16Uint,
                Format.R16Sint => TextureFormat.R16Sint,
                Format.R32Float => TextureFormat.R32Float,
                Format.R32Uint => TextureFormat.R32Uint,
                Format.R32Sint => TextureFormat.R32Sint,
                Format.R8G8Unorm => TextureFormat.R8G8Unorm,
                Format.R8G8Snorm => TextureFormat.R8G8Snorm,
                Format.R8G8Uint => TextureFormat.R8G8Uint,
                Format.R8G8Sint => TextureFormat.R8G8Sint,
                Format.R16G16Float => TextureFormat.R16G16Float,
                Format.R16G16Unorm => TextureFormat.R16G16Unorm,
                Format.R16G16Snorm => TextureFormat.R16G16Snorm,
                Format.R16G16Uint => TextureFormat.R16G16Uint,
                Format.R16G16Sint => TextureFormat.R16G16Sint,
                Format.R32G32Float => TextureFormat.R32G32Float,
                Format.R32G32Uint => TextureFormat.R32G32Uint,
                Format.R32G32Sint => TextureFormat.R32G32Sint,
                Format.R8G8B8A8Unorm => TextureFormat.R8G8B8A8Unorm,
                Format.R8G8B8A8Snorm => TextureFormat.R8G8B8A8Snorm,
                Format.R8G8B8A8Uint => TextureFormat.R8G8B8A8Uint,
                Format.R8G8B8A8Sint => TextureFormat.R8G8B8A8Sint,
                // The sRGB variant is mapped to the same translator format as R8G8B8A8Unorm.
                Format.R8G8B8A8Srgb => TextureFormat.R8G8B8A8Unorm,
                Format.R16G16B16A16Float => TextureFormat.R16G16B16A16Float,
                Format.R16G16B16A16Unorm => TextureFormat.R16G16B16A16Unorm,
                Format.R16G16B16A16Snorm => TextureFormat.R16G16B16A16Snorm,
                Format.R16G16B16A16Uint => TextureFormat.R16G16B16A16Uint,
                Format.R16G16B16A16Sint => TextureFormat.R16G16B16A16Sint,
                Format.R32G32B32A32Float => TextureFormat.R32G32B32A32Float,
                Format.R32G32B32A32Uint => TextureFormat.R32G32B32A32Uint,
                Format.R32G32B32A32Sint => TextureFormat.R32G32B32A32Sint,
                Format.R10G10B10A2Unorm => TextureFormat.R10G10B10A2Unorm,
                Format.R10G10B10A2Uint => TextureFormat.R10G10B10A2Uint,
                Format.R11G11B10Float => TextureFormat.R11G11B10Float,
                _ => TextureFormat.Unknown
            };
        }

        /// <summary>
        /// Converts the Maxwell primitive topology to the shader translator topology.
        /// </summary>
        /// <param name="topology">Maxwell primitive topology</param>
        /// <param name="tessellationMode">Maxwell tessellation mode</param>
        /// <returns>Shader translator topology</returns>
        protected static InputTopology ConvertToInputTopology(PrimitiveTopology topology, TessMode tessellationMode)
        {
            return topology switch
            {
                PrimitiveTopology.Points => InputTopology.Points,
                PrimitiveTopology.Lines or
                PrimitiveTopology.LineLoop or
                PrimitiveTopology.LineStrip => InputTopology.Lines,
                PrimitiveTopology.LinesAdjacency or
                PrimitiveTopology.LineStripAdjacency => InputTopology.LinesAdjacency,
                PrimitiveTopology.Triangles or
                PrimitiveTopology.TriangleStrip or
                PrimitiveTopology.TriangleFan => InputTopology.Triangles,
                PrimitiveTopology.TrianglesAdjacency or
                PrimitiveTopology.TriangleStripAdjacency => InputTopology.TrianglesAdjacency,
                // For patches, the topology depends on the tessellation patch type.
                PrimitiveTopology.Patches => tessellationMode.UnpackPatchType() == TessPatchType.Isolines
                    ? InputTopology.Lines
                    : InputTopology.Triangles,
                _ => InputTopology.Points
            };
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// State used by the <see cref="GpuAccessor"/>.
    /// </summary>
    class GpuAccessorState
    {
        /// <summary>
        /// GPU texture pool state.
        /// </summary>
        public readonly GpuChannelPoolState PoolState;

        /// <summary>
        /// GPU compute state, for compute shaders.
        /// </summary>
        public readonly GpuChannelComputeState ComputeState;

        /// <summary>
        /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders.
        /// </summary>
        public readonly GpuChannelGraphicsState GraphicsState;

        /// <summary>
        /// Shader specialization state (shared by all stages).
        /// </summary>
        public readonly ShaderSpecializationState SpecializationState;

        /// <summary>
        /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null.
        /// </summary>
        public readonly TransformFeedbackDescriptor[] TransformFeedbackDescriptors;

        /// <summary>
        /// Shader resource counts (shared by all stages).
        /// </summary>
        public readonly ResourceCounts ResourceCounts;

        /// <summary>
        /// Creates a new GPU accessor state.
        /// </summary>
        /// <param name="poolState">GPU texture pool state</param>
        /// <param name="computeState">GPU compute state, for compute shaders</param>
        /// <param name="graphicsState">GPU graphics state, for vertex, tessellation, geometry and fragment shaders</param>
        /// <param name="specializationState">Shader specialization state (shared by all stages)</param>
        /// <param name="transformFeedbackDescriptors">Transform feedback information, if the shader uses transform feedback. Otherwise, should be null</param>
        public GpuAccessorState(
            GpuChannelPoolState poolState,
            GpuChannelComputeState computeState,
            GpuChannelGraphicsState graphicsState,
            ShaderSpecializationState specializationState,
            TransformFeedbackDescriptor[] transformFeedbackDescriptors = null)
        {
            PoolState = poolState;
            GraphicsState = graphicsState;
            ComputeState = computeState;
            SpecializationState = specializationState;
            TransformFeedbackDescriptors = transformFeedbackDescriptors;
            // The resource counts always start from a new instance; they are not supplied by the caller.
            ResourceCounts = new ResourceCounts();
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Compute state that the <see cref="GpuAccessor"/> reads while translating compute shaders.
    /// </summary>
    readonly struct GpuChannelComputeState
    {
        // Disk shader cache compatibility requires that new fields are only ever added at the end of the struct.

        /// <summary>
        /// Size of the compute shader local group on the X dimension.
        /// </summary>
        public readonly int LocalSizeX;

        /// <summary>
        /// Size of the compute shader local group on the Y dimension.
        /// </summary>
        public readonly int LocalSizeY;

        /// <summary>
        /// Size of the compute shader local group on the Z dimension.
        /// </summary>
        public readonly int LocalSizeZ;

        /// <summary>
        /// Size of the local memory used by the compute shader.
        /// </summary>
        public readonly int LocalMemorySize;

        /// <summary>
        /// Size of the shared memory used by the compute shader.
        /// </summary>
        public readonly int SharedMemorySize;

        /// <summary>
        /// Whether any storage buffer use is unaligned.
        /// </summary>
        public readonly bool HasUnalignedStorageBuffer;

        /// <summary>
        /// Initializes the compute state with the given values.
        /// </summary>
        /// <param name="localSizeX">Size of the local group on the X dimension</param>
        /// <param name="localSizeY">Size of the local group on the Y dimension</param>
        /// <param name="localSizeZ">Size of the local group on the Z dimension</param>
        /// <param name="localMemorySize">Size of the local memory used by the compute shader</param>
        /// <param name="sharedMemorySize">Size of the shared memory used by the compute shader</param>
        /// <param name="hasUnalignedStorageBuffer">Whether any storage buffer use is unaligned</param>
        public GpuChannelComputeState(
            int localSizeX,
            int localSizeY,
            int localSizeZ,
            int localMemorySize,
            int sharedMemorySize,
            bool hasUnalignedStorageBuffer)
        {
            (LocalSizeX, LocalSizeY, LocalSizeZ) = (localSizeX, localSizeY, localSizeZ);
            (LocalMemorySize, SharedMemorySize) = (localMemorySize, sharedMemorySize);
            HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Shader;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// State used by the <see cref="GpuAccessor"/>.
    /// </summary>
    struct GpuChannelGraphicsState
    {
        // New fields should be added to the end of the struct to keep disk shader cache compatibility.

        /// <summary>
        /// Early Z force enable.
        /// </summary>
        public bool EarlyZForce;

        /// <summary>
        /// Primitive topology of current draw.
        /// </summary>
        public PrimitiveTopology Topology;

        /// <summary>
        /// Tessellation mode.
        /// </summary>
        public TessMode TessellationMode;

        /// <summary>
        /// Indicates whether alpha-to-coverage is enabled.
        /// </summary>
        public bool AlphaToCoverageEnable;

        /// <summary>
        /// Indicates whether alpha-to-coverage dithering is enabled.
        /// </summary>
        public bool AlphaToCoverageDitherEnable;

        /// <summary>
        /// Indicates whether the viewport transform is disabled.
        /// </summary>
        public bool ViewportTransformDisable;

        /// <summary>
        /// Depth mode zero to one or minus one to one.
        /// </summary>
        public bool DepthMode;

        /// <summary>
        /// Indicates if the point size is set on the shader or is fixed.
        /// </summary>
        public bool ProgramPointSizeEnable;

        /// <summary>
        /// Point size used if <see cref="ProgramPointSizeEnable" /> is false.
        /// </summary>
        public float PointSize;

        /// <summary>
        /// Indicates whether alpha test is enabled.
        /// </summary>
        public bool AlphaTestEnable;

        /// <summary>
        /// When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded.
        /// </summary>
        public CompareOp AlphaTestCompare;

        /// <summary>
        /// When alpha test is enabled, indicates the value to compare with the fragment output alpha.
        /// </summary>
        public float AlphaTestReference;

        /// <summary>
        /// Type of the vertex attributes consumed by the shader.
        /// </summary>
        public Array32<AttributeType> AttributeTypes;

        /// <summary>
        /// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
        /// </summary>
        public bool HasConstantBufferDrawParameters;

        /// <summary>
        /// Indicates that any storage buffer use is unaligned.
        /// </summary>
        public bool HasUnalignedStorageBuffer;

        /// <summary>
        /// Type of the fragment shader outputs.
        /// </summary>
        public Array8<AttributeType> FragmentOutputTypes;

        /// <summary>
        /// Indicates whether dual source blend is enabled.
        /// </summary>
        public bool DualSourceBlendEnable;

        /// <summary>
        /// Creates a new GPU graphics state.
        /// </summary>
        /// <param name="earlyZForce">Early Z force enable</param>
        /// <param name="topology">Primitive topology</param>
        /// <param name="tessellationMode">Tessellation mode</param>
        /// <param name="alphaToCoverageEnable">Indicates whether alpha-to-coverage is enabled</param>
        /// <param name="alphaToCoverageDitherEnable">Indicates whether alpha-to-coverage dithering is enabled</param>
        /// <param name="viewportTransformDisable">Indicates whether the viewport transform is disabled</param>
        /// <param name="depthMode">Depth mode zero to one or minus one to one</param>
        /// <param name="programPointSizeEnable">Indicates if the point size is set on the shader or is fixed</param>
        /// <param name="pointSize">Point size if not set from shader</param>
        /// <param name="alphaTestEnable">Indicates whether alpha test is enabled</param>
        /// <param name="alphaTestCompare">When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded</param>
        /// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
        /// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
        /// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
        /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
        /// <param name="fragmentOutputTypes">Type of the fragment shader outputs</param>
        /// <param name="dualSourceBlendEnable">Indicates whether dual source blend is enabled</param>
        public GpuChannelGraphicsState(
            bool earlyZForce,
            PrimitiveTopology topology,
            TessMode tessellationMode,
            bool alphaToCoverageEnable,
            bool alphaToCoverageDitherEnable,
            bool viewportTransformDisable,
            bool depthMode,
            bool programPointSizeEnable,
            float pointSize,
            bool alphaTestEnable,
            CompareOp alphaTestCompare,
            float alphaTestReference,
            ref Array32<AttributeType> attributeTypes,
            bool hasConstantBufferDrawParameters,
            bool hasUnalignedStorageBuffer,
            ref Array8<AttributeType> fragmentOutputTypes,
            bool dualSourceBlendEnable)
        {
            EarlyZForce = earlyZForce;
            Topology = topology;
            TessellationMode = tessellationMode;
            AlphaToCoverageEnable = alphaToCoverageEnable;
            AlphaToCoverageDitherEnable = alphaToCoverageDitherEnable;
            ViewportTransformDisable = viewportTransformDisable;
            DepthMode = depthMode;
            ProgramPointSizeEnable = programPointSizeEnable;
            PointSize = pointSize;
            AlphaTestEnable = alphaTestEnable;
            AlphaTestCompare = alphaTestCompare;
            AlphaTestReference = alphaTestReference;
            // The by-reference arguments are copied into the struct here.
            AttributeTypes = attributeTypes;
            HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
            HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
            FragmentOutputTypes = fragmentOutputTypes;
            DualSourceBlendEnable = dualSourceBlendEnable;
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs
using System;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// State used by the <see cref="GpuAccessor"/>.
    /// </summary>
    readonly struct GpuChannelPoolState : IEquatable<GpuChannelPoolState>
    {
        /// <summary>
        /// GPU virtual address of the texture pool.
        /// </summary>
        public readonly ulong TexturePoolGpuVa;

        /// <summary>
        /// Maximum ID of the texture pool.
        /// </summary>
        public readonly int TexturePoolMaximumId;

        /// <summary>
        /// Constant buffer slot where the texture handles are located.
        /// </summary>
        public readonly int TextureBufferIndex;

        /// <summary>
        /// Creates a new GPU texture pool state.
        /// </summary>
        /// <param name="texturePoolGpuVa">GPU virtual address of the texture pool</param>
        /// <param name="texturePoolMaximumId">Maximum ID of the texture pool</param>
        /// <param name="textureBufferIndex">Constant buffer slot where the texture handles are located</param>
        public GpuChannelPoolState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex)
        {
            TexturePoolGpuVa = texturePoolGpuVa;
            TexturePoolMaximumId = texturePoolMaximumId;
            TextureBufferIndex = textureBufferIndex;
        }

        /// <summary>
        /// Check if the pool states are equal.
        /// </summary>
        /// <param name="other">Pool state to compare with</param>
        /// <returns>True if they are equal, false otherwise</returns>
        public bool Equals(GpuChannelPoolState other)
        {
            return TexturePoolGpuVa == other.TexturePoolGpuVa &&
                TexturePoolMaximumId == other.TexturePoolMaximumId &&
                TextureBufferIndex == other.TextureBufferIndex;
        }

        /// <summary>
        /// Check if the pool states are equal.
        /// </summary>
        /// <remarks>
        /// Overridden so that the untyped comparison goes through the same field-wise check as
        /// <see cref="Equals(GpuChannelPoolState)"/> instead of the default value type implementation.
        /// </remarks>
        /// <param name="obj">Object to compare with</param>
        /// <returns>True if <paramref name="obj"/> is a pool state equal to this one, false otherwise</returns>
        public override bool Equals(object obj)
        {
            return obj is GpuChannelPoolState other && Equals(other);
        }

        /// <summary>
        /// Computes hash code from the pool state.
        /// </summary>
        /// <remarks>
        /// Combines exactly the fields compared by <see cref="Equals(GpuChannelPoolState)"/>,
        /// so equal states always produce the same hash code.
        /// </remarks>
        /// <returns>Hash code</returns>
        public override int GetHashCode()
        {
            return HashCode.Combine(TexturePoolGpuVa, TexturePoolMaximumId, TextureBufferIndex);
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs new file mode 100644 index 00000000..584eefdc --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs @@ -0,0 +1,113 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// <summary> + /// State of a hash calculation. + /// </summary> + struct HashState + { + // This is using a slightly modified implementation of FastHash64. + // Reference: https://github.com/ztanml/fast-hash/blob/master/fasthash.c + private const ulong M = 0x880355f21e6d1965UL; + // Running hash value, seeded by Initialize and updated by Continue. + private ulong _hash; + // Byte offset (a multiple of 8) up to which the data was already consumed by Continue. + private int _start; + + /// <summary> + /// One-shot hash calculation for the given data. + /// </summary> + /// <param name="data">Data to be hashed</param> + /// <returns>Hash of the given data</returns> + public static uint CalcHash(ReadOnlySpan<byte> data) + { + HashState state = new HashState(); + + state.Initialize(); + state.Continue(data); + return state.Finalize(data); + } + + /// <summary> + /// Initializes the hash state. + /// </summary> + public void Initialize() + { + _hash = 23; + } + + /// <summary> + /// Calculates the hash of the given data. + /// </summary> + /// <remarks> + /// The full data must be passed on <paramref name="data"/>. + /// If this is not the first time the method is called, then <paramref name="data"/> must start with the data passed on the last call. + /// If a smaller slice of the data was already hashed before, only the additional data will be hashed. + /// This can be used for additive hashing of data in chunks. 
+ /// </remarks> + /// <param name="data">Data to be hashed</param> + public void Continue(ReadOnlySpan<byte> data) + { + ulong h = _hash; + + ReadOnlySpan<ulong> dataAsUlong = MemoryMarshal.Cast<byte, ulong>(data.Slice(_start)); + + for (int i = 0; i < dataAsUlong.Length; i++) + { + ulong value = dataAsUlong[i]; + + h ^= Mix(value); + h *= M; + } + + _hash = h; + // Only whole 8-byte words were consumed above; remember where they end so the next call resumes from there. + _start = data.Length & ~7; + } + + /// <summary> + /// Performs the hash finalization step, and returns the calculated hash. + /// </summary> + /// <remarks> + /// The full data must be passed on <paramref name="data"/>. + /// <paramref name="data"/> must start with the data passed on the last call to <see cref="Continue"/>. + /// No internal state is changed, so one can still continue hashing data with <see cref="Continue"/> + /// after calling this method. + /// </remarks> + /// <param name="data">Data to be hashed</param> + /// <returns>Hash of all the data hashed with this <see cref="HashState"/></returns> + public uint Finalize(ReadOnlySpan<byte> data) + { + ulong h = _hash; + + int remainder = data.Length & 7; + if (remainder != 0) + { + ulong v = 0; + + for (int i = data.Length - remainder; i < data.Length; i++) + { + // NOTE(review): the shift count is (i - remainder) * 8, not the byte's index within the tail times 8. + // C# only uses the low 6 bits of a 64-bit shift count, so the result is still deterministic; + // changing this would change the hash of any data whose length is not a multiple of 8 - confirm before touching. + v |= (ulong)data[i] << ((i - remainder) * 8); + } + + h ^= Mix(v); + h *= M; + } + + h = Mix(h); + return (uint)(h - (h >> 32)); + } + + /// <summary> + /// Hash mix function. 
+ /// </summary> + /// <param name="h">Hash to mix</param> + /// <returns>Mixed hash</returns> + private static ulong Mix(ulong h) + { + h ^= h >> 23; + h *= 0x2127599bf4325c37UL; + h ^= h >> 47; + return h; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs new file mode 100644 index 00000000..c982cd9f --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs @@ -0,0 +1,27 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// <summary> + /// Data accessor, used by <see cref="PartitionedHashTable{T}"/> to access data of unknown length. + /// </summary> + /// <remarks> + /// This will be used to access chunks of data and try finding a match on the table. + /// This is necessary because the data size is assumed to be unknown, and so the + /// hash table must try to "guess" the size of the data based on the entries on the table. + /// </remarks> + public interface IDataAccessor + { + /// <summary> + /// Gets a span of shader code at the specified offset, with at most the specified size. + /// </summary> + /// <remarks> + /// This might return a span smaller than the requested <paramref name="length"/> if there's + /// no more code available. + /// </remarks> + /// <param name="offset">Offset in shader code</param> + /// <param name="length">Size in bytes</param> + /// <returns>Shader code span</returns> + ReadOnlySpan<byte> GetSpan(int offset, int length); + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs new file mode 100644 index 00000000..d7cb3d99 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs @@ -0,0 +1,451 @@ +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// <summary> + /// Partitioned hash table. 
+ /// </summary> + /// <typeparam name="T">Hash table entry type</typeparam> + class PartitionHashTable<T> + { + /// <summary> + /// Hash table entry. + /// </summary> + private struct Entry + { + /// <summary> + /// Hash of the entry data: the first <see cref="OwnSize"/> bytes of <see cref="Data"/> for partial entries, + /// or all of <see cref="Data"/> for full entries. + /// </summary> + public readonly uint Hash; + + /// <summary> + /// If this entry is only a sub-region of <see cref="Data"/>, this indicates the size in bytes + /// of that region. Otherwise, it should be zero. + /// </summary> + public readonly int OwnSize; + + /// <summary> + /// Data used to compute the hash for this entry. + /// </summary> + /// <remarks> + /// To avoid additional allocations, this might be an instance of the full entry data, + /// and only a sub-region of it might be actually used by this entry. Such sub-region + /// has its size indicated by <see cref="OwnSize"/> in this case. + /// </remarks> + public readonly byte[] Data; + + /// <summary> + /// Item associated with this entry. + /// </summary> + public T Item; + + /// <summary> + /// Indicates if the entry is partial, which means that this entry is only for a sub-region of the data. + /// </summary> + /// <remarks> + /// Partial entries have no items associated with them. They just indicate that the data might be present on + /// the table, and one must keep looking for the full entry on other tables of larger data size. + /// </remarks> + public bool IsPartial => OwnSize != 0; + + /// <summary> + /// Creates a new partial hash table entry. + /// </summary> + /// <param name="hash">Hash of the data</param> + /// <param name="ownerData">Full data</param> + /// <param name="ownSize">Size of the sub-region of data that belongs to this entry</param> + public Entry(uint hash, byte[] ownerData, int ownSize) + { + Hash = hash; + OwnSize = ownSize; + Data = ownerData; + Item = default; + } + + /// <summary> + /// Creates a new full hash table entry. 
+ /// </summary> + /// <param name="hash">Hash of the data</param> + /// <param name="data">Data</param> + /// <param name="item">Item associated with this entry</param> + public Entry(uint hash, byte[] data, T item) + { + Hash = hash; + OwnSize = 0; + Data = data; + Item = item; + } + + /// <summary> + /// Gets the data for this entry, either full or partial. + /// </summary> + /// <returns>Data sub-region</returns> + public ReadOnlySpan<byte> GetData() + { + if (OwnSize != 0) + { + return new ReadOnlySpan<byte>(Data).Slice(0, OwnSize); + } + + return Data; + } + } + + /// <summary> + /// Hash table bucket. + /// </summary> + private struct Bucket + { + /// <summary> + /// Inline entry, to avoid allocations for the common single entry case. + /// </summary> + public Entry InlineEntry; + + /// <summary> + /// List of additional entries for the not-so-common multiple entries case. + /// </summary> + public List<Entry> MoreEntries; + } + + private Bucket[] _buckets; + private int _count; + + /// <summary> + /// Total amount of entries on the hash table. + /// </summary> + public int Count => _count; + + /// <summary> + /// Creates a new instance of the partitioned hash table. + /// </summary> + public PartitionHashTable() + { + _buckets = Array.Empty<Bucket>(); + } + + /// <summary> + /// Gets an item on the table, or adds a new one if not present. + /// </summary> + /// <param name="data">Data</param> + /// <param name="dataHash">Hash of the data</param> + /// <param name="item">Item to be added if not found</param> + /// <returns>Existing item if found, or <paramref name="item"/> if not found</returns> + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out T existingItem)) + { + return existingItem; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return item; + } + + /// <summary> + /// Adds an item to the hash table. 
+ /// </summary> + /// <param name="data">Data</param> + /// <param name="dataHash">Hash of the data</param> + /// <param name="item">Item to be added</param> + /// <returns>True if the item was added, false due to an item associated with the data already being on the table</returns> + public bool Add(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// <summary> + /// Adds a partial entry to the hash table. + /// </summary> + /// <param name="ownerData">Full data</param> + /// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param> + /// <returns>True if added, false otherwise</returns> + public bool AddPartial(byte[] ownerData, int ownSize) + { + ReadOnlySpan<byte> data = new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize); + + return AddPartial(ownerData, HashState.CalcHash(data), ownSize); + } + + /// <summary> + /// Adds a partial entry to the hash table. + /// </summary> + /// <param name="ownerData">Full data</param> + /// <param name="dataHash">Hash of the data sub-region</param> + /// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param> + /// <returns>True if added, false otherwise</returns> + public bool AddPartial(byte[] ownerData, uint dataHash, int ownSize) + { + ReadOnlySpan<byte> data = new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize); + + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, ownerData, ownSize); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// <summary> + /// Adds entry with a given hash to the table. 
+ /// </summary> + /// <param name="dataHash">Hash of the entry</param> + /// <param name="entry">Entry</param> + private void AddToBucket(uint dataHash, ref Entry entry) + { + int pow2Count = GetPow2Count(++_count); + if (pow2Count != _buckets.Length) + { + Rebuild(pow2Count); + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + AddToBucket(ref bucket, ref entry); + } + + /// <summary> + /// Adds an entry to a bucket. + /// </summary> + /// <param name="bucket">Bucket to add the entry into</param> + /// <param name="entry">Entry to be added</param> + private void AddToBucket(ref Bucket bucket, ref Entry entry) + { + if (bucket.InlineEntry.Data == null) + { + bucket.InlineEntry = entry; + } + else + { + (bucket.MoreEntries ??= new List<Entry>()).Add(entry); + } + } + + /// <summary> + /// Creates partial entries on a new hash table for all existing full entries. + /// </summary> + /// <remarks> + /// This should be called every time a new hash table is created, and there are hash + /// tables with data sizes that are higher than that of the new table. + /// This will then fill the new hash table with "partial" entries of full entries + /// on the hash tables with higher size. + /// </remarks> + /// <param name="newTable">New hash table</param> + /// <param name="newEntrySize">Size of the data on the new hash table</param> + public void FillPartials(PartitionHashTable<T> newTable, int newEntrySize) + { + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + ref Entry inlineEntry = ref bucket.InlineEntry; + + if (inlineEntry.Data != null) + { + if (!inlineEntry.IsPartial) + { + newTable.AddPartial(inlineEntry.Data, newEntrySize); + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.IsPartial) + { + continue; + } + + newTable.AddPartial(entry.Data, newEntrySize); + } + } + } + } + } + + /// <summary> + /// Tries to find an item on the table. 
+ /// </summary> + /// <param name="dataHash">Hash of <paramref name="data"/></param> + /// <param name="data">Data to find</param> + /// <param name="item">Item associated with the data</param> + /// <returns>True if an item was found, false otherwise</returns> + private bool TryFindItem(uint dataHash, ReadOnlySpan<byte> data, out T item) + { + if (_count == 0) + { + item = default; + return false; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(data)) + { + item = bucket.InlineEntry.Item; + return true; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(data)) + { + item = entry.Item; + return true; + } + } + } + } + + item = default; + return false; + } + + /// <summary> + /// Indicates the result of a hash table lookup. + /// </summary> + public enum SearchResult + { + /// <summary> + /// No entry was found, the search must continue on hash tables of lower size. + /// </summary> + NotFound, + + /// <summary> + /// A partial entry was found, the search must continue on hash tables of higher size. + /// </summary> + FoundPartial, + + /// <summary> + /// A full entry was found, the search was concluded and the item can be retrieved. + /// </summary> + FoundFull + } + + /// <summary> + /// Tries to find an item on the table. 
+ /// </summary> + /// <param name="dataAccessor">Data accessor</param> + /// <param name="size">Size of the hash table data</param> + /// <param name="item">The item on the table, if found, otherwise unmodified</param> + /// <param name="data">The data on the table, if found, otherwise unmodified</param> + /// <returns>Table lookup result</returns> + public SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, int size, scoped ref T item, scoped ref byte[] data) + { + if (_count == 0) + { + return SearchResult.NotFound; + } + + ReadOnlySpan<byte> dataSpan = dataAccessor.GetSpanAndHash(size, out uint dataHash); + + if (dataSpan.Length != size) + { + return SearchResult.NotFound; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(dataSpan)) + { + item = bucket.InlineEntry.Item; + data = bucket.InlineEntry.Data; + return bucket.InlineEntry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(dataSpan)) + { + item = entry.Item; + data = entry.Data; + return entry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + } + } + } + + return SearchResult.NotFound; + } + + /// <summary> + /// Rebuilds the table for a new count. 
+ /// </summary> + /// <param name="newPow2Count">New power of two count of the table</param> + private void Rebuild(int newPow2Count) + { + Bucket[] newBuckets = new Bucket[newPow2Count]; + + uint mask = (uint)newPow2Count - 1; + + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + + if (bucket.InlineEntry.Data != null) + { + AddToBucket(ref newBuckets[(int)(bucket.InlineEntry.Hash & mask)], ref bucket.InlineEntry); + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + Entry entryCopy = entry; + AddToBucket(ref newBuckets[(int)(entry.Hash & mask)], ref entryCopy); + } + } + } + } + + _buckets = newBuckets; + } + + /// <summary> + /// Gets the bucket for a given hash. + /// </summary> + /// <param name="hash">Data hash</param> + /// <returns>Bucket for the hash</returns> + private ref Bucket GetBucketForHash(uint hash) + { + int index = (int)(hash & (_buckets.Length - 1)); + + return ref _buckets[index]; + } + + /// <summary> + /// Gets a power of two count from a regular count. + /// </summary> + /// <param name="count">Count</param> + /// <returns>Power of two count</returns> + private static int GetPow2Count(int count) + { + // This returns the nearest power of two that is lower than or equal to count. + // This was done to optimize memory usage rather than performance. + return 1 << BitOperations.Log2((uint)count); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs new file mode 100644 index 00000000..e9a4f654 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs @@ -0,0 +1,244 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// <summary> + /// Partitioned hash table. 
+ /// </summary> + /// <typeparam name="T">Type of the items stored on the table</typeparam> + public class PartitionedHashTable<T> + { + /// <summary> + /// Entry for a given data size. + /// </summary> + private readonly struct SizeEntry + { + /// <summary> + /// Size for the data that will be stored on the hash table on this entry. + /// </summary> + public int Size { get; } + + /// <summary> + /// Number of entries on the hash table. + /// </summary> + public int TableCount => _table.Count; + + private readonly PartitionHashTable<T> _table; + + /// <summary> + /// Creates an entry for a given size. + /// </summary> + /// <param name="size">Size of the data to be stored on this entry</param> + public SizeEntry(int size) + { + Size = size; + _table = new PartitionHashTable<T>(); + } + + /// <summary> + /// Gets an item for existing data, or adds a new one. + /// </summary> + /// <param name="data">Data associated with the item</param> + /// <param name="dataHash">Hash of <paramref name="data"/></param> + /// <param name="item">Item to be added</param> + /// <returns>Existing item, or <paramref name="item"/> if not present</returns> + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.GetOrAdd(data, dataHash, item); + } + + /// <summary> + /// Adds a new item. + /// </summary> + /// <param name="data">Data associated with the item</param> + /// <param name="dataHash">Hash of <paramref name="data"/></param> + /// <param name="item">Item to be added</param> + /// <returns>True if added, false otherwise</returns> + public bool Add(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.Add(data, dataHash, item); + } + + /// <summary> + /// Adds a partial entry. 
+ /// </summary> + /// <param name="ownerData">Full entry data</param> + /// <param name="dataHash">Hash of the sub-region of the data that belongs to this entry</param> + /// <returns>True if added, false otherwise</returns> + public bool AddPartial(byte[] ownerData, uint dataHash) + { + return _table.AddPartial(ownerData, dataHash, Size); + } + + /// <summary> + /// Fills a new hash table with "partials" of existing full entries of higher size. + /// </summary> + /// <param name="newEntry">Entry with the new hash table</param> + public void FillPartials(SizeEntry newEntry) + { + Debug.Assert(newEntry.Size < Size); + _table.FillPartials(newEntry._table, newEntry.Size); + } + + /// <summary> + /// Tries to find an item on the hash table. + /// </summary> + /// <param name="dataAccessor">Data accessor</param> + /// <param name="item">The item on the table, if found, otherwise unmodified</param> + /// <param name="data">The data on the table, if found, otherwise unmodified</param> + /// <returns>Table lookup result</returns> + public PartitionHashTable<T>.SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, scoped ref T item, scoped ref byte[] data) + { + return _table.TryFindItem(ref dataAccessor, Size, ref item, ref data); + } + } + + private readonly List<SizeEntry> _sizeTable; + + /// <summary> + /// Creates a new partitioned hash table. + /// </summary> + public PartitionedHashTable() + { + _sizeTable = new List<SizeEntry>(); + } + + /// <summary> + /// Adds a new item to the table. + /// </summary> + /// <param name="data">Data</param> + /// <param name="item">Item associated with the data</param> + public void Add(byte[] data, T item) + { + GetOrAdd(data, item); + } + + /// <summary> + /// Gets an existing item from the table, or adds a new one if not present. 
+ /// </summary> + /// <param name="data">Data</param> + /// <param name="item">Item associated with the data</param> + /// <returns>Existing item, or <paramref name="item"/> if not present</returns> + public T GetOrAdd(byte[] data, T item) + { + SizeEntry sizeEntry; + + int index = BinarySearch(_sizeTable, data.Length); + if (index < _sizeTable.Count && _sizeTable[index].Size == data.Length) + { + sizeEntry = _sizeTable[index]; + } + else + { + if (index < _sizeTable.Count && _sizeTable[index].Size < data.Length) + { + index++; + } + + sizeEntry = new SizeEntry(data.Length); + + _sizeTable.Insert(index, sizeEntry); + + for (int i = index + 1; i < _sizeTable.Count; i++) + { + _sizeTable[i].FillPartials(sizeEntry); + } + } + + HashState hashState = new HashState(); + hashState.Initialize(); + + for (int i = 0; i < index; i++) + { + ReadOnlySpan<byte> dataSlice = new ReadOnlySpan<byte>(data).Slice(0, _sizeTable[i].Size); + hashState.Continue(dataSlice); + _sizeTable[i].AddPartial(data, hashState.Finalize(dataSlice)); + } + + hashState.Continue(data); + return sizeEntry.GetOrAdd(data, hashState.Finalize(data), item); + } + + /// <summary> + /// Performs binary search on a list of hash tables, each one with a fixed data size. + /// </summary> + /// <param name="entries">List of hash tables</param> + /// <param name="size">Size to search for</param> + /// <returns>Index of the hash table with the given size, or nearest one otherwise</returns> + private static int BinarySearch(List<SizeEntry> entries, int size) + { + int left = 0; + int middle = 0; + int right = entries.Count - 1; + + while (left <= right) + { + middle = left + ((right - left) >> 1); + + SizeEntry entry = entries[middle]; + + if (size == entry.Size) + { + break; + } + + if (size < entry.Size) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return middle; + } + + /// <summary> + /// Tries to find an item on the table. 
+ /// </summary> + /// <param name="dataAccessor">Data accessor</param> + /// <param name="item">Item, if found</param> + /// <param name="data">Data, if found</param> + /// <returns>True if the item was found on the table, false otherwise</returns> + public bool TryFindItem(IDataAccessor dataAccessor, out T item, out byte[] data) + { + SmartDataAccessor sda = new SmartDataAccessor(dataAccessor); + + item = default; + data = null; + + int left = 0; + int right = _sizeTable.Count; + + while (left != right) + { + int index = left + ((right - left) >> 1); + + PartitionHashTable<T>.SearchResult result = _sizeTable[index].TryFindItem(ref sda, ref item, ref data); + + if (result == PartitionHashTable<T>.SearchResult.FoundFull) + { + return true; + } + + if (result == PartitionHashTable<T>.SearchResult.NotFound) + { + right = index; + } + else /* if (result == PartitionHashTable<T>.SearchResult.FoundPartial) */ + { + left = index + 1; + } + } + + data = null; + return false; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs new file mode 100644 index 00000000..0632add6 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// <summary> + /// Smart data accessor that can cache data and hashes to avoid reading and re-hashing the same memory regions. + /// </summary> + ref struct SmartDataAccessor + { + private readonly IDataAccessor _dataAccessor; + private ReadOnlySpan<byte> _data; + private readonly SortedList<int, HashState> _cachedHashes; + + /// <summary> + /// Creates a new smart data accessor. 
+ /// </summary> + /// <param name="dataAccessor">Data accessor</param> + public SmartDataAccessor(IDataAccessor dataAccessor) + { + _dataAccessor = dataAccessor; + _data = ReadOnlySpan<byte>.Empty; + _cachedHashes = new SortedList<int, HashState>(); + } + + /// <summary> + /// Gets a span of a given size. + /// </summary> + /// <remarks> + /// The actual length of the span returned depends on the <see cref="IDataAccessor"/> + /// and might be less than requested. + /// </remarks> + /// <param name="length">Size in bytes</param> + /// <returns>Span with the requested size</returns> + public ReadOnlySpan<byte> GetSpan(int length) + { + if (_data.Length < length) + { + _data = _dataAccessor.GetSpan(0, length); + } + else if (_data.Length > length) + { + return _data.Slice(0, length); + } + + return _data; + } + + /// <summary> + /// Gets a span of the requested size, and a hash of its data. + /// </summary> + /// <param name="length">Length of the span</param> + /// <param name="hash">Hash of the span data</param> + /// <returns>Span of data</returns> + public ReadOnlySpan<byte> GetSpanAndHash(int length, out uint hash) + { + ReadOnlySpan<byte> data = GetSpan(length); + hash = data.Length == length ? CalcHashCached(data) : 0; + return data; + } + + /// <summary> + /// Calculates the hash for a requested span. + /// This will try to use a cached hash if the data was already accessed before, to avoid re-hashing. 
+ /// </summary> + /// <param name="data">Data to be hashed</param> + /// <returns>Hash of the data</returns> + private uint CalcHashCached(ReadOnlySpan<byte> data) + { + HashState state = default; + bool found = false; + + for (int i = _cachedHashes.Count - 1; i >= 0; i--) + { + int cachedHashSize = _cachedHashes.Keys[i]; + + if (cachedHashSize < data.Length) + { + state = _cachedHashes.Values[i]; + found = true; + break; + } + } + + if (!found) + { + state = new HashState(); + state.Initialize(); + } + + state.Continue(data); + // The state is cached under the data length rounded down to a multiple of 8. + _cachedHashes[data.Length & ~7] = state; + return state.Finalize(data); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs b/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs new file mode 100644 index 00000000..b85423cb --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs @@ -0,0 +1,36 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// <summary> + /// Holds counts for the resources used by a shader. + /// </summary> + class ResourceCounts + { + /// <summary> + /// Total of uniform buffers used by the shaders. + /// </summary> + public int UniformBuffersCount; + + /// <summary> + /// Total of storage buffers used by the shaders. + /// </summary> + public int StorageBuffersCount; + + /// <summary> + /// Total of textures used by the shaders. + /// </summary> + public int TexturesCount; + + /// <summary> + /// Total of images used by the shaders. + /// </summary> + public int ImagesCount; + + /// <summary> + /// Creates a new instance of the shader resource counts class. + /// </summary> + public ResourceCounts() + { + UniformBuffersCount = 1; // The first binding is reserved for the support buffer. 
+ } + } +}
// File: src/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Holds the in-memory address of the shader code of every shader stage.
    /// </summary>
    struct ShaderAddresses : IEquatable<ShaderAddresses>
    {
#pragma warning disable CS0649
        public ulong VertexA;
        public ulong VertexB;
        public ulong TessControl;
        public ulong TessEvaluation;
        public ulong Geometry;
        public ulong Fragment;
#pragma warning restore CS0649

        /// <summary>
        /// Compares this structure against an arbitrary object.
        /// </summary>
        /// <param name="other">Object to compare with</param>
        /// <returns>True if <paramref name="other"/> is a <see cref="ShaderAddresses"/> holding the same addresses, false otherwise</returns>
        public override bool Equals(object other)
        {
            if (other is ShaderAddresses addresses)
            {
                return Equals(addresses);
            }

            return false;
        }

        /// <summary>
        /// Compares the addresses of all stages against those of another structure.
        /// </summary>
        /// <param name="other">Shader addresses structure to compare with</param>
        /// <returns>True if every stage address matches, false otherwise</returns>
        public bool Equals(ShaderAddresses other)
        {
            bool vertexMatches = VertexA == other.VertexA && VertexB == other.VertexB;
            bool tessellationMatches = TessControl == other.TessControl && TessEvaluation == other.TessEvaluation;
            bool remainingMatches = Geometry == other.Geometry && Fragment == other.Fragment;

            return vertexMatches && tessellationMatches && remainingMatches;
        }

        /// <summary>
        /// Computes a hash code that combines the addresses of all stages.
        /// </summary>
        /// <returns>Hash code</returns>
        public override int GetHashCode() =>
            HashCode.Combine(VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment);

        /// <summary>
        /// Exposes the structure as a span with one address per field, starting at <see cref="VertexA"/>.
        /// </summary>
        /// <returns>Span of addresses</returns>
        public Span<ulong> AsSpan()
        {
            // The element count is derived from the structure size, so it covers every address field.
            int addressCount = Unsafe.SizeOf<ShaderAddresses>() / sizeof(ulong);

            return MemoryMarshal.CreateSpan(ref VertexA, addressCount);
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs new file mode 100644 index 00000000..e1ab9327 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -0,0 +1,774 @@ +using Ryujinx.Common.Configuration; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// <summary> + /// Memory cache of shader code. + /// </summary> + class ShaderCache : IDisposable + { + /// <summary> + /// Default flags used on the shader translation process. + /// </summary> + public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode; + + private readonly struct TranslatedShader + { + public readonly CachedShaderStage Shader; + public readonly ShaderProgram Program; + + public TranslatedShader(CachedShaderStage shader, ShaderProgram program) + { + Shader = shader; + Program = program; + } + } + + private readonly struct TranslatedShaderVertexPair + { + public readonly CachedShaderStage VertexA; + public readonly CachedShaderStage VertexB; + public readonly ShaderProgram Program; + + public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program) + { + VertexA = vertexA; + VertexB = vertexB; + Program = program; + } + } + + private readonly GpuContext _context; + + private readonly ShaderDumper _dumper; + + private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms; + private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms; + + private readonly struct ProgramToSave + 
{ + public readonly CachedShaderProgram CachedProgram; + public readonly IProgram HostProgram; + public readonly byte[] BinaryCode; + + public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode) + { + CachedProgram = cachedProgram; + HostProgram = hostProgram; + BinaryCode = binaryCode; + } + } + + private Queue<ProgramToSave> _programsToSaveQueue; + + private readonly ComputeShaderCacheHashTable _computeShaderCache; + private readonly ShaderCacheHashTable _graphicsShaderCache; + private readonly DiskCacheHostStorage _diskCacheHostStorage; + private readonly BackgroundDiskCacheWriter _cacheWriter; + + /// <summary> + /// Event for signalling shader cache loading progress. + /// </summary> + public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged; + + /// <summary> + /// Creates a new instance of the shader cache. + /// </summary> + /// <param name="context">GPU context that the shader cache belongs to</param> + public ShaderCache(GpuContext context) + { + _context = context; + + _dumper = new ShaderDumper(); + + _cpPrograms = new Dictionary<ulong, CachedShaderProgram>(); + _gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>(); + + _programsToSaveQueue = new Queue<ProgramToSave>(); + + string diskCacheTitleId = GetDiskCachePath(); + + _computeShaderCache = new ComputeShaderCacheHashTable(); + _graphicsShaderCache = new ShaderCacheHashTable(); + _diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId); + + if (_diskCacheHostStorage.CacheEnabled) + { + _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage); + } + } + + /// <summary> + /// Gets the path where the disk cache for the current application is stored. + /// </summary> + private static string GetDiskCachePath() + { + return GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null + ? 
Path.Combine(AppDataManager.GamesDirPath, GraphicsConfig.TitleId, "cache", "shader") + : null; + } + + /// <summary> + /// Processes the queue of shaders that must save their binaries to the disk cache. + /// </summary> + public void ProcessShaderCacheQueue() + { + // Check to see if the binaries for previously compiled shaders are ready, and save them out. + + while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave)) + { + ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false); + + if (result != ProgramLinkStatus.Incomplete) + { + if (result == ProgramLinkStatus.Success) + { + _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary()); + } + + _programsToSaveQueue.Dequeue(); + } + else + { + break; + } + } + } + + /// <summary> + /// Initialize the cache. + /// </summary> + /// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param> + internal void Initialize(CancellationToken cancellationToken) + { + if (_diskCacheHostStorage.CacheEnabled) + { + ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader( + _context, + _graphicsShaderCache, + _computeShaderCache, + _diskCacheHostStorage, + cancellationToken, + ShaderCacheStateUpdate); + + loader.LoadShaders(); + + int errorCount = loader.ErrorCount; + if (errorCount != 0) + { + Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache."); + } + } + } + + /// <summary> + /// Shader cache state update handler. 
+ /// </summary> + /// <param name="state">Current state of the shader cache load process</param> + /// <param name="current">Number of the current shader being processed</param> + /// <param name="total">Total number of shaders to process</param> + private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total) + { + ShaderCacheStateChanged?.Invoke(state, current, total); + } + + /// <summary> + /// Gets a compute shader from the cache. + /// </summary> + /// <remarks> + /// This automatically translates, compiles and adds the code to the cache if not present. + /// </remarks> + /// <param name="channel">GPU channel</param> + /// <param name="poolState">Texture pool state</param> + /// <param name="computeState">Compute engine state</param> + /// <param name="gpuVa">GPU virtual address of the binary shader code</param> + /// <returns>Compiled compute shader code</returns> + public CachedShaderProgram GetComputeShader( + GpuChannel channel, + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + ulong gpuVa) + { + if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa)) + { + return cpShader; + } + + if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode)) + { + _cpPrograms[gpuVa] = cpShader; + return cpShader; + } + + ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState); + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState); + + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa); + + TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); + + ShaderSource[] shaderSourcesArray = new ShaderSource[] { 
CreateShaderSource(translatedShader.Program) }; + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(-1)); + + cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); + + _computeShaderCache.Add(cpShader); + EnqueueProgramToSave(cpShader, hostProgram, shaderSourcesArray); + _cpPrograms[gpuVa] = cpShader; + + return cpShader; + } + + /// <summary> + /// Updates the shader pipeline state based on the current GPU state. + /// </summary> + /// <param name="state">Current GPU 3D engine state</param> + /// <param name="pipeline">Shader pipeline state to be updated</param> + /// <param name="graphicsState">Current graphics state</param> + /// <param name="channel">Current GPU channel</param> + private void UpdatePipelineInfo( + ref ThreedClassState state, + ref ProgramPipelineState pipeline, + GpuChannelGraphicsState graphicsState, + GpuChannel channel) + { + channel.TextureManager.UpdateRenderTargets(); + + var rtControl = state.RtControl; + var msaaMode = state.RtMsaaMode; + + pipeline.SamplesCount = msaaMode.SamplesInX() * msaaMode.SamplesInY(); + + int count = rtControl.UnpackCount(); + + for (int index = 0; index < Constants.TotalRenderTargets; index++) + { + int rtIndex = rtControl.UnpackPermutationIndex(index); + + var colorState = state.RtColorState[rtIndex]; + + if (index >= count || colorState.Format == 0 || colorState.WidthOrStride == 0) + { + pipeline.AttachmentEnable[index] = false; + pipeline.AttachmentFormats[index] = Format.R8G8B8A8Unorm; + } + else + { + pipeline.AttachmentEnable[index] = true; + pipeline.AttachmentFormats[index] = colorState.Format.Convert().Format; + } + } + + pipeline.DepthStencilEnable = state.RtDepthStencilEnable; + pipeline.DepthStencilFormat = pipeline.DepthStencilEnable ? 
state.RtDepthStencilState.Format.Convert().Format : Format.D24UnormS8Uint; + + pipeline.VertexBufferCount = Constants.TotalVertexBuffers; + pipeline.Topology = graphicsState.Topology; + } + + /// <summary> + /// Gets a graphics shader program from the shader cache. + /// This includes all the specified shader stages. + /// </summary> + /// <remarks> + /// This automatically translates, compiles and adds the code to the cache if not present. + /// </remarks> + /// <param name="state">GPU state</param> + /// <param name="pipeline">Pipeline state</param> + /// <param name="channel">GPU channel</param> + /// <param name="poolState">Texture pool state</param> + /// <param name="graphicsState">3D engine state</param> + /// <param name="addresses">Addresses of the shaders for each stage</param> + /// <returns>Compiled graphics shader code</returns> + public CachedShaderProgram GetGraphicsShader( + ref ThreedClassState state, + ref ProgramPipelineState pipeline, + GpuChannel channel, + ref GpuChannelPoolState poolState, + ref GpuChannelGraphicsState graphicsState, + ShaderAddresses addresses) + { + if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, ref poolState, ref graphicsState, gpShaders, addresses)) + { + return gpShaders; + } + + if (_graphicsShaderCache.TryFind(channel, ref poolState, ref graphicsState, addresses, out gpShaders, out var cachedGuestCode)) + { + _gpPrograms[addresses] = gpShaders; + return gpShaders; + } + + TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state); + + UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel); + + ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors); + + ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan(); + + 
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; + TranslatorContext nextStage = null; + + TargetApi api = _context.Capabilities.Api; + + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) + { + ulong gpuVa = addressesSpan[stageIndex + 1]; + + if (gpuVa != 0) + { + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa); + + if (nextStage != null) + { + currentStage.SetNextStage(nextStage); + } + + if (stageIndex == 0 && addresses.VertexA != 0) + { + translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA); + } + + translatorContexts[stageIndex + 1] = currentStage; + nextStage = currentStage; + } + } + + if (!_context.Capabilities.SupportsGeometryShader) + { + TryRemoveGeometryStage(translatorContexts); + } + + CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; + List<ShaderSource> shaderSources = new List<ShaderSource>(); + + TranslatorContext previousStage = null; + + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + TranslatorContext currentStage = translatorContexts[stageIndex + 1]; + + if (currentStage != null) + { + ShaderProgram program; + + if (stageIndex == 0 && translatorContexts[0] != null) + { + TranslatedShaderVertexPair translatedShader = TranslateShader( + _dumper, + channel, + currentStage, + translatorContexts[0], + cachedGuestCode.VertexACode, + cachedGuestCode.VertexBCode); + + shaders[0] = translatedShader.VertexA; + shaders[1] = translatedShader.VertexB; + program = translatedShader.Program; + } + else + { + byte[] code = cachedGuestCode.GetByIndex(stageIndex); + + TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code); + + shaders[stageIndex + 1] = translatedShader.Shader; + program 
= translatedShader.Program; + } + + if (program != null) + { + shaderSources.Add(CreateShaderSource(program)); + } + + previousStage = currentStage; + } + else if ( + previousStage != null && + previousStage.LayerOutputWritten && + stageIndex == 3 && + !_context.Capabilities.SupportsLayerVertexTessellation) + { + shaderSources.Add(CreateShaderSource(previousStage.GenerateGeometryPassthrough())); + } + } + + ShaderSource[] shaderSourcesArray = shaderSources.ToArray(); + + int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1; + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline)); + + gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); + + _graphicsShaderCache.Add(gpShaders); + EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray); + _gpPrograms[addresses] = gpShaders; + + return gpShaders; + } + + /// <summary> + /// Tries to eliminate the geometry stage from the array of translator contexts. + /// </summary> + /// <param name="translatorContexts">Array of translator contexts</param> + public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts) + { + if (translatorContexts[4] != null) + { + // We have a geometry shader, but geometry shaders are not supported. + // Try to eliminate the geometry shader. + + ShaderProgramInfo info = translatorContexts[4].Translate().Info; + + if (info.Identification == ShaderIdentification.GeometryLayerPassthrough) + { + // We managed to identify that this geometry shader is only used to set the output Layer value, + // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it. 
+ + for (int i = 3; i >= 1; i--) + { + if (translatorContexts[i] != null) + { + translatorContexts[i].SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute); + translatorContexts[i].SetLastInVertexPipeline(); + break; + } + } + + translatorContexts[4] = null; + } + } + } + + /// <summary> + /// Creates a shader source for use with the backend from a translated shader program. + /// </summary> + /// <param name="program">Translated shader program</param> + /// <returns>Shader source</returns> + public static ShaderSource CreateShaderSource(ShaderProgram program) + { + return new ShaderSource(program.Code, program.BinaryCode, GetBindings(program.Info), program.Info.Stage, program.Language); + } + + /// <summary> + /// Puts a program on the queue of programs to be saved on the disk cache. + /// </summary> + /// <remarks> + /// This will not do anything if disk shader cache is disabled. + /// </remarks> + /// <param name="program">Cached shader program</param> + /// <param name="hostProgram">Host program</param> + /// <param name="sources">Source for each shader stage</param> + private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources) + { + if (_diskCacheHostStorage.CacheEnabled) + { + byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(sources) : null; + ProgramToSave programToSave = new ProgramToSave(program, hostProgram, binaryCode); + + _programsToSaveQueue.Enqueue(programToSave); + } + } + + /// <summary> + /// Gets transform feedback state from the current GPU state. 
+ /// </summary> + /// <param name="state">Current GPU state</param> + /// <returns>Four transform feedback descriptors for the enabled TFBs, or null if TFB is disabled</returns> + private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state) + { + bool tfEnable = state.TfEnable; + if (!tfEnable) + { + return null; + } + + TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers]; + + for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++) + { + var tf = state.TfState[i]; + + descs[i] = new TransformFeedbackDescriptor( + tf.BufferIndex, + tf.Stride, + tf.VaryingsCount, + ref state.TfVaryingLocations[i]); + } + + return descs; + } + + /// <summary> + /// Checks if compute shader code in memory is equal to the cached shader. + /// </summary> + /// <param name="channel">GPU channel using the shader</param> + /// <param name="poolState">GPU channel state to verify shader compatibility</param> + /// <param name="computeState">GPU channel compute state to verify shader compatibility</param> + /// <param name="cpShader">Cached compute shader</param> + /// <param name="gpuVa">GPU virtual address of the shader code in memory</param> + /// <returns>True if the code is different, false otherwise</returns> + private static bool IsShaderEqual( + GpuChannel channel, + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + CachedShaderProgram cpShader, + ulong gpuVa) + { + if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa)) + { + return cpShader.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true); + } + + return false; + } + + /// <summary> + /// Checks if graphics shader code from all stages in memory are equal to the cached shaders. 
+ /// </summary> + /// <param name="channel">GPU channel using the shader</param> + /// <param name="poolState">GPU channel state to verify shader compatibility</param> + /// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param> + /// <param name="gpShaders">Cached graphics shaders</param> + /// <param name="addresses">GPU virtual addresses of all enabled shader stages</param> + /// <returns>True if the code is different, false otherwise</returns> + private static bool IsShaderEqual( + GpuChannel channel, + ref GpuChannelPoolState poolState, + ref GpuChannelGraphicsState graphicsState, + CachedShaderProgram gpShaders, + ShaderAddresses addresses) + { + ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan(); + + for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++) + { + CachedShaderStage shader = gpShaders.Shaders[stageIndex]; + + ulong gpuVa = addressesSpan[stageIndex]; + + if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa)) + { + return false; + } + } + + bool usesDrawParameters = gpShaders.Shaders[1]?.Info.UsesDrawParameters ?? false; + + return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true); + } + + /// <summary> + /// Checks if the code of the specified cached shader is different from the code in memory. 
+ /// </summary> + /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param> + /// <param name="shader">Cached shader to compare with</param> + /// <param name="gpuVa">GPU virtual address of the binary shader code</param> + /// <returns>True if the code is different, false otherwise</returns> + private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa) + { + if (shader == null) + { + return true; + } + + ReadOnlySpan<byte> memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length); + + return memoryCode.SequenceEqual(shader.Code); + } + + /// <summary> + /// Decode the binary Maxwell shader code to a translator context. + /// </summary> + /// <param name="gpuAccessor">GPU state accessor</param> + /// <param name="api">Graphics API that will be used with the shader</param> + /// <param name="gpuVa">GPU virtual address of the binary shader code</param> + /// <returns>The generated translator context</returns> + public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, TargetApi api, ulong gpuVa) + { + var options = CreateTranslationOptions(api, DefaultFlags | TranslationFlags.Compute); + return Translator.CreateContext(gpuVa, gpuAccessor, options); + } + + /// <summary> + /// Decode the binary Maxwell shader code to a translator context. + /// </summary> + /// <remarks> + /// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader. 
+ /// </remarks> + /// <param name="gpuAccessor">GPU state accessor</param> + /// <param name="api">Graphics API that will be used with the shader</param> + /// <param name="flags">Flags that controls shader translation</param> + /// <param name="gpuVa">GPU virtual address of the shader code</param> + /// <returns>The generated translator context</returns> + public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TargetApi api, TranslationFlags flags, ulong gpuVa) + { + var options = CreateTranslationOptions(api, flags); + return Translator.CreateContext(gpuVa, gpuAccessor, options); + } + + /// <summary> + /// Translates a previously generated translator context to something that the host API accepts. + /// </summary> + /// <param name="dumper">Optional shader code dumper</param> + /// <param name="channel">GPU channel using the shader</param> + /// <param name="currentStage">Translator context of the stage to be translated</param> + /// <param name="vertexA">Optional translator context of the shader that should be combined</param> + /// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param> + /// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param> + /// <returns>Compiled graphics shader code</returns> + private static TranslatedShaderVertexPair TranslateShader( + ShaderDumper dumper, + GpuChannel channel, + TranslatorContext currentStage, + TranslatorContext vertexA, + byte[] codeA, + byte[] codeB) + { + ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1); + + var memoryManager = channel.MemoryManager; + + codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); + codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); + byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray(); + byte[] cb1DataB = 
memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray(); + + ShaderDumpPaths pathsA = default; + ShaderDumpPaths pathsB = default; + + if (dumper != null) + { + pathsA = dumper.Dump(codeA, compute: false); + pathsB = dumper.Dump(codeB, compute: false); + } + + ShaderProgram program = currentStage.Translate(vertexA); + + pathsB.Prepend(program); + pathsA.Prepend(program); + + CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA); + CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB); + + return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program); + } + + /// <summary> + /// Translates a previously generated translator context to something that the host API accepts. + /// </summary> + /// <param name="dumper">Optional shader code dumper</param> + /// <param name="channel">GPU channel using the shader</param> + /// <param name="context">Translator context of the stage to be translated</param> + /// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param> + /// <returns>Compiled graphics shader code</returns> + private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code) + { + var memoryManager = channel.MemoryManager; + + ulong cb1DataAddress = context.Stage == ShaderStage.Compute + ? channel.BufferManager.GetComputeUniformBufferAddress(1) + : channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1); + + byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray(); + code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray(); + + ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? 
default; + ShaderProgram program = context.Translate(); + + paths.Prepend(program); + + return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program); + } + + /// <summary> + /// Gets the index of a stage from a <see cref="ShaderStage"/>. + /// </summary> + /// <param name="stage">Stage to get the index from</param> + /// <returns>Stage index</returns> + private static int StageToStageIndex(ShaderStage stage) + { + return stage switch + { + ShaderStage.TessellationControl => 1, + ShaderStage.TessellationEvaluation => 2, + ShaderStage.Geometry => 3, + ShaderStage.Fragment => 4, + _ => 0 + }; + } + + /// <summary> + /// Gets information about the bindings used by a shader program. + /// </summary> + /// <param name="info">Shader program information to get the information from</param> + /// <returns>Shader bindings</returns> + public static ShaderBindings GetBindings(ShaderProgramInfo info) + { + var uniformBufferBindings = info.CBuffers.Select(x => x.Binding).ToArray(); + var storageBufferBindings = info.SBuffers.Select(x => x.Binding).ToArray(); + var textureBindings = info.Textures.Select(x => x.Binding).ToArray(); + var imageBindings = info.Images.Select(x => x.Binding).ToArray(); + + return new ShaderBindings( + uniformBufferBindings, + storageBufferBindings, + textureBindings, + imageBindings); + } + + /// <summary> + /// Creates shader translation options with the requested graphics API and flags. + /// The shader language is choosen based on the current configuration and graphics API. + /// </summary> + /// <param name="api">Target graphics API</param> + /// <param name="flags">Translation flags</param> + /// <returns>Translation options</returns> + private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags) + { + TargetLanguage lang = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan + ? 
TargetLanguage.Spirv + : TargetLanguage.Glsl; + + return new TranslationOptions(lang, api, flags); + } + + /// <summary> + /// Disposes the shader cache, deleting all the cached shaders. + /// It's an error to use the shader cache after disposal. + /// </summary> + public void Dispose() + { + foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms()) + { + program.Dispose(); + } + + foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms()) + { + program.Dispose(); + } + + _cacheWriter?.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs new file mode 100644 index 00000000..e35c06b1 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs @@ -0,0 +1,282 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// <summary> + /// Holds already cached code for a guest shader. + /// </summary> + struct CachedGraphicsGuestCode + { + public byte[] VertexACode; + public byte[] VertexBCode; + public byte[] TessControlCode; + public byte[] TessEvaluationCode; + public byte[] GeometryCode; + public byte[] FragmentCode; + + /// <summary> + /// Gets the guest code of a shader stage by its index. + /// </summary> + /// <param name="stageIndex">Index of the shader stage</param> + /// <returns>Guest code, or null if not present</returns> + public byte[] GetByIndex(int stageIndex) + { + return stageIndex switch + { + 1 => TessControlCode, + 2 => TessEvaluationCode, + 3 => GeometryCode, + 4 => FragmentCode, + _ => VertexBCode + }; + } + } + + /// <summary> + /// Graphics shader cache hash table. + /// </summary> + class ShaderCacheHashTable + { + /// <summary> + /// Shader ID cache. 
+ /// </summary> + private struct IdCache + { + private PartitionedHashTable<int> _cache; + private int _id; + + /// <summary> + /// Initializes the state. + /// </summary> + public void Initialize() + { + _cache = new PartitionedHashTable<int>(); + _id = 0; + } + + /// <summary> + /// Adds guest code to the cache. + /// </summary> + /// <remarks> + /// If the code was already cached, it will just return the existing ID. + /// </remarks> + /// <param name="code">Code to add</param> + /// <returns>Unique ID for the guest code</returns> + public int Add(byte[] code) + { + int id = ++_id; + int cachedId = _cache.GetOrAdd(code, id); + if (cachedId != id) + { + --_id; + } + + return cachedId; + } + + /// <summary> + /// Tries to find cached guest code. + /// </summary> + /// <param name="dataAccessor">Code accessor used to read guest code to find a match on the hash table</param> + /// <param name="id">ID of the guest code, if found</param> + /// <param name="data">Cached guest code, if found</param> + /// <returns>True if found, false otherwise</returns> + public bool TryFind(IDataAccessor dataAccessor, out int id, out byte[] data) + { + return _cache.TryFindItem(dataAccessor, out id, out data); + } + } + + /// <summary> + /// Guest code IDs of the guest shaders that when combined forms a single host program. 
+ /// </summary> + private struct IdTable : IEquatable<IdTable> + { + public int VertexAId; + public int VertexBId; + public int TessControlId; + public int TessEvaluationId; + public int GeometryId; + public int FragmentId; + + public override bool Equals(object obj) + { + return obj is IdTable other && Equals(other); + } + + public bool Equals(IdTable other) + { + return other.VertexAId == VertexAId && + other.VertexBId == VertexBId && + other.TessControlId == TessControlId && + other.TessEvaluationId == TessEvaluationId && + other.GeometryId == GeometryId && + other.FragmentId == FragmentId; + } + + public override int GetHashCode() + { + return HashCode.Combine(VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId); + } + } + + private IdCache _vertexACache; + private IdCache _vertexBCache; + private IdCache _tessControlCache; + private IdCache _tessEvaluationCache; + private IdCache _geometryCache; + private IdCache _fragmentCache; + + private readonly Dictionary<IdTable, ShaderSpecializationList> _shaderPrograms; + + /// <summary> + /// Creates a new graphics shader cache hash table. + /// </summary> + public ShaderCacheHashTable() + { + _vertexACache.Initialize(); + _vertexBCache.Initialize(); + _tessControlCache.Initialize(); + _tessEvaluationCache.Initialize(); + _geometryCache.Initialize(); + _fragmentCache.Initialize(); + + _shaderPrograms = new Dictionary<IdTable, ShaderSpecializationList>(); + } + + /// <summary> + /// Adds a program to the cache. 
+ /// </summary> + /// <param name="program">Program to be added</param> + public void Add(CachedShaderProgram program) + { + IdTable idTable = new IdTable(); + + foreach (var shader in program.Shaders) + { + if (shader == null) + { + continue; + } + + if (shader.Info != null) + { + switch (shader.Info.Stage) + { + case ShaderStage.Vertex: + idTable.VertexBId = _vertexBCache.Add(shader.Code); + break; + case ShaderStage.TessellationControl: + idTable.TessControlId = _tessControlCache.Add(shader.Code); + break; + case ShaderStage.TessellationEvaluation: + idTable.TessEvaluationId = _tessEvaluationCache.Add(shader.Code); + break; + case ShaderStage.Geometry: + idTable.GeometryId = _geometryCache.Add(shader.Code); + break; + case ShaderStage.Fragment: + idTable.FragmentId = _fragmentCache.Add(shader.Code); + break; + } + } + else + { + idTable.VertexAId = _vertexACache.Add(shader.Code); + } + } + + if (!_shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList)) + { + specList = new ShaderSpecializationList(); + _shaderPrograms.Add(idTable, specList); + } + + specList.Add(program); + } + + /// <summary> + /// Tries to find a cached program. + /// </summary> + /// <remarks> + /// Even if false is returned, <paramref name="guestCode"/> might still contain cached guest code. + /// This can be used to avoid additional allocations for guest code that was already cached. 
/// </remarks>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">Graphics state</param>
/// <param name="addresses">Guest addresses of the shaders to find</param>
/// <param name="program">Cached host program for the given state, if found</param>
/// <param name="guestCode">Cached guest code, if any found</param>
/// <returns>True if a cached host program was found, false otherwise</returns>
public bool TryFind(
    GpuChannel channel,
    ref GpuChannelPoolState poolState,
    ref GpuChannelGraphicsState graphicsState,
    ShaderAddresses addresses,
    out CachedShaderProgram program,
    out CachedGraphicsGuestCode guestCode)
{
    var gpuMemory = channel.MemoryManager;
    IdTable ids = new IdTable();
    guestCode = new CachedGraphicsGuestCode();

    program = null;

    // Every stage is looked up even after a miss (note the non short-circuiting "&="),
    // so that guestCode receives the code of all the stages that are cached.
    bool allStagesCached = TryGetId(_vertexACache, gpuMemory, addresses.VertexA, out ids.VertexAId, out guestCode.VertexACode);
    allStagesCached &= TryGetId(_vertexBCache, gpuMemory, addresses.VertexB, out ids.VertexBId, out guestCode.VertexBCode);
    allStagesCached &= TryGetId(_tessControlCache, gpuMemory, addresses.TessControl, out ids.TessControlId, out guestCode.TessControlCode);
    allStagesCached &= TryGetId(_tessEvaluationCache, gpuMemory, addresses.TessEvaluation, out ids.TessEvaluationId, out guestCode.TessEvaluationCode);
    allStagesCached &= TryGetId(_geometryCache, gpuMemory, addresses.Geometry, out ids.GeometryId, out guestCode.GeometryCode);
    allStagesCached &= TryGetId(_fragmentCache, gpuMemory, addresses.Fragment, out ids.FragmentId, out guestCode.FragmentCode);

    if (!allStagesCached || !_shaderPrograms.TryGetValue(ids, out ShaderSpecializationList bucket))
    {
        return false;
    }

    // The code matches; now a specialization compatible with the current state is needed.
    return bucket.TryFindForGraphics(channel, ref poolState, ref graphicsState, out program);
}

/// <summary>
/// Tries to get the ID of a single cached shader stage.
/// </summary>
/// <param name="idCache">ID cache of the stage</param>
/// <param name="memoryManager">GPU memory manager</param>
/// <param name="baseAddress">Base address of the shader</param>
/// <param name="id">ID, if found</param>
/// <param name="data">Cached guest code, if found</param>
/// <returns>True if a cached shader is found, false otherwise</returns>
private static bool TryGetId(IdCache idCache, MemoryManager memoryManager, ulong baseAddress, out int id, out byte[] data)
{
    if (baseAddress != 0)
    {
        return idCache.TryFind(new ShaderCodeAccessor(memoryManager, baseAddress), out id, out data);
    }

    // A zero address is reported as found, with ID 0 and no code.
    id = 0;
    data = null;
    return true;
}

/// <summary>
/// Gets all programs that have been added to the table.
/// </summary>
/// <returns>Programs added to the table</returns>
public IEnumerable<CachedShaderProgram> GetPrograms()
{
    foreach (ShaderSpecializationList bucket in _shaderPrograms.Values)
    {
        foreach (CachedShaderProgram cachedProgram in bucket)
        {
            yield return cachedProgram;
        }
    }
}
}
}
// src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheState.cs
namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Shader cache loading states.
    /// </summary>
    public enum ShaderCacheState
    {
        /// <summary>
        /// Shader cache started loading.
        /// </summary>
        Start = 0,

        /// <summary>
        /// Shader cache is loading.
        /// </summary>
        Loading = 1,

        /// <summary>
        /// Shader cache finished loading.
        /// </summary>
        Loaded = 2
    }
}
// src/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.HashTable;
using System;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Data accessor that reads shader code through a GPU memory manager, relative to a fixed base address.
    /// </summary>
    readonly struct ShaderCodeAccessor : IDataAccessor
    {
        private readonly MemoryManager _memoryManager;
        private readonly ulong _baseAddress;

        /// <summary>
        /// Creates a new shader code accessor.
        /// </summary>
        /// <param name="memoryManager">Memory manager used to access the shader code</param>
        /// <param name="baseAddress">Base address of the shader in memory</param>
        public ShaderCodeAccessor(MemoryManager memoryManager, ulong baseAddress)
        {
            (_memoryManager, _baseAddress) = (memoryManager, baseAddress);
        }

        /// <inheritdoc/>
        public ReadOnlySpan<byte> GetSpan(int offset, int length)
        {
            // The offset is relative to the shader base address.
            ulong address = _baseAddress + (ulong)offset;

            return _memoryManager.GetSpanMapped(address, length);
        }
    }
}
// src/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs
using Ryujinx.Graphics.Shader;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Pair of paths where the code of a shader was dumped on disk.
    /// </summary>
    readonly struct ShaderDumpPaths
    {
        /// <summary>
        /// Path where the full shader code with header was dumped, or null if not dumped.
        /// </summary>
        public string FullPath { get; }

        /// <summary>
        /// Path where the shader code without header was dumped, or null if not dumped.
        /// </summary>
        public string CodePath { get; }

        /// <summary>
        /// True if both paths are set, false otherwise.
        /// </summary>
        public bool HasPath => !(FullPath == null || CodePath == null);

        /// <summary>
        /// Creates a new shader dumps path structure.
        /// </summary>
        /// <param name="fullPath">Path where the full shader code with header was dumped, or null if not dumped</param>
        /// <param name="codePath">Path where the shader code without header was dumped, or null if not dumped</param>
        public ShaderDumpPaths(string fullPath, string codePath)
        {
            (FullPath, CodePath) = (fullPath, codePath);
        }

        /// <summary>
        /// Prepends the shader paths on the program source, as a comment.
        /// </summary>
        /// <param name="program">Program to prepend into</param>
        public void Prepend(ShaderProgram program)
        {
            if (!HasPath)
            {
                return;
            }

            // The code path is prepended first and the full path second.
            program.Prepend("// " + CodePath);
            program.Prepend("// " + FullPath);
        }
    }
}
// src/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs
using System.IO;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Shader dumper, writes binary shader code to disk.
    /// </summary>
    class ShaderDumper
    {
        /// <summary>
        /// Number of leading bytes of graphics shader code that are only written to the "Full" dump.
        /// </summary>
        private const int GraphicsHeaderSize = 0x50;

        /// <summary>
        /// The header-less dump is zero padded to a multiple of this size, to meet nvdisasm requirements.
        /// </summary>
        private const int CodeAlignment = 0x20;

        private string _runtimeDir;
        private string _dumpPath;

        /// <summary>
        /// Current index of the shader dump binary file.
        /// This is incremented after each save, in order to give unique names to the files.
        /// </summary>
        public int CurrentDumpIndex { get; private set; }

        /// <summary>
        /// Creates a new instance of the shader dumper.
        /// </summary>
        public ShaderDumper()
        {
            CurrentDumpIndex = 1;
        }

        /// <summary>
        /// Dumps shader code to disk.
        /// </summary>
        /// <remarks>
        /// Two files are written: one with the complete code, and one without the header
        /// (compute code has no header), zero padded to a multiple of 0x20 bytes.
        /// Nothing is written if no dump path is configured.
        /// </remarks>
        /// <param name="code">Code to be dumped</param>
        /// <param name="compute">True for compute shader code, false for graphics shader code</param>
        /// <returns>Paths where the shader code was dumped</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="code"/> is null and dumping is enabled</exception>
        public ShaderDumpPaths Dump(byte[] code, bool compute)
        {
            _dumpPath = GraphicsConfig.ShadersDumpPath;

            if (string.IsNullOrWhiteSpace(_dumpPath))
            {
                return default;
            }

            if (code == null)
            {
                // Fail before any directory or file is created.
                throw new System.ArgumentNullException(nameof(code));
            }

            string fileName = "Shader" + CurrentDumpIndex.ToString("d4") + ".bin";

            string fullPath = Path.Combine(FullDir(), fileName);
            string codePath = Path.Combine(CodeDir(), fileName);

            CurrentDumpIndex++;

            // Code shorter than the header is treated as header only, instead of
            // producing a negative code size.
            int headerSize = compute ? 0 : GraphicsHeaderSize;

            if (headerSize > code.Length)
            {
                headerSize = code.Length;
            }

            int codeSize = code.Length - headerSize;

            using FileStream fullFile = File.Create(fullPath);
            using FileStream codeFile = File.Create(codePath);

            fullFile.Write(code, 0, code.Length);
            codeFile.Write(code, headerSize, codeSize);

            // Align to meet nvdisasm requirements.
            // The padding is computed in bytes. Padding 4 bytes at a time can never reach a
            // multiple of 0x20 when the code size is not a multiple of 4.
            int padding = (CodeAlignment - (codeSize % CodeAlignment)) % CodeAlignment;

            if (padding != 0)
            {
                codeFile.Write(new byte[padding], 0, padding);
            }

            return new ShaderDumpPaths(fullPath, codePath);
        }

        /// <summary>
        /// Returns the output directory for shader code with header.
        /// </summary>
        /// <returns>Directory path</returns>
        private string FullDir()
        {
            return CreateAndReturn(Path.Combine(DumpDir(), "Full"));
        }

        /// <summary>
        /// Returns the output directory for shader code without header.
        /// </summary>
        /// <returns>Directory path</returns>
        private string CodeDir()
        {
            return CreateAndReturn(Path.Combine(DumpDir(), "Code"));
        }

        /// <summary>
        /// Returns the full output directory for the current shader dump.
        /// </summary>
        /// <remarks>
        /// The directory is picked on the first call, as the first "DumpsNN" directory that does
        /// not exist yet under the dump path, and is reused by all later calls on this instance.
        /// </remarks>
        /// <returns>Directory path</returns>
        private string DumpDir()
        {
            if (string.IsNullOrEmpty(_runtimeDir))
            {
                int index = 1;

                do
                {
                    _runtimeDir = Path.Combine(_dumpPath, "Dumps" + index.ToString("d2"));

                    index++;
                }
                while (Directory.Exists(_runtimeDir));

                Directory.CreateDirectory(_runtimeDir);
            }

            return _runtimeDir;
        }

        /// <summary>
        /// Creates a new specified directory if needed.
        /// </summary>
        /// <param name="dir">The directory to create</param>
        /// <returns>The same directory passed to the method</returns>
        private static string CreateAndReturn(string dir)
        {
            Directory.CreateDirectory(dir);

            return dir;
        }
    }
}
// src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
using System.Collections;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// List of cached shader programs that differs only by specialization state.
    /// </summary>
    class ShaderSpecializationList : IEnumerable<CachedShaderProgram>
    {
        private readonly List<CachedShaderProgram> _entries = new();

        /// <summary>
        /// Adds a program to the list.
        /// </summary>
        /// <param name="program">Program to be added</param>
        public void Add(CachedShaderProgram program) => _entries.Add(program);

        /// <summary>
        /// Tries to find an existing 3D program on the cache.
        /// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="graphicsState">Graphics state</param>
        /// <param name="program">Cached program, if found</param>
        /// <returns>True if a compatible program is found, false otherwise</returns>
        public bool TryFindForGraphics(
            GpuChannel channel,
            ref GpuChannelPoolState poolState,
            ref GpuChannelGraphicsState graphicsState,
            out CachedShaderProgram program)
        {
            foreach (CachedShaderProgram candidate in _entries)
            {
                // Read from the stage at index 1; a missing stage counts as not using draw parameters.
                bool usesDrawParameters = candidate.Shaders[1]?.Info.UsesDrawParameters == true;

                if (!candidate.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true))
                {
                    continue;
                }

                program = candidate;
                return true;
            }

            program = default;
            return false;
        }

        /// <summary>
        /// Tries to find an existing compute program on the cache.
        /// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="computeState">Compute state</param>
        /// <param name="program">Cached program, if found</param>
        /// <returns>True if a compatible program is found, false otherwise</returns>
        public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
        {
            foreach (CachedShaderProgram candidate in _entries)
            {
                if (!candidate.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true))
                {
                    continue;
                }

                program = candidate;
                return true;
            }

            program = default;
            return false;
        }

        /// <summary>
        /// Enumerates the programs on the list.
        /// </summary>
        /// <returns>Enumerator over the programs that were added</returns>
        public IEnumerator<CachedShaderProgram> GetEnumerator() => _entries.GetEnumerator();

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs new file mode 100644 index 00000000..b2c4fccd --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -0,0 +1,874 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + class ShaderSpecializationState + { + private const uint ComsMagic = (byte)'C' | ((byte)'O' << 8) | ((byte)'M' << 16) | ((byte)'S' << 24); + private const uint GfxsMagic = (byte)'G' | ((byte)'F' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24); + private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24); + private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | ((byte)'P' << 16) | ((byte)'S' << 24); + + /// <summary> + /// Flags indicating GPU state that is used by the shader. + /// </summary> + [Flags] + private enum QueriedStateFlags + { + EarlyZForce = 1 << 0, + PrimitiveTopology = 1 << 1, + TessellationMode = 1 << 2, + TransformFeedback = 1 << 3 + } + + private QueriedStateFlags _queriedState; + private bool _compute; + private byte _constantBufferUsePerStage; + + /// <summary> + /// Compute engine state. + /// </summary> + public GpuChannelComputeState ComputeState; + + /// <summary> + /// 3D engine state. 
/// </summary>
public GpuChannelGraphicsState GraphicsState;

/// <summary>
/// Constant buffers bound at the time the shader was compiled, per stage.
/// </summary>
public Array5<uint> ConstantBufferUse;

/// <summary>
/// Pipeline state captured at the time of shader use.
/// </summary>
public ProgramPipelineState? PipelineState;

/// <summary>
/// Transform feedback buffers active at the time the shader was compiled.
/// </summary>
public TransformFeedbackDescriptor[] TransformFeedbackDescriptors;

/// <summary>
/// Flags indicating texture state that is used by the shader.
/// </summary>
[Flags]
private enum QueriedTextureStateFlags
{
    TextureFormat = 0x1,
    SamplerType = 0x2,
    CoordNormalized = 0x4
}

/// <summary>
/// Reference type wrapping a value.
/// </summary>
private class Box<T>
{
    /// <summary>
    /// Wrapped value.
    /// </summary>
    public T Value;
}

/// <summary>
/// State of a texture or image that is accessed by the shader.
/// </summary>
private struct TextureSpecializationState
{
    // New fields should be added to the end of the struct to keep disk shader cache compatibility.

    /// <summary>
    /// Flags indicating which state of the texture the shader depends on.
    /// </summary>
    public QueriedTextureStateFlags QueriedFlags;

    /// <summary>
    /// Encoded texture format value.
    /// </summary>
    public uint Format;

    /// <summary>
    /// True if the texture format is sRGB, false otherwise.
    /// </summary>
    public bool FormatSrgb;

    /// <summary>
    /// Texture target.
    /// </summary>
    public TextureTarget TextureTarget;

    /// <summary>
    /// Indicates if the coordinates used to sample the texture are normalized or not (0.0..1.0 or 0..Width/Height).
    /// </summary>
    public bool CoordNormalized;
}

/// <summary>
/// Texture binding information, used to identify each texture accessed by the shader.
/// </summary>
private readonly record struct TextureKey
{
    // New fields should be added to the end of the struct to keep disk shader cache compatibility.

    /// <summary>
    /// Shader stage where the texture is used.
    /// </summary>
    public readonly int StageIndex;

    /// <summary>
    /// Texture handle offset in words on the texture buffer.
    /// </summary>
    public readonly int Handle;

    /// <summary>
    /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register).
    /// </summary>
    public readonly int CbufSlot;

    /// <summary>
    /// Creates a new texture key.
    /// </summary>
    /// <param name="stageIndex">Shader stage where the texture is used</param>
    /// <param name="handle">Texture handle offset in words on the texture buffer</param>
    /// <param name="cbufSlot">Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register)</param>
    public TextureKey(int stageIndex, int handle, int cbufSlot)
    {
        (StageIndex, Handle, CbufSlot) = (stageIndex, handle, cbufSlot);
    }
}

// State of every texture registered or queried so far, by texture key.
private readonly Dictionary<TextureKey, Box<TextureSpecializationState>> _textureSpecialization;

// Lookup structures that are only filled by Prepare().
private KeyValuePair<TextureKey, Box<TextureSpecializationState>>[] _allTextures;
private Box<TextureSpecializationState>[][] _textureByBinding;
private Box<TextureSpecializationState>[][] _imageByBinding;

/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
private ShaderSpecializationState()
{
    _textureSpecialization = new();
}

/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current compute engine state</param>
public ShaderSpecializationState(ref GpuChannelComputeState state) : this()
{
    _compute = true;
    ComputeState = state;
}

/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this()
{
    _compute = false;
    GraphicsState = state;

    if (descriptors == null)
    {
        return;
    }

    // Transform feedback only becomes part of the queried state when descriptors are supplied.
    TransformFeedbackDescriptors = descriptors;
    _queriedState |= QueriedStateFlags.TransformFeedback;
}

/// <summary>
/// Prepare the shader specialization state for quick binding lookups.
/// </summary>
/// <remarks>
/// Entries for textures that have no recorded specialization state are left null,
/// and so are the per-stage arrays of stages that are missing or have no shader info.
/// </remarks>
/// <param name="stages">The shader stages</param>
public void Prepare(CachedShaderStage[] stages)
{
    _allTextures = _textureSpecialization.ToArray();

    int stageCount = stages.Length;

    _textureByBinding = new Box<TextureSpecializationState>[stageCount][];
    _imageByBinding = new Box<TextureSpecializationState>[stageCount][];

    for (int slot = 0; slot < stageCount; slot++)
    {
        CachedShaderStage stage = stages[slot];

        if (stage?.Info == null)
        {
            continue;
        }

        // Don't count VertexA for looking up spec state. No-Op for compute.
        int stageIndex = slot > 0 ? slot - 1 : 0;

        var textureDescriptors = stage.Info.Textures;
        var imageDescriptors = stage.Info.Images;

        var textureStates = new Box<TextureSpecializationState>[textureDescriptors.Count];
        var imageStates = new Box<TextureSpecializationState>[imageDescriptors.Count];

        for (int t = 0; t < textureStates.Length; t++)
        {
            var descriptor = textureDescriptors[t];
            textureStates[t] = GetTextureSpecState(stageIndex, descriptor.HandleIndex, descriptor.CbufSlot);
        }

        for (int m = 0; m < imageStates.Length; m++)
        {
            var descriptor = imageDescriptors[m];
            imageStates[m] = GetTextureSpecState(stageIndex, descriptor.HandleIndex, descriptor.CbufSlot);
        }

        _textureByBinding[slot] = textureStates;
        _imageByBinding[slot] = imageStates;
    }
}

/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="pipelineState">Current program pipeline state</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
public ShaderSpecializationState(
    ref GpuChannelGraphicsState state,
    ref ProgramPipelineState pipelineState,
    TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
{
    PipelineState = pipelineState;
}

/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="pipelineState">Current program pipeline state, or null if there is none</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
public ShaderSpecializationState(
    ref GpuChannelGraphicsState state,
    ProgramPipelineState? pipelineState,
    TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
{
    PipelineState = pipelineState;
}

/// <summary>
/// Indicates that the shader accesses the early Z force state.
/// </summary>
public void RecordEarlyZForce() => _queriedState |= QueriedStateFlags.EarlyZForce;

/// <summary>
/// Indicates that the shader accesses the primitive topology state.
/// </summary>
public void RecordPrimitiveTopology() => _queriedState |= QueriedStateFlags.PrimitiveTopology;

/// <summary>
/// Indicates that the shader accesses the tessellation mode state.
/// </summary>
public void RecordTessellationMode() => _queriedState |= QueriedStateFlags.TessellationMode;

/// <summary>
/// Indicates that the shader accesses the constant buffer use state.
/// </summary>
/// <param name="stageIndex">Shader stage index</param>
/// <param name="useMask">Mask indicating the constant buffers bound at the time of the shader compilation</param>
public void RecordConstantBufferUse(int stageIndex, uint useMask)
{
    // One bit per stage marks which entries of ConstantBufferUse were recorded.
    byte stageBit = (byte)(1 << stageIndex);

    ConstantBufferUse[stageIndex] = useMask;
    _constantBufferUsePerStage |= stageBit;
}

/// <summary>
/// Indicates that a given texture is accessed by the shader.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <param name="descriptor">Descriptor of the texture</param>
public void RegisterTexture(int stageIndex, int handle, int cbufSlot, Image.TextureDescriptor descriptor)
{
    // Writes go straight into the boxed state; flags that were already queried are left untouched.
    ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;

    spec.Format = descriptor.UnpackFormat();
    spec.FormatSrgb = descriptor.UnpackSrgb();
    spec.TextureTarget = descriptor.UnpackTextureTarget();
    spec.CoordNormalized = descriptor.UnpackTextureCoordNormalized();
}

/// <summary>
/// Indicates that a given texture is accessed by the shader.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <param name="format">Maxwell texture format value</param>
/// <param name="formatSrgb">Whenever the texture format is a sRGB format</param>
/// <param name="target">Texture target type</param>
/// <param name="coordNormalized">Whenever the texture coordinates used on the shader are considered normalized</param>
public void RegisterTexture(
    int stageIndex,
    int handle,
    int cbufSlot,
    uint format,
    bool formatSrgb,
    TextureTarget target,
    bool coordNormalized)
{
    // Writes go straight into the boxed state; flags that were already queried are left untouched.
    ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;

    spec.Format = format;
    spec.FormatSrgb = formatSrgb;
    spec.TextureTarget = target;
    spec.CoordNormalized = coordNormalized;
}

/// <summary>
/// Indicates that the format of a given texture was used during the shader translation process.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureFormat(int stageIndex, int handle, int cbufSlot)
{
    GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.TextureFormat;
}

/// <summary>
/// Indicates that the target of a given texture was used during the shader translation process.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureSamplerType(int stageIndex, int handle, int cbufSlot)
{
    GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.SamplerType;
}

/// <summary>
/// Indicates that the coordinate normalization state of a given texture was used during the shader translation process.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureCoordNormalized(int stageIndex, int handle, int cbufSlot)
{
    GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.CoordNormalized;
}

/// <summary>
/// Checks if primitive topology was queried by the shader.
/// </summary>
/// <returns>True if queried, false otherwise</returns>
public bool IsPrimitiveTopologyQueried()
{
    // PrimitiveTopology is a single bit flag, so a non-zero mask result means the flag is set.
    return (_queriedState & QueriedStateFlags.PrimitiveTopology) != 0;
}

/// <summary>
/// Checks if a given texture was registered on this specialization state.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>True if a state exists for the texture, false otherwise</returns>
public bool TextureRegistered(int stageIndex, int handle, int cbufSlot)
{
    Box<TextureSpecializationState> box = GetTextureSpecState(stageIndex, handle, cbufSlot);

    return box != null;
}

/// <summary>
/// Gets the recorded format of a given texture.
/// </summary>
/// <remarks>
/// The texture must have a recorded state; a lookup miss yields a null box that is dereferenced here.
/// </remarks>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Recorded format value, and whether the format is sRGB</returns>
public (uint, bool) GetFormat(int stageIndex, int handle, int cbufSlot)
{
    Box<TextureSpecializationState> box = GetTextureSpecState(stageIndex, handle, cbufSlot);

    return (box.Value.Format, box.Value.FormatSrgb);
}

/// <summary>
/// Gets the recorded target of a given texture.
/// </summary>
/// <remarks>
/// The texture must have a recorded state; a lookup miss yields a null box that is dereferenced here.
/// </remarks>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Recorded texture target</returns>
public TextureTarget GetTextureTarget(int stageIndex, int handle, int cbufSlot) =>
    GetTextureSpecState(stageIndex, handle, cbufSlot).Value.TextureTarget;

/// <summary>
/// Gets the recorded coordinate normalization state of a given texture.
/// </summary>
/// <remarks>
/// The texture must have a recorded state; a lookup miss yields a null box that is dereferenced here.
/// </remarks>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Recorded coordinate normalization state</returns>
public bool GetCoordNormalized(int stageIndex, int handle, int cbufSlot) =>
    GetTextureSpecState(stageIndex, handle, cbufSlot).Value.CoordNormalized;

/// <summary>
/// Gets texture specialization state for a given texture, or create a new one if not present.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Texture specialization state</returns>
private Box<TextureSpecializationState> GetOrCreateTextureSpecState(int stageIndex, int handle, int cbufSlot)
{
    TextureKey key = new(stageIndex, handle, cbufSlot);

    if (_textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> existing))
    {
        return existing;
    }

    // First use of this texture: start from a default state.
    Box<TextureSpecializationState> created = new();
    _textureSpecialization.Add(key, created);

    return created;
}

/// <summary>
/// Gets texture specialization state for a given texture.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Texture specialization state, or null if the texture has none</returns>
private Box<TextureSpecializationState> GetTextureSpecState(int stageIndex, int handle, int cbufSlot)
{
    TextureKey key = new(stageIndex, handle, cbufSlot);

    return _textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> found) ? found : null;
}

/// <summary>
/// Checks if the recorded state matches the current GPU 3D engine state.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">Graphics state</param>
/// <param name="usesDrawParameters">Indicates whether the vertex shader accesses draw parameters</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesGraphics(
    GpuChannel channel,
    ref GpuChannelPoolState poolState,
    ref GpuChannelGraphicsState graphicsState,
    bool usesDrawParameters,
    bool checkTextures)
{
    // Alpha to coverage dithering only counts while alpha to coverage itself is enabled.
    bool recordedA2cDither = GraphicsState.AlphaToCoverageEnable && GraphicsState.AlphaToCoverageDitherEnable;
    bool currentA2cDither = graphicsState.AlphaToCoverageEnable && graphicsState.AlphaToCoverageDitherEnable;

    if (graphicsState.ViewportTransformDisable != GraphicsState.ViewportTransformDisable ||
        currentA2cDither != recordedA2cDither ||
        graphicsState.DepthMode != GraphicsState.DepthMode ||
        graphicsState.AlphaTestEnable != GraphicsState.AlphaTestEnable)
    {
        return false;
    }

    // The alpha test parameters are only compared while the test is enabled.
    if (graphicsState.AlphaTestEnable)
    {
        bool sameAlphaTest =
            graphicsState.AlphaTestCompare == GraphicsState.AlphaTestCompare &&
            graphicsState.AlphaTestReference == GraphicsState.AlphaTestReference;

        if (!sameAlphaTest)
        {
            return false;
        }
    }

    if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan()))
    {
        return false;
    }

    // The draw parameters source is only compared for shaders that use draw parameters.
    if (usesDrawParameters && graphicsState.HasConstantBufferDrawParameters != GraphicsState.HasConstantBufferDrawParameters)
    {
        return false;
    }

    if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
    {
        return false;
    }

    // Fragment output types are only compared when the channel capabilities ask for that specialization.
    if (channel.Capabilities.NeedsFragmentOutputSpecialization &&
        !graphicsState.FragmentOutputTypes.AsSpan().SequenceEqual(GraphicsState.FragmentOutputTypes.AsSpan()))
    {
        return false;
    }

    if (graphicsState.DualSourceBlendEnable != GraphicsState.DualSourceBlendEnable)
    {
        return false;
    }

    return Matches(channel, ref poolState, checkTextures, isCompute: false);
}

/// <summary>
/// Checks if the recorded state matches the current GPU compute engine state.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesCompute(GpuChannel channel, ref GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
{
    return computeState.HasUnalignedStorageBuffer == ComputeState.HasUnalignedStorageBuffer
        && Matches(channel, ref poolState, checkTextures, isCompute: true);
}

/// <summary>
/// Fetch the constant buffers used for a texture to cache.
/// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
        /// <param name="cachedTextureBufferIndex">The currently cached texture buffer index</param>
        /// <param name="cachedSamplerBufferIndex">The currently cached sampler buffer index</param>
        /// <param name="cachedTextureBuffer">The currently cached texture buffer data</param>
        /// <param name="cachedSamplerBuffer">The currently cached sampler buffer data</param>
        /// <param name="cachedStageIndex">The currently cached stage</param>
        /// <param name="textureBufferIndex">The new texture buffer index</param>
        /// <param name="samplerBufferIndex">The new sampler buffer index</param>
        /// <param name="stageIndex">Stage index of the constant buffer</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void UpdateCachedBuffer(
            GpuChannel channel,
            bool isCompute,
            scoped ref int cachedTextureBufferIndex,
            scoped ref int cachedSamplerBufferIndex,
            scoped ref ReadOnlySpan<int> cachedTextureBuffer,
            scoped ref ReadOnlySpan<int> cachedSamplerBuffer,
            scoped ref int cachedStageIndex,
            int textureBufferIndex,
            int samplerBufferIndex,
            int stageIndex)
        {
            // A different stage invalidates both cached buffers, whatever their slot indices are.
            bool stageChanged = cachedStageIndex != stageIndex;

            if (stageChanged || cachedTextureBufferIndex != textureBufferIndex)
            {
                cachedTextureBuffer = ReadUniformBufferWords(channel, isCompute, stageIndex, textureBufferIndex);
                cachedTextureBufferIndex = textureBufferIndex;

                // When both handles live in the same constant buffer, share the data that was just fetched.
                if (textureBufferIndex == samplerBufferIndex)
                {
                    cachedSamplerBuffer = cachedTextureBuffer;
                    cachedSamplerBufferIndex = samplerBufferIndex;
                }
            }

            if (stageChanged || cachedSamplerBufferIndex != samplerBufferIndex)
            {
                cachedSamplerBuffer = ReadUniformBufferWords(channel, isCompute, stageIndex, samplerBufferIndex);
                cachedSamplerBufferIndex = samplerBufferIndex;
            }

            cachedStageIndex = stageIndex;

            // Gets the bounds of one uniform buffer and returns its backing memory viewed as 32-bit words.
            static ReadOnlySpan<int> ReadUniformBufferWords(GpuChannel gpuChannel, bool compute, int stage, int slot)
            {
                ref BufferBounds bounds = ref gpuChannel.BufferManager.GetUniformBufferBounds(compute, stage, slot);

                return MemoryMarshal.Cast<byte, int>(gpuChannel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size));
            }
        }

        /// <summary>
        /// Compares the recorded constant buffer use masks, and optionally the recorded texture state,
        /// against the current GPU state.
        /// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
        /// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
        /// <returns>True if the state matches, false otherwise</returns>
        private bool Matches(GpuChannel channel, ref GpuChannelPoolState poolState, bool checkTextures, bool isCompute)
        {
            // Each set bit selects one entry of ConstantBufferUse that has to be compared.
            int pendingStages = _constantBufferUsePerStage;

            while (pendingStages != 0)
            {
                int stage = BitOperations.TrailingZeroCount(pendingStages);
                pendingStages &= ~(1 << stage);

                uint currentUseMask;

                if (isCompute)
                {
                    currentUseMask = channel.BufferManager.GetComputeUniformBufferUseMask();
                }
                else
                {
                    currentUseMask = channel.BufferManager.GetGraphicsUniformBufferUseMask(stage);
                }

                if (ConstantBufferUse[stage] != currentUseMask)
                {
                    return false;
                }
            }

            if (!checkTextures)
            {
                return true;
            }

            TexturePool pool = channel.TextureManager.GetTexturePool(poolState.TexturePoolGpuVa, poolState.TexturePoolMaximumId);

            // -1 never equals a real index, so the first texture always triggers a buffer fetch.
            int lastTextureSlot = -1;
            int lastSamplerSlot = -1;
            int lastStage = -1;
            ReadOnlySpan<int> textureWords = ReadOnlySpan<int>.Empty;
            ReadOnlySpan<int> samplerWords = ReadOnlySpan<int>.Empty;

            foreach (var entry in _allTextures)
            {
                TextureKey key = entry.Key;

                (int textureSlot, int samplerSlot) = TextureHandle.UnpackSlots(key.CbufSlot, poolState.TextureBufferIndex);

                UpdateCachedBuffer(
                    channel,
                    isCompute,
                    ref lastTextureSlot,
                    ref lastSamplerSlot,
                    ref textureWords,
                    ref samplerWords,
                    ref lastStage,
                    textureSlot,
                    samplerSlot,
                    key.StageIndex);

                int textureId = TextureHandle.UnpackTextureId(TextureHandle.ReadPackedId(key.Handle, textureWords, samplerWords));

                // IDs the pool reports as invalid are skipped rather than treated as a mismatch.
                if (!pool.IsValidId(textureId))
                {
                    continue;
                }

                ref readonly Image.TextureDescriptor descriptor = ref pool.GetDescriptorRef(textureId);

                if (!MatchesTexture(entry.Value, descriptor))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if the recorded texture state matches the given texture descriptor.
/// </summary>
        /// <param name="specializationState">Texture specialization state, may be null</param>
        /// <param name="descriptor">Texture descriptor</param>
        /// <returns>True if the state matches or nothing is recorded, false otherwise</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private bool MatchesTexture(Box<TextureSpecializationState> specializationState, in Image.TextureDescriptor descriptor)
        {
            // With no recorded state there is nothing to compare, so any descriptor is accepted.
            if (specializationState == null)
            {
                return true;
            }

            bool coordNormalizedQueried = specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized);

            // The descriptor is only unpacked when the shader actually queried the normalized-coordinates flag.
            return !coordNormalizedQueried ||
                specializationState.Value.CoordNormalized == descriptor.UnpackTextureCoordNormalized();
        }

        /// <summary>
        /// Looks up the state recorded for a texture binding and checks it against a texture descriptor.
        /// </summary>
        /// <param name="stage">The shader stage</param>
        /// <param name="index">Index into the texture binding table of the stage</param>
        /// <param name="descriptor">Texture descriptor</param>
        /// <returns>True if the state matches, false otherwise</returns>
        public bool MatchesTexture(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
        {
            return MatchesTexture(_textureByBinding[(int)stage][index], descriptor);
        }

        /// <summary>
        /// Checks if the recorded texture state for a given image binding matches a texture descriptor.
/// </summary>
        /// <param name="stage">The shader stage</param>
        /// <param name="index">Index into the image binding table of the stage</param>
        /// <param name="descriptor">Texture descriptor</param>
        /// <returns>True if the state matches, false otherwise</returns>
        public bool MatchesImage(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
        {
            return MatchesTexture(_imageByBinding[(int)stage][index], descriptor);
        }

        /// <summary>
        /// Deserializes a shader specialization state. Fields are read in the same order <see cref="Write"/> emits them.
        /// </summary>
        /// <param name="dataReader">Data reader</param>
        /// <returns>Shader specialization state</returns>
        public static ShaderSpecializationState Read(ref BinarySerializer dataReader)
        {
            ShaderSpecializationState result = new ShaderSpecializationState();

            dataReader.Read(ref result._queriedState);
            dataReader.Read(ref result._compute);

            // Only one of the two state blocks is stored, selected by the compute flag read above.
            if (result._compute)
            {
                dataReader.ReadWithMagicAndSize(ref result.ComputeState, ComsMagic);
            }
            else
            {
                dataReader.ReadWithMagicAndSize(ref result.GraphicsState, GfxsMagic);
            }

            dataReader.Read(ref result._constantBufferUsePerStage);

            // One use mask is stored for every bit set in the per-stage mask, lowest bit first.
            int pendingStages = result._constantBufferUsePerStage;

            while (pendingStages != 0)
            {
                int stage = BitOperations.TrailingZeroCount(pendingStages);
                pendingStages &= ~(1 << stage);

                dataReader.Read(ref result.ConstantBufferUse[stage]);
            }

            bool pipelineStatePresent = false;

            dataReader.Read(ref pipelineStatePresent);

            if (pipelineStatePresent)
            {
                ProgramPipelineState storedPipelineState = default;
                dataReader.ReadWithMagicAndSize(ref storedPipelineState, PgpsMagic);
                result.PipelineState = storedPipelineState;
            }

            // Transform feedback descriptors are only present when the matching queried flag is set.
            if (result._queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
            {
                ushort descriptorCount = 0;
                dataReader.Read(ref descriptorCount);
                result.TransformFeedbackDescriptors = new TransformFeedbackDescriptor[descriptorCount];

                for (int i = 0; i < descriptorCount; i++)
                {
                    dataReader.ReadWithMagicAndSize(ref result.TransformFeedbackDescriptors[i], TfbdMagic);
                }
            }

            ushort textureCount = 0;
            dataReader.Read(ref textureCount);

            for (int i = 0; i < textureCount; i++)
            {
                TextureKey key = default;
                Box<TextureSpecializationState> state = new Box<TextureSpecializationState>();

                dataReader.ReadWithMagicAndSize(ref key, TexkMagic);
                dataReader.ReadWithMagicAndSize(ref state.Value, TexsMagic);

                result._textureSpecialization[key] = state;
            }

            return result;
        }

        /// <summary>
        /// Serializes the shader specialization state. Fields are written in the same order <see cref="Read"/> consumes them.
        /// </summary>
        /// <param name="dataWriter">Data writer</param>
        public void Write(ref BinarySerializer dataWriter)
        {
            dataWriter.Write(ref _queriedState);
            dataWriter.Write(ref _compute);

            // Only the state block matching the compute flag is stored.
            if (_compute)
            {
                dataWriter.WriteWithMagicAndSize(ref ComputeState, ComsMagic);
            }
            else
            {
                dataWriter.WriteWithMagicAndSize(ref GraphicsState, GfxsMagic);
            }

            dataWriter.Write(ref _constantBufferUsePerStage);

            // One use mask is written for every bit set in the per-stage mask, lowest bit first.
            int pendingStages = _constantBufferUsePerStage;

            while (pendingStages != 0)
            {
                int stage = BitOperations.TrailingZeroCount(pendingStages);
                pendingStages &= ~(1 << stage);

                dataWriter.Write(ref ConstantBufferUse[stage]);
            }

            bool pipelineStatePresent = PipelineState.HasValue;

            dataWriter.Write(ref pipelineStatePresent);

            if (pipelineStatePresent)
            {
                // Copied to a local because the writer takes its argument by reference.
                ProgramPipelineState pipelineStateCopy = PipelineState.Value;
                dataWriter.WriteWithMagicAndSize(ref pipelineStateCopy, PgpsMagic);
            }

            if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
            {
                ushort descriptorCount = (ushort)TransformFeedbackDescriptors.Length;
                dataWriter.Write(ref descriptorCount);

                for (int i = 0; i < TransformFeedbackDescriptors.Length; i++)
                {
                    dataWriter.WriteWithMagicAndSize(ref TransformFeedbackDescriptors[i], TfbdMagic);
                }
            }

            ushort textureCount = (ushort)_textureSpecialization.Count;
            dataWriter.Write(ref textureCount);

            foreach (var entry in _textureSpecialization)
            {
                // Copied to locals because the writer takes its arguments by reference.
                var key = entry.Key;
                var state = entry.Value;

                dataWriter.WriteWithMagicAndSize(ref key, TexkMagic);
                dataWriter.WriteWithMagicAndSize(ref state.Value, TexsMagic);
            }
        }
    }
}
// \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs b/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs new file mode 100644 index 00000000..5baf2a1a --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs @@ -0,0 +1,58 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Describes one transform feedback buffer: its index, its stride and the varyings written into it.
    /// </summary>
    struct TransformFeedbackDescriptor
    {
        // New fields should be added to the end of the struct to keep disk shader cache compatibility.

        /// <summary>
        /// Index of the transform feedback.
        /// </summary>
        public readonly int BufferIndex;

        /// <summary>
        /// Amount of bytes consumed per vertex.
        /// </summary>
        public readonly int Stride;

        /// <summary>
        /// Number of varyings written into the buffer.
        /// </summary>
        public readonly int VaryingCount;

        /// <summary>
        /// Location of varyings to be written into the buffer. Each byte is one location.
        /// </summary>
        public Array32<uint> VaryingLocations; // Making this readonly breaks AsSpan

        /// <summary>
        /// Creates a new transform feedback descriptor.
        /// </summary>
        /// <param name="bufferIndex">Index of the transform feedback</param>
        /// <param name="stride">Amount of bytes consumed per vertex</param>
        /// <param name="varyingCount">Number of varyings written into the buffer. Indicates size in bytes of <paramref name="varyingLocations"/></param>
        /// <param name="varyingLocations">Location of varyings to be written into the buffer. Each byte is one location</param>
        public TransformFeedbackDescriptor(int bufferIndex, int stride, int varyingCount, ref Array32<uint> varyingLocations)
        {
            (BufferIndex, Stride, VaryingCount) = (bufferIndex, stride, varyingCount);

            // The locations are copied into the descriptor; the reference to the caller's storage is not kept.
            VaryingLocations = varyingLocations;
        }

        /// <summary>
        /// Gets a byte view of the <see cref="VaryingLocations"/>, trimmed to the number of varyings in use.
        /// </summary>
        /// <returns>Span of varying locations</returns>
        public ReadOnlySpan<byte> AsSpan()
        {
            // Upper bound on the returned length; presumably the byte size of Array32<uint> - TODO confirm.
            const int MaxLocationBytes = 128;

            ReadOnlySpan<byte> locationBytes = MemoryMarshal.Cast<uint, byte>(VaryingLocations.AsSpan());
            int usedBytes = Math.Min(MaxLocationBytes, VaryingCount);

            return locationBytes[..usedBytes];
        }
    }
}
