aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Gpu/Shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Shader')
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs103
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs56
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs38
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs70
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs138
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs216
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs18
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs57
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs266
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs459
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs839
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs48
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs72
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs57
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs29
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs725
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs66
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs297
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs238
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs61
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs65
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs158
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs50
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs113
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs27
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs451
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs244
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs96
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs36
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs64
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs774
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs282
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheState.cs13
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs32
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs49
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs129
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs84
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs874
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs58
39 files changed, 7452 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs
new file mode 100644
index 00000000..1734f08a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs
@@ -0,0 +1,103 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Texture, image and buffer bindings of a cached shader, precomputed per stage so that they
+    /// are ready for insertion into the buffer and texture managers.
+    /// </summary>
+    internal class CachedShaderBindings
+    {
+        /// <summary>Texture bindings of each stage.</summary>
+        public TextureBindingInfo[][] TextureBindings { get; }
+
+        /// <summary>Image bindings of each stage.</summary>
+        public TextureBindingInfo[][] ImageBindings { get; }
+
+        /// <summary>Constant buffer descriptors of each stage.</summary>
+        public BufferDescriptor[][] ConstantBufferBindings { get; }
+
+        /// <summary>Storage buffer descriptors of each stage.</summary>
+        public BufferDescriptor[][] StorageBufferBindings { get; }
+
+        /// <summary>Highest texture binding used by any stage, or -1 if no stage uses textures.</summary>
+        public int MaxTextureBinding { get; }
+
+        /// <summary>Highest image binding used by any stage, or -1 if no stage uses images.</summary>
+        public int MaxImageBinding { get; }
+
+        /// <summary>
+        /// Builds the bindings collection from the stages of a shader program.
+        /// </summary>
+        /// <param name="isCompute">Whether the shader is for compute</param>
+        /// <param name="stages">The stages used by the shader</param>
+        public CachedShaderBindings(bool isCompute, CachedShaderStage[] stages)
+        {
+            // Compute uses the single stage at index 0; graphics reads its stages starting at index 1.
+            int stageCount;
+            int firstStage;
+
+            if (isCompute)
+            {
+                stageCount = 1;
+                firstStage = 0;
+            }
+            else
+            {
+                stageCount = Constants.ShaderStages;
+                firstStage = 1;
+            }
+
+            var textures = new TextureBindingInfo[stageCount][];
+            var images = new TextureBindingInfo[stageCount][];
+            var constantBuffers = new BufferDescriptor[stageCount][];
+            var storageBuffers = new BufferDescriptor[stageCount][];
+
+            int highestTexture = -1;
+            int highestImage = -1;
+
+            for (int index = 0; index < stageCount; index++)
+            {
+                CachedShaderStage current = stages[index + firstStage];
+
+                if (current != null)
+                {
+                    // ToArray() forces the projection to run here, so the running maximum
+                    // captured by the lambda is up to date once the array has been built.
+                    textures[index] = current.Info.Textures.Select(descriptor =>
+                    {
+                        var binding = new TextureBindingInfo(
+                            ShaderTexture.GetTarget(descriptor.Type),
+                            descriptor.Binding,
+                            descriptor.CbufSlot,
+                            descriptor.HandleIndex,
+                            descriptor.Flags);
+
+                        if (descriptor.Binding > highestTexture)
+                        {
+                            highestTexture = descriptor.Binding;
+                        }
+
+                        return binding;
+                    }).ToArray();
+
+                    images[index] = current.Info.Images.Select(descriptor =>
+                    {
+                        var binding = new TextureBindingInfo(
+                            ShaderTexture.GetTarget(descriptor.Type),
+                            ShaderTexture.GetFormat(descriptor.Format),
+                            descriptor.Binding,
+                            descriptor.CbufSlot,
+                            descriptor.HandleIndex,
+                            descriptor.Flags);
+
+                        if (descriptor.Binding > highestImage)
+                        {
+                            highestImage = descriptor.Binding;
+                        }
+
+                        return binding;
+                    }).ToArray();
+
+                    constantBuffers[index] = current.Info.CBuffers.ToArray();
+                    storageBuffers[index] = current.Info.SBuffers.ToArray();
+                }
+                else
+                {
+                    // Missing stages get empty arrays rather than null entries.
+                    textures[index] = Array.Empty<TextureBindingInfo>();
+                    images[index] = Array.Empty<TextureBindingInfo>();
+                    constantBuffers[index] = Array.Empty<BufferDescriptor>();
+                    storageBuffers[index] = Array.Empty<BufferDescriptor>();
+                }
+            }
+
+            TextureBindings = textures;
+            ImageBindings = images;
+            ConstantBufferBindings = constantBuffers;
+            StorageBufferBindings = storageBuffers;
+
+            MaxTextureBinding = highestTexture;
+            MaxImageBinding = highestImage;
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
new file mode 100644
index 00000000..ff9c39a1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
@@ -0,0 +1,56 @@
+using Ryujinx.Graphics.GAL;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// A cached host program together with the guest shader stages and the GPU state it was built from.
+    /// Graphics programs hold one or more stages, compute programs hold a single shader.
+    /// </summary>
+    class CachedShaderProgram : IDisposable
+    {
+        /// <summary>
+        /// Program object created on the host.
+        /// </summary>
+        public IProgram HostProgram { get; }
+
+        /// <summary>
+        /// GPU state that this version of the shader was created with.
+        /// </summary>
+        public ShaderSpecializationState SpecializationState { get; }
+
+        /// <summary>
+        /// Compiled shader of each stage.
+        /// </summary>
+        public CachedShaderStage[] Shaders { get; }
+
+        /// <summary>
+        /// Precomputed bindings, ready to be placed into the bindings manager.
+        /// </summary>
+        public CachedShaderBindings Bindings { get; }
+
+        /// <summary>
+        /// Creates a new cached shader program.
+        /// </summary>
+        /// <param name="hostProgram">Host program with all the shader stages</param>
+        /// <param name="specializationState">GPU state used to create this version of the shader</param>
+        /// <param name="shaders">Shaders</param>
+        public CachedShaderProgram(IProgram hostProgram, ShaderSpecializationState specializationState, params CachedShaderStage[] shaders)
+        {
+            Shaders = shaders;
+            HostProgram = hostProgram;
+            SpecializationState = specializationState;
+
+            specializationState.Prepare(shaders);
+
+            // A program made of exactly one shader is treated as a compute program.
+            bool isCompute = shaders.Length == 1;
+
+            Bindings = new CachedShaderBindings(isCompute, shaders);
+        }
+
+        /// <summary>
+        /// Releases the host program.
+        /// </summary>
+        public void Dispose() => HostProgram.Dispose();
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs
new file mode 100644
index 00000000..22b08dd5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs
@@ -0,0 +1,38 @@
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Holds the cached code and program information of one shader stage.
+    /// </summary>
+    class CachedShaderStage
+    {
+        /// <summary>
+        /// Information about the shader program.
+        /// </summary>
+        public ShaderProgramInfo Info { get; }
+
+        /// <summary>
+        /// Shader code, as Maxwell binary.
+        /// </summary>
+        public byte[] Code { get; }
+
+        /// <summary>
+        /// Data from constant buffer 1 that the shader accesses.
+        /// </summary>
+        public byte[] Cb1Data { get; }
+
+        /// <summary>
+        /// Creates a new cached shader stage.
+        /// </summary>
+        /// <param name="info">Shader program information</param>
+        /// <param name="code">Maxwell binary shader code</param>
+        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
+        public CachedShaderStage(ShaderProgramInfo info, byte[] code, byte[] cb1Data)
+        {
+            (Info, Code, Cb1Data) = (info, code, cb1Data);
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
new file mode 100644
index 00000000..a6718211
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
@@ -0,0 +1,70 @@
+using Ryujinx.Graphics.Gpu.Shader.HashTable;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Hash table used to look up cached compute shader programs by their guest code.
+    /// </summary>
+    class ComputeShaderCacheHashTable
+    {
+        private readonly PartitionedHashTable<ShaderSpecializationList> _cache;
+        private readonly List<CachedShaderProgram> _shaderPrograms;
+
+        /// <summary>
+        /// Creates an empty compute shader cache hash table.
+        /// </summary>
+        public ComputeShaderCacheHashTable()
+        {
+            _shaderPrograms = new List<CachedShaderProgram>();
+            _cache = new PartitionedHashTable<ShaderSpecializationList>();
+        }
+
+        /// <summary>
+        /// Registers a program on the cache, keyed by the code of its first (and only) shader.
+        /// </summary>
+        /// <param name="program">Program to be added</param>
+        public void Add(CachedShaderProgram program)
+        {
+            byte[] guestCode = program.Shaders[0].Code;
+
+            var specializations = _cache.GetOrAdd(guestCode, new ShaderSpecializationList());
+            specializations.Add(program);
+
+            _shaderPrograms.Add(program);
+        }
+
+        /// <summary>
+        /// Looks for a cached program matching the code at the given address and the current state.
+        /// </summary>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="poolState">Texture pool state</param>
+        /// <param name="computeState">Compute state</param>
+        /// <param name="gpuVa">GPU virtual address of the compute shader</param>
+        /// <param name="program">Cached host program for the given state, if found</param>
+        /// <param name="cachedGuestCode">Cached guest code, if any found</param>
+        /// <returns>True if a cached host program was found, false otherwise</returns>
+        public bool TryFind(
+            GpuChannel channel,
+            GpuChannelPoolState poolState,
+            GpuChannelComputeState computeState,
+            ulong gpuVa,
+            out CachedShaderProgram program,
+            out byte[] cachedGuestCode)
+        {
+            var accessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);
+
+            if (_cache.TryFindItem(accessor, out var specializations, out cachedGuestCode))
+            {
+                // The guest code is known, now a specialization matching the state is needed.
+                return specializations.TryFindForCompute(channel, poolState, computeState, out program);
+            }
+
+            program = null;
+            return false;
+        }
+
+        /// <summary>
+        /// Enumerates every program that was added to the table, in insertion order.
+        /// </summary>
+        /// <returns>Programs added to the table</returns>
+        public IEnumerable<CachedShaderProgram> GetPrograms()
+        {
+            foreach (CachedShaderProgram cachedProgram in _shaderPrograms)
+            {
+                yield return cachedProgram;
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
new file mode 100644
index 00000000..568fe968
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs
@@ -0,0 +1,138 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using System;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Represents a background disk cache writer.
+    /// </summary>
+    class BackgroundDiskCacheWriter : IDisposable
+    {
+        /// <summary>
+        /// Possible operation to do on the <see cref="_fileWriterWorkerQueue"/>.
+        /// </summary>
+        private enum CacheFileOperation
+        {
+            /// <summary>
+            /// Operation to add a shader to the cache.
+            /// </summary>
+            AddShader
+        }
+
+        /// <summary>
+        /// Represents an operation to perform on the <see cref="_fileWriterWorkerQueue"/>.
+        /// </summary>
+        private readonly struct CacheFileOperationTask
+        {
+            /// <summary>
+            /// The type of operation to perform.
+            /// </summary>
+            public readonly CacheFileOperation Type;
+
+            /// <summary>
+            /// The data associated to this operation or null.
+            /// </summary>
+            public readonly object Data;
+
+            /// <summary>
+            /// Creates a new operation task.
+            /// </summary>
+            /// <param name="type">The type of operation to perform</param>
+            /// <param name="data">The data associated to the operation, or null</param>
+            public CacheFileOperationTask(CacheFileOperation type, object data)
+            {
+                Type = type;
+                Data = data;
+            }
+        }
+
+        /// <summary>
+        /// Background shader cache write information.
+        /// </summary>
+        private readonly struct AddShaderData
+        {
+            /// <summary>
+            /// Cached shader program.
+            /// </summary>
+            public readonly CachedShaderProgram Program;
+
+            /// <summary>
+            /// Binary host code.
+            /// </summary>
+            public readonly byte[] HostCode;
+
+            /// <summary>
+            /// Creates a new background shader cache write information.
+            /// </summary>
+            /// <param name="program">Cached shader program</param>
+            /// <param name="hostCode">Binary host code</param>
+            public AddShaderData(CachedShaderProgram program, byte[] hostCode)
+            {
+                Program = program;
+                HostCode = hostCode;
+            }
+        }
+
+        private readonly GpuContext _context;
+        private readonly DiskCacheHostStorage _hostStorage;
+        private readonly AsyncWorkQueue<CacheFileOperationTask> _fileWriterWorkerQueue;
+
+        /// <summary>
+        /// Creates a new background disk cache writer.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="hostStorage">Disk cache host storage</param>
+        public BackgroundDiskCacheWriter(GpuContext context, DiskCacheHostStorage hostStorage)
+        {
+            _context = context;
+            _hostStorage = hostStorage;
+            _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(ProcessTask, "GPU.BackgroundDiskCacheWriter");
+        }
+
+        /// <summary>
+        /// Processes a shader cache background operation.
+        /// </summary>
+        /// <remarks>
+        /// Disk cache and I/O failures are logged and swallowed, so a failed write does not
+        /// propagate out of the work queue callback.
+        /// </remarks>
+        /// <param name="task">Task to process</param>
+        private void ProcessTask(CacheFileOperationTask task)
+        {
+            switch (task.Type)
+            {
+                case CacheFileOperation.AddShader:
+                    AddShaderData data = (AddShaderData)task.Data;
+                    try
+                    {
+                        _hostStorage.AddShader(_context, data.Program, data.HostCode);
+                    }
+                    catch (Exception exception) when (exception is DiskCacheLoadException or IOException)
+                    {
+                        // Both failure kinds are reported the same way, so they share one handler.
+                        Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {exception.Message}");
+                    }
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Adds a shader program to be cached in the background.
+        /// </summary>
+        /// <param name="program">Shader program to cache</param>
+        /// <param name="hostCode">Host binary code of the program</param>
+        public void AddShader(CachedShaderProgram program, byte[] hostCode)
+        {
+            _fileWriterWorkerQueue.Add(new CacheFileOperationTask(CacheFileOperation.AddShader, new AddShaderData(program, hostCode)));
+        }
+
+        /// <summary>
+        /// Disposes the background work queue.
+        /// </summary>
+        public void Dispose()
+        {
+            Dispose(true);
+
+            // The type is not sealed: a derived class may add a finalizer, which must not
+            // run again once the instance has been disposed explicitly.
+            GC.SuppressFinalize(this);
+        }
+
+        /// <summary>
+        /// Releases the resources held by the writer.
+        /// </summary>
+        /// <param name="disposing">True when called from <see cref="Dispose()"/>, in which case the work queue is disposed</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                _fileWriterWorkerQueue.Dispose();
+            }
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs
new file mode 100644
index 00000000..50e37033
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs
@@ -0,0 +1,216 @@
+using System;
+using System.IO;
+using System.IO.Compression;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Binary data serializer.
+    /// </summary>
+    /// <remarks>
+    /// Reads and writes go through the active stream, which is either the stream passed to the
+    /// constructor or a <see cref="DeflateStream"/> wrapping it while a compressed section is open.
+    /// </remarks>
+    struct BinarySerializer
+    {
+        private readonly Stream _stream;
+        private Stream _activeStream;
+
+        /// <summary>
+        /// Creates a new binary serializer.
+        /// </summary>
+        /// <param name="stream">Stream to read from or write into</param>
+        public BinarySerializer(Stream stream)
+        {
+            _stream = stream;
+            _activeStream = stream;
+        }
+
+        /// <summary>
+        /// Reads data from the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <exception cref="EndOfStreamException">The stream ended before the whole value could be read</exception>
+        public void Read<T>(ref T data) where T : unmanaged
+        {
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
+            ReadExact(_activeStream, buffer);
+        }
+
+        /// <summary>
+        /// Tries to read data from the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <returns>True if the read was successful, false otherwise</returns>
+        public bool TryRead<T>(ref T data) where T : unmanaged
+        {
+            // Length is unknown on compressed streams.
+            if (_activeStream == _stream)
+            {
+                int size = Unsafe.SizeOf<T>();
+                if (_activeStream.Length - _activeStream.Position < size)
+                {
+                    return false;
+                }
+            }
+
+            // On compressed streams the end is only detected while reading.
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
+            return TryReadExact(_activeStream, buffer);
+        }
+
+        /// <summary>
+        /// Reads data prefixed with a magic and size from the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <param name="magic">Expected magic value, for validation</param>
+        /// <exception cref="DiskCacheLoadException">The magic does not match, or the stored size is negative or larger than <typeparamref name="T"/></exception>
+        /// <exception cref="EndOfStreamException">The stream ended before the whole value could be read</exception>
+        public void ReadWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
+        {
+            uint actualMagic = 0;
+            int size = 0;
+            Read(ref actualMagic);
+            Read(ref size);
+
+            if (actualMagic != magic)
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidMagic);
+            }
+
+            // Structs are expected to expand but not shrink between versions.
+            // A negative size can only come from a corrupted file and must not reach Slice.
+            if (size < 0 || size > Unsafe.SizeOf<T>())
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidLength);
+            }
+
+            // Only the stored bytes are overwritten, the rest of the struct keeps the caller's values.
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1)).Slice(0, size);
+            ReadExact(_activeStream, buffer);
+        }
+
+        /// <summary>
+        /// Writes data into the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data to be written</param>
+        public void Write<T>(ref T data) where T : unmanaged
+        {
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
+            _activeStream.Write(buffer);
+        }
+
+        /// <summary>
+        /// Writes data prefixed with a magic and size into the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data to write</param>
+        /// <param name="magic">Magic value to write</param>
+        public void WriteWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
+        {
+            int size = Unsafe.SizeOf<T>();
+            Write(ref magic);
+            Write(ref size);
+            Write(ref data);
+        }
+
+        /// <summary>
+        /// Indicates that all data that will be read from the stream has been compressed.
+        /// </summary>
+        /// <remarks>
+        /// The compression algorithm is read from the stream itself.
+        /// </remarks>
+        public void BeginCompression()
+        {
+            CompressionAlgorithm algorithm = CompressionAlgorithm.None;
+            Read(ref algorithm);
+
+            if (algorithm == CompressionAlgorithm.Deflate)
+            {
+                _activeStream = new DeflateStream(_stream, CompressionMode.Decompress, true);
+            }
+        }
+
+        /// <summary>
+        /// Indicates that all data that will be written into the stream should be compressed.
+        /// </summary>
+        /// <param name="algorithm">Compression algorithm that should be used</param>
+        public void BeginCompression(CompressionAlgorithm algorithm)
+        {
+            Write(ref algorithm);
+
+            if (algorithm == CompressionAlgorithm.Deflate)
+            {
+                _activeStream = new DeflateStream(_stream, CompressionLevel.SmallestSize, true);
+            }
+        }
+
+        /// <summary>
+        /// Indicates the end of a compressed chunk.
+        /// </summary>
+        /// <remarks>
+        /// Any data written after this will not be compressed unless <see cref="BeginCompression(CompressionAlgorithm)"/> is called again.
+        /// Any data read after this will be assumed to be uncompressed unless <see cref="BeginCompression"/> is called again.
+        /// </remarks>
+        public void EndCompression()
+        {
+            if (_activeStream != _stream)
+            {
+                _activeStream.Dispose();
+                _activeStream = _stream;
+            }
+        }
+
+        /// <summary>
+        /// Reads compressed data from the stream.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="data"/> must have the exact length of the uncompressed data,
+        /// otherwise decompression will fail.
+        /// An unrecognized algorithm value leaves <paramref name="data"/> untouched.
+        /// </remarks>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="data">Buffer to write the uncompressed data into</param>
+        /// <exception cref="EndOfStreamException">The stream ended before <paramref name="data"/> could be filled</exception>
+        public static void ReadCompressed(Stream stream, Span<byte> data)
+        {
+            // ReadByte returns -1 at the end of the stream, which would otherwise be
+            // cast into a meaningless algorithm value.
+            int algorithmByte = stream.ReadByte();
+            if (algorithmByte < 0)
+            {
+                throw new EndOfStreamException();
+            }
+
+            switch ((CompressionAlgorithm)algorithmByte)
+            {
+                case CompressionAlgorithm.None:
+                    ReadExact(stream, data);
+                    break;
+                case CompressionAlgorithm.Deflate:
+                    using (var deflateStream = new DeflateStream(stream, CompressionMode.Decompress, true))
+                    {
+                        ReadExact(deflateStream, data);
+                    }
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Compresses and writes the compressed data into the stream.
+        /// </summary>
+        /// <param name="stream">Stream to write into</param>
+        /// <param name="data">Data to compress</param>
+        /// <param name="algorithm">Compression algorithm to be used</param>
+        public static void WriteCompressed(Stream stream, ReadOnlySpan<byte> data, CompressionAlgorithm algorithm)
+        {
+            stream.WriteByte((byte)algorithm);
+
+            switch (algorithm)
+            {
+                case CompressionAlgorithm.None:
+                    stream.Write(data);
+                    break;
+                case CompressionAlgorithm.Deflate:
+                    // Disposing flushes the compressed data; the underlying stream is left open.
+                    using (var deflateStream = new DeflateStream(stream, CompressionLevel.SmallestSize, true))
+                    {
+                        deflateStream.Write(data);
+                    }
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Fills the whole buffer with data from the stream, reading as many times as needed.
+        /// </summary>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="buffer">Buffer to fill</param>
+        /// <returns>True if the buffer was filled, false if the stream ended first</returns>
+        private static bool TryReadExact(Stream stream, Span<byte> buffer)
+        {
+            int offset = 0;
+
+            while (offset < buffer.Length)
+            {
+                int read = stream.Read(buffer.Slice(offset));
+
+                // A read of zero bytes means end of stream; looping again would never make progress.
+                if (read == 0)
+                {
+                    return false;
+                }
+
+                offset += read;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Fills the whole buffer with data from the stream, or throws if the stream ends first.
+        /// </summary>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="buffer">Buffer to fill</param>
+        /// <exception cref="EndOfStreamException">The stream ended before the buffer was filled</exception>
+        private static void ReadExact(Stream stream, Span<byte> buffer)
+        {
+            if (!TryReadExact(stream, buffer))
+            {
+                throw new EndOfStreamException();
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs
new file mode 100644
index 00000000..a46e1ef7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs
@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Compression algorithms that the cache can be stored with.
+    /// </summary>
+    enum CompressionAlgorithm : byte
+    {
+        /// <summary>
+        /// The data is stored as-is, without any compression.
+        /// </summary>
+        None = 0,
+
+        /// <summary>
+        /// The data is compressed with Deflate (RFC 1951).
+        /// </summary>
+        Deflate = 1
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs
new file mode 100644
index 00000000..c8a9f7ff
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs
@@ -0,0 +1,57 @@
+using Ryujinx.Common.Logging;
+using System;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Common disk cache utility methods.
+    /// </summary>
+    static class DiskCacheCommon
+    {
+        /// <summary>
+        /// Opens a file for read or write.
+        /// </summary>
+        /// <remarks>
+        /// Writable files are opened for read and write access and created if missing;
+        /// read-only files must already exist. Other readers are allowed in both cases.
+        /// </remarks>
+        /// <param name="basePath">Base path of the file (should not include the file name)</param>
+        /// <param name="fileName">Name of the file</param>
+        /// <param name="writable">Indicates if the file will be read or written</param>
+        /// <returns>File stream</returns>
+        /// <exception cref="DiskCacheLoadException">The file could not be opened because of an I/O error or missing permissions</exception>
+        public static FileStream OpenFile(string basePath, string fileName, bool writable)
+        {
+            string fullPath = Path.Combine(basePath, fileName);
+
+            FileMode mode = writable ? FileMode.OpenOrCreate : FileMode.Open;
+            FileAccess access = writable ? FileAccess.ReadWrite : FileAccess.Read;
+
+            try
+            {
+                return new FileStream(fullPath, mode, access, FileShare.Read);
+            }
+            catch (Exception exception) when (exception is IOException or UnauthorizedAccessException)
+            {
+                // Permission failures are reported as UnauthorizedAccessException, which is not
+                // an IOException, so it has to be matched explicitly to be reported as NoAccess.
+                Logger.Error?.Print(LogClass.Gpu, $"Could not access file \"{fullPath}\". {exception.Message}");
+
+                throw new DiskCacheLoadException(DiskCacheLoadResult.NoAccess);
+            }
+        }
+
+        /// <summary>
+        /// Gets the compression algorithm that should be used when writing the disk cache.
+        /// </summary>
+        /// <returns>Compression algorithm</returns>
+        public static CompressionAlgorithm GetCompressionAlgorithm()
+        {
+            return CompressionAlgorithm.Deflate;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
new file mode 100644
index 00000000..17639ca1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
@@ -0,0 +1,266 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Represents a GPU state and memory accessor.
+    /// </summary>
+    /// <remarks>
+    /// Queries are answered from the specialization state of the cached shader. Some of them are
+    /// also recorded on the specialization state of the shader being recompiled.
+    /// </remarks>
+    class DiskCacheGpuAccessor : GpuAccessorBase, IGpuAccessor
+    {
+        private readonly ReadOnlyMemory<byte> _data;
+        private readonly ReadOnlyMemory<byte> _cb1Data;
+        private readonly ShaderSpecializationState _oldSpecState;
+        private readonly ShaderSpecializationState _newSpecState;
+        private readonly int _stageIndex;
+        private readonly bool _isVulkan;
+
+        /// <summary>
+        /// Creates a new instance of the cached GPU state accessor for shader translation.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="data">The data of the shader</param>
+        /// <param name="cb1Data">The constant buffer 1 data of the shader</param>
+        /// <param name="oldSpecState">Shader specialization state of the cached shader</param>
+        /// <param name="newSpecState">Shader specialization state of the recompiled shader</param>
+        /// <param name="counts">Resource counts, forwarded to the base accessor</param>
+        /// <param name="stageIndex">Shader stage index</param>
+        public DiskCacheGpuAccessor(
+            GpuContext context,
+            ReadOnlyMemory<byte> data,
+            ReadOnlyMemory<byte> cb1Data,
+            ShaderSpecializationState oldSpecState,
+            ShaderSpecializationState newSpecState,
+            ResourceCounts counts,
+            int stageIndex) : base(context, counts, stageIndex)
+        {
+            _data = data;
+            _cb1Data = cb1Data;
+            _oldSpecState = oldSpecState;
+            _newSpecState = newSpecState;
+            _stageIndex = stageIndex;
+            _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
+        }
+
+        /// <inheritdoc/>
+        public uint ConstantBuffer1Read(int offset)
+        {
+            // Written as a subtraction so that a large offset cannot overflow the check,
+            // and with an explicit sign test so that negative offsets never reach Slice.
+            if (offset < 0 || offset > _cb1Data.Length - sizeof(uint))
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.InvalidCb1DataLength);
+            }
+
+            return MemoryMarshal.Cast<byte, uint>(_cb1Data.Span.Slice(offset))[0];
+        }
+
+        /// <inheritdoc/>
+        public void Log(string message)
+        {
+            Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");
+        }
+
+        /// <inheritdoc/>
+        public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize)
+        {
+            // The address is used as an offset into the cached code; minimumSize is not used.
+            return MemoryMarshal.Cast<byte, ulong>(_data.Span.Slice((int)address));
+        }
+
+        /// <inheritdoc/>
+        public bool QueryAlphaToCoverageDitherEnable()
+        {
+            return _oldSpecState.GraphicsState.AlphaToCoverageEnable && _oldSpecState.GraphicsState.AlphaToCoverageDitherEnable;
+        }
+
+        /// <inheritdoc/>
+        public AlphaTestOp QueryAlphaTestCompare()
+        {
+            // The comparison is only reported for Vulkan with alpha test enabled.
+            if (!_isVulkan || !_oldSpecState.GraphicsState.AlphaTestEnable)
+            {
+                return AlphaTestOp.Always;
+            }
+
+            return _oldSpecState.GraphicsState.AlphaTestCompare switch
+            {
+                CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
+                CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
+                CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
+                CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
+                CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
+                CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
+                CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
+                _ => AlphaTestOp.Always
+            };
+        }
+
+        /// <inheritdoc/>
+        public float QueryAlphaTestReference() => _oldSpecState.GraphicsState.AlphaTestReference;
+
+        /// <inheritdoc/>
+        public AttributeType QueryAttributeType(int location)
+        {
+            return _oldSpecState.GraphicsState.AttributeTypes[location];
+        }
+
+        /// <inheritdoc/>
+        public AttributeType QueryFragmentOutputType(int location)
+        {
+            return _oldSpecState.GraphicsState.FragmentOutputTypes[location];
+        }
+
+        /// <inheritdoc/>
+        public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX;
+
+        /// <inheritdoc/>
+        public int QueryComputeLocalSizeY() => _oldSpecState.ComputeState.LocalSizeY;
+
+        /// <inheritdoc/>
+        public int QueryComputeLocalSizeZ() => _oldSpecState.ComputeState.LocalSizeZ;
+
+        /// <inheritdoc/>
+        public int QueryComputeLocalMemorySize() => _oldSpecState.ComputeState.LocalMemorySize;
+
+        /// <inheritdoc/>
+        public int QueryComputeSharedMemorySize() => _oldSpecState.ComputeState.SharedMemorySize;
+
+        /// <inheritdoc/>
+        public uint QueryConstantBufferUse()
+        {
+            // The value of the cached shader is carried over to the new specialization state.
+            _newSpecState.RecordConstantBufferUse(_stageIndex, _oldSpecState.ConstantBufferUse[_stageIndex]);
+            return _oldSpecState.ConstantBufferUse[_stageIndex];
+        }
+
+        /// <inheritdoc/>
+        public bool QueryHasConstantBufferDrawParameters()
+        {
+            return _oldSpecState.GraphicsState.HasConstantBufferDrawParameters;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryDualSourceBlendEnable()
+        {
+            return _oldSpecState.GraphicsState.DualSourceBlendEnable;
+        }
+
+        /// <inheritdoc/>
+        public InputTopology QueryPrimitiveTopology()
+        {
+            _newSpecState.RecordPrimitiveTopology();
+            return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode);
+        }
+
+        /// <inheritdoc/>
+        public bool QueryProgramPointSize()
+        {
+            return _oldSpecState.GraphicsState.ProgramPointSizeEnable;
+        }
+
+        /// <inheritdoc/>
+        public float QueryPointSize()
+        {
+            return _oldSpecState.GraphicsState.PointSize;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryTessCw()
+        {
+            return _oldSpecState.GraphicsState.TessellationMode.UnpackCw();
+        }
+
+        /// <inheritdoc/>
+        public TessPatchType QueryTessPatchType()
+        {
+            return _oldSpecState.GraphicsState.TessellationMode.UnpackPatchType();
+        }
+
+        /// <inheritdoc/>
+        public TessSpacing QueryTessSpacing()
+        {
+            return _oldSpecState.GraphicsState.TessellationMode.UnpackSpacing();
+        }
+
+        /// <inheritdoc/>
+        public TextureFormat QueryTextureFormat(int handle, int cbufSlot)
+        {
+            _newSpecState.RecordTextureFormat(_stageIndex, handle, cbufSlot);
+            (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot);
+            return ConvertToTextureFormat(format, formatSrgb);
+        }
+
+        /// <inheritdoc/>
+        public SamplerType QuerySamplerType(int handle, int cbufSlot)
+        {
+            _newSpecState.RecordTextureSamplerType(_stageIndex, handle, cbufSlot);
+            return _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot).ConvertSamplerType();
+        }
+
+        /// <inheritdoc/>
+        public bool QueryTextureCoordNormalized(int handle, int cbufSlot)
+        {
+            _newSpecState.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot);
+            return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot);
+        }
+
+        /// <inheritdoc/>
+        public bool QueryTransformDepthMinusOneToOne()
+        {
+            return _oldSpecState.GraphicsState.DepthMode;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryTransformFeedbackEnabled()
+        {
+            return _oldSpecState.TransformFeedbackDescriptors != null;
+        }
+
+        /// <inheritdoc/>
+        public ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex)
+        {
+            return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].AsSpan();
+        }
+
+        /// <inheritdoc/>
+        public int QueryTransformFeedbackStride(int bufferIndex)
+        {
+            return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].Stride;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryEarlyZForce()
+        {
+            _newSpecState.RecordEarlyZForce();
+            return _oldSpecState.GraphicsState.EarlyZForce;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryHasUnalignedStorageBuffer()
+        {
+            return _oldSpecState.GraphicsState.HasUnalignedStorageBuffer || _oldSpecState.ComputeState.HasUnalignedStorageBuffer;
+        }
+
+        /// <inheritdoc/>
+        public bool QueryViewportTransformDisable()
+        {
+            return _oldSpecState.GraphicsState.ViewportTransformDisable;
+        }
+
+        /// <inheritdoc/>
+        public void RegisterTexture(int handle, int cbufSlot)
+        {
+            // A texture that the cached state does not know about cannot be described.
+            if (!_oldSpecState.TextureRegistered(_stageIndex, handle, cbufSlot))
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.MissingTextureDescriptor);
+            }
+
+            (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot);
+            TextureTarget target = _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot);
+            bool coordNormalized = _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot);
+            _newSpecState.RegisterTexture(_stageIndex, handle, cbufSlot, format, formatSrgb, target, coordNormalized);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs
new file mode 100644
index 00000000..01034b49
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs
@@ -0,0 +1,459 @@
+using Ryujinx.Common;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+ /// <summary>
+ /// On-disk shader cache storage for guest code.
+ /// </summary>
+ class DiskCacheGuestStorage
+ {
+ private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24); // ASCII 'T', 'O', 'C', 'G' packed from the lowest byte up
+
+ private const ushort VersionMajor = 1;
+ private const ushort VersionMinor = 1;
+ private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor; // Major on the upper 16 bits, minor on the lower 16 bits
+
+ private const string TocFileName = "guest.toc"; // Combined with the base path to locate the TOC file
+ private const string DataFileName = "guest.data"; // Combined with the base path to locate the data file
+
+ private readonly string _basePath;
+
+ /// <summary>
+ /// TOC (Table of contents) file header, stored at the very start of the guest TOC file.
+ /// </summary>
+ private struct TocHeader
+ {
+ /// <summary>
+ /// Magic value, for validation and identification purposes. The TOC is recreated if it does not match TocMagic.
+ /// </summary>
+ public uint Magic;
+
+ /// <summary>
+ /// File format version. The TOC is recreated if it does not match VersionPacked.
+ /// </summary>
+ public uint Version;
+
+ /// <summary>
+ /// Header padding. Written as zero.
+ /// </summary>
+ public uint Padding;
+
+ /// <summary>
+ /// Number of modifications to the file, also the shaders count. Incremented each time a new entry is written.
+ /// </summary>
+ public uint ModificationsCount;
+
+ /// <summary>
+ /// Reserved space, to be used in the future. Write as zero.
+ /// </summary>
+ public ulong Reserved;
+
+ /// <summary>
+ /// Reserved space, to be used in the future. Write as zero.
+ /// </summary>
+ public ulong Reserved2;
+ }
+
+ /// <summary>
+ /// TOC (Table of contents) file entry. Entries are stored one after the other, right after the header.
+ /// </summary>
+ private struct TocEntry
+ {
+ /// <summary>
+ /// Offset of the data on the data file. The constant buffer 1 data comes first, followed by the compressed code.
+ /// </summary>
+ public uint Offset;
+
+ /// <summary>
+ /// Code size, in bytes, before compression.
+ /// </summary>
+ public uint CodeSize;
+
+ /// <summary>
+ /// Constant buffer 1 data size, in bytes.
+ /// </summary>
+ public uint Cb1DataSize;
+
+ /// <summary>
+ /// Hash of the code and constant buffer data, used as key of the in-memory TOC.
+ /// </summary>
+ public uint Hash;
+ }
+
+ /// <summary>
+ /// TOC (Table of contents) memory cache entry. Same as the file entry, but with the index in place of the hash.
+ /// </summary>
+ private struct TocMemoryEntry
+ {
+ /// <summary>
+ /// Offset of the data on the data file.
+ /// </summary>
+ public uint Offset;
+
+ /// <summary>
+ /// Code size, in bytes, before compression.
+ /// </summary>
+ public uint CodeSize;
+
+ /// <summary>
+ /// Constant buffer 1 data size, in bytes.
+ /// </summary>
+ public uint Cb1DataSize;
+
+ /// <summary>
+ /// Index of the shader on the cache, which is the position of its entry on the TOC file.
+ /// </summary>
+ public readonly int Index;
+
+ /// <summary>
+ /// Creates a new TOC memory entry.
+ /// </summary>
+ /// <param name="offset">Offset of the data on the data file</param>
+ /// <param name="codeSize">Code size, in bytes, before compression</param>
+ /// <param name="cb1DataSize">Constant buffer 1 data size, in bytes</param>
+ /// <param name="index">Index of the shader on the cache</param>
+ public TocMemoryEntry(uint offset, uint codeSize, uint cb1DataSize, int index)
+ {
+ Offset = offset;
+ CodeSize = codeSize;
+ Cb1DataSize = cb1DataSize;
+ Index = index;
+ }
+ }
+
+ private Dictionary<uint, List<TocMemoryEntry>> _toc; // In-memory TOC: hash -> all entries sharing that hash; null until first loaded
+ private uint _tocModificationsCount; // Header modifications count the in-memory TOC was last synchronized with
+
+ private (byte[], byte[])[] _cache; // (guest code, constant buffer 1 data) per shader index; null when the memory cache is cleared
+
+ /// <summary>
+ /// Initializes the guest storage for a given cache directory.
+ /// </summary>
+ /// <param name="basePath">Base path of the disk shader cache, where the guest TOC and data files are located</param>
+ public DiskCacheGuestStorage(string basePath) => _basePath = basePath;
+
+ /// <summary>
+ /// Tells whether the guest cache TOC (table of contents) file is present on the base path.
+ /// </summary>
+ /// <returns>True if the file exists, false otherwise</returns>
+ public bool TocFileExists()
+ {
+     string tocFilePath = Path.Combine(_basePath, TocFileName);
+
+     return File.Exists(tocFilePath);
+ }
+
+ /// <summary>
+ /// Tells whether the guest cache data file is present on the base path.
+ /// </summary>
+ /// <returns>True if the file exists, false otherwise</returns>
+ public bool DataFileExists()
+ {
+     string dataFilePath = Path.Combine(_basePath, DataFileName);
+
+     return File.Exists(dataFilePath);
+ }
+
+ /// <summary>
+ /// Opens the guest cache TOC (table of contents) file, as non-writable.
+ /// </summary>
+ /// <returns>File stream</returns>
+ public Stream OpenTocFileStream() => DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: false);
+
+ /// <summary>
+ /// Opens the guest cache data file, as non-writable.
+ /// </summary>
+ /// <returns>File stream</returns>
+ public Stream OpenDataFileStream() => DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: false);
+
+ /// <summary>
+ /// Truncates both guest cache files (TOC and data) to zero length.
+ /// </summary>
+ public void ClearCache()
+ {
+     using (var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true))
+     using (var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true))
+     {
+         tocFileStream.SetLength(0);
+         dataFileStream.SetLength(0);
+     }
+ }
+
+ /// <summary>
+ /// Loads the guest code and constant buffer 1 data of a shader, from the memory cache if present, otherwise from file.
+ /// </summary>
+ /// <remarks>
+ /// Data loaded from file is kept on the memory cache until <see cref="ClearMemoryCache"/> is called.
+ /// </remarks>
+ /// <param name="tocFileStream">Guest TOC file stream</param>
+ /// <param name="dataFileStream">Guest data file stream</param>
+ /// <param name="index">Guest shader index</param>
+ /// <returns>Guest code and constant buffer 1 data</returns>
+ /// <exception cref="DiskCacheLoadException">
+ /// Thrown when the index is negative, or when the entry points outside of the data file or the data is truncated
+ /// </exception>
+ public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
+ {
+     if (index < 0)
+     {
+         // Report a bad index as a corrupted cache rather than as an out of range array access.
+         throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+     }
+
+     if (_cache == null || index >= _cache.Length)
+     {
+         int capacity = Math.Max(index + 1, GetShadersCountFromLength(tocFileStream.Length));
+
+         // Grow (or create) the array while keeping the entries that were already loaded.
+         Array.Resize(ref _cache, capacity);
+     }
+
+     (byte[] guestCode, byte[] cb1Data) = _cache[index];
+
+     if (guestCode == null || cb1Data == null)
+     {
+         BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+         tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (long)index * Unsafe.SizeOf<TocEntry>(), SeekOrigin.Begin);
+
+         TocEntry entry = new TocEntry();
+         tocReader.Read(ref entry);
+
+         ulong dataFileLength = (ulong)dataFileStream.Length;
+
+         // Validate the entry before allocating anything based on sizes that came from the file.
+         // The constant buffer data is stored uncompressed, so it must fit inside the data file.
+         if (entry.Offset >= dataFileLength || (ulong)entry.Offset + entry.Cb1DataSize > dataFileLength)
+         {
+             throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+         }
+
+         guestCode = new byte[entry.CodeSize];
+         cb1Data = new byte[entry.Cb1DataSize];
+
+         dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);
+
+         // Stream.Read may return less data than requested, so keep reading until the buffer is full.
+         int filled = 0;
+
+         while (filled < cb1Data.Length)
+         {
+             int read = dataFileStream.Read(cb1Data, filled, cb1Data.Length - filled);
+
+             if (read <= 0)
+             {
+                 throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+             }
+
+             filled += read;
+         }
+
+         BinarySerializer.ReadCompressed(dataFileStream, guestCode);
+
+         _cache[index] = (guestCode, cb1Data);
+     }
+
+     return new GuestCodeAndCbData(guestCode, cb1Data);
+ }
+
+ /// <summary>
+ /// Drops the guest code memory cache, so that the next loads read from file again.
+ /// </summary>
+ public void ClearMemoryCache() => _cache = null;
+
+ /// <summary>
+ /// Computes how many entries fit on a TOC file of the given length, after the header.
+ /// </summary>
+ /// <param name="length">TOC file length</param>
+ /// <returns>Shaders count</returns>
+ private static int GetShadersCountFromLength(long length)
+ {
+     long entriesLength = length - Unsafe.SizeOf<TocHeader>();
+     long entriesCount = entriesLength / Unsafe.SizeOf<TocEntry>();
+
+     return (int)entriesCount;
+ }
+
+ /// <summary>
+ /// Stores guest code and its constant buffer 1 data on the cache, and returns its index.
+ /// </summary>
+ /// <remarks>
+ /// Entries with the same hash and sizes are read back and compared; if one matches exactly,
+ /// its index is returned and nothing is written.
+ /// </remarks>
+ /// <param name="data">Guest code</param>
+ /// <param name="cb1Data">Constant buffer 1 data accessed by the code</param>
+ /// <returns>Index of the shader on the cache</returns>
+ public int AddShader(ReadOnlySpan<byte> data, ReadOnlySpan<byte> cb1Data)
+ {
+     using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true);
+     using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true);
+
+     TocHeader header = default;
+     LoadOrCreateToc(tocFileStream, ref header);
+
+     uint hash = CalcHash(data, cb1Data);
+
+     if (_toc.TryGetValue(hash, out List<TocMemoryEntry> candidates))
+     {
+         for (int i = 0; i < candidates.Count; i++)
+         {
+             TocMemoryEntry candidate = candidates[i];
+
+             bool sameSizes = data.Length == candidate.CodeSize && cb1Data.Length == candidate.Cb1DataSize;
+
+             if (sameSizes)
+             {
+                 // Same hash and sizes: read the stored data back to rule out a hash collision.
+                 dataFileStream.Seek((long)candidate.Offset, SeekOrigin.Begin);
+
+                 byte[] storedCode = new byte[candidate.CodeSize];
+                 byte[] storedCb1Data = new byte[candidate.Cb1DataSize];
+
+                 dataFileStream.Read(storedCb1Data);
+                 BinarySerializer.ReadCompressed(dataFileStream, storedCode);
+
+                 if (data.SequenceEqual(storedCode) && cb1Data.SequenceEqual(storedCb1Data))
+                 {
+                     return candidate.Index;
+                 }
+             }
+         }
+     }
+
+     return WriteNewEntry(tocFileStream, dataFileStream, ref header, data, cb1Data, hash);
+ }
+
+ /// <summary>
+ /// Loads the guest cache TOC file, or create a new one if not present.
+ /// </summary>
+ /// <remarks>
+ /// The in-memory TOC is reloaded when it was never loaded, when the modifications count on the
+ /// file differs from the one last seen, or when the file had to be recreated.
+ /// Whenever the file is recreated, the in-memory TOC ends up empty as well, so that it never
+ /// refers to entries that are no longer on the file.
+ /// </remarks>
+ /// <param name="tocFileStream">Guest TOC file stream</param>
+ /// <param name="header">Set to the TOC file header</param>
+ private void LoadOrCreateToc(Stream tocFileStream, ref TocHeader header)
+ {
+     BinarySerializer reader = new BinarySerializer(tocFileStream);
+
+     bool recreated = false;
+
+     if (!reader.TryRead(ref header) || header.Magic != TocMagic || header.Version != VersionPacked)
+     {
+         CreateToc(tocFileStream, ref header);
+         recreated = true;
+     }
+
+     if (recreated || _toc == null || header.ModificationsCount != _tocModificationsCount)
+     {
+         if (!LoadTocEntries(tocFileStream, ref reader))
+         {
+             CreateToc(tocFileStream, ref header);
+
+             // The file now has a header only. Entries loaded before the failure must go too,
+             // otherwise their indices could be returned for shaders that are no longer on the file.
+             _toc.Clear();
+         }
+
+         _tocModificationsCount = header.ModificationsCount;
+     }
+ }
+
+ /// <summary>
+ /// Resets the guest cache TOC file so that it only has a fresh header.
+ /// </summary>
+ /// <param name="tocFileStream">Guest TOC file stream</param>
+ /// <param name="header">Set to the TOC header</param>
+ private void CreateToc(Stream tocFileStream, ref TocHeader header)
+ {
+     BinarySerializer writer = new BinarySerializer(tocFileStream);
+
+     header = new TocHeader
+     {
+         Magic = TocMagic,
+         Version = VersionPacked,
+         Padding = 0,
+         ModificationsCount = 0,
+         Reserved = 0,
+         Reserved2 = 0
+     };
+
+     bool hasOldContent = tocFileStream.Length > 0;
+
+     if (hasOldContent)
+     {
+         // Discard whatever was on the file before writing the new header.
+         tocFileStream.Seek(0, SeekOrigin.Begin);
+         tocFileStream.SetLength(0);
+     }
+
+     writer.Write(ref header);
+ }
+
+ /// <summary>
+ /// Rebuilds the in-memory TOC from the entries on the guest TOC file, starting at the current stream position.
+ /// </summary>
+ /// <param name="tocFileStream">Guest TOC file stream</param>
+ /// <param name="reader">TOC file reader</param>
+ /// <returns>True if the operation was successful, false otherwise</returns>
+ private bool LoadTocEntries(Stream tocFileStream, ref BinarySerializer reader)
+ {
+     _toc = new Dictionary<uint, List<TocMemoryEntry>>();
+
+     TocEntry current = default;
+
+     for (int index = 0; tocFileStream.Position < tocFileStream.Length; index++)
+     {
+         bool readOk = reader.TryRead(ref current);
+
+         if (!readOk)
+         {
+             return false;
+         }
+
+         AddTocMemoryEntry(current.Offset, current.CodeSize, current.Cb1DataSize, current.Hash, index);
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Appends new guest code to the data file, and a matching entry to the TOC file.
+ /// </summary>
+ /// <param name="tocFileStream">TOC file stream</param>
+ /// <param name="dataFileStream">Data file stream</param>
+ /// <param name="header">TOC header, to be updated with the new count</param>
+ /// <param name="data">Guest code</param>
+ /// <param name="cb1Data">Constant buffer 1 data accessed by the guest code</param>
+ /// <param name="hash">Code and constant buffer data hash</param>
+ /// <returns>Entry index</returns>
+ private int WriteNewEntry(
+     Stream tocFileStream,
+     Stream dataFileStream,
+     ref TocHeader header,
+     ReadOnlySpan<byte> data,
+     ReadOnlySpan<byte> cb1Data,
+     uint hash)
+ {
+     BinarySerializer writer = new BinarySerializer(tocFileStream);
+
+     // Data goes at the end of the data file: raw constant buffer data first, then the compressed code.
+     dataFileStream.Seek(0, SeekOrigin.End);
+
+     TocEntry newEntry = default;
+     newEntry.Offset = checked((uint)dataFileStream.Position);
+     newEntry.CodeSize = (uint)data.Length;
+     newEntry.Cb1DataSize = (uint)cb1Data.Length;
+     newEntry.Hash = hash;
+
+     dataFileStream.Write(cb1Data);
+     BinarySerializer.WriteCompressed(dataFileStream, data, DiskCacheCommon.GetCompressionAlgorithm());
+
+     // Rewrite the header with the new modifications count.
+     header.ModificationsCount++;
+     _tocModificationsCount = header.ModificationsCount;
+     tocFileStream.Seek(0, SeekOrigin.Begin);
+     writer.Write(ref header);
+
+     // The index of the new entry is the amount of entries already on the file.
+     tocFileStream.Seek(0, SeekOrigin.End);
+     int newIndex = GetShadersCountFromLength(tocFileStream.Position);
+
+     writer.Write(ref newEntry);
+
+     AddTocMemoryEntry(newEntry.Offset, newEntry.CodeSize, newEntry.Cb1DataSize, hash, newIndex);
+
+     return newIndex;
+ }
+
+ /// <summary>
+ /// Registers an entry on the in-memory TOC, under its hash. Entries with the same hash share a list.
+ /// </summary>
+ /// <param name="dataOffset">Offset of the code and constant buffer data in the data file</param>
+ /// <param name="codeSize">Code size</param>
+ /// <param name="cb1DataSize">Constant buffer 1 data size</param>
+ /// <param name="hash">Code and constant buffer data hash</param>
+ /// <param name="index">Index of the data on the cache</param>
+ private void AddTocMemoryEntry(uint dataOffset, uint codeSize, uint cb1DataSize, uint hash, int index)
+ {
+     if (!_toc.TryGetValue(hash, out List<TocMemoryEntry> bucket))
+     {
+         bucket = new List<TocMemoryEntry>();
+         _toc.Add(hash, bucket);
+     }
+
+     TocMemoryEntry memoryEntry = new TocMemoryEntry(dataOffset, codeSize, cb1DataSize, index);
+
+     bucket.Add(memoryEntry);
+ }
+
+ /// <summary>
+ /// Combines the hashes of two blocks of data into a single hash.
+ /// </summary>
+ /// <param name="data">Data 1</param>
+ /// <param name="data2">Data 2</param>
+ /// <returns>Hash of both data</returns>
+ private static uint CalcHash(ReadOnlySpan<byte> data, ReadOnlySpan<byte> data2)
+ {
+     uint secondHash = CalcHash(data2);
+     uint firstHash = CalcHash(data);
+
+     return (secondHash * 23) ^ firstHash;
+ }
+
+ /// <summary>
+ /// Hashes a block of data, keeping the low part of its 128-bit hash truncated to 32 bits.
+ /// </summary>
+ /// <param name="data">Data to be hashed</param>
+ /// <returns>Hash of the data</returns>
+ private static uint CalcHash(ReadOnlySpan<byte> data) => (uint)XXHash128.ComputeHash(data).Low;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
new file mode 100644
index 00000000..b182f299
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -0,0 +1,839 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.IO;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+ /// <summary>
+ /// On-disk shader cache storage for host code.
+ /// </summary>
+ class DiskCacheHostStorage
+ {
+ private const uint TocsMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'S' << 24); // Magic of the shared TOC file
+ private const uint TochMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'H' << 24); // Magic of the host TOC file
+ private const uint ShdiMagic = (byte)'S' | ((byte)'H' << 8) | ((byte)'D' << 16) | ((byte)'I' << 24);
+ private const uint BufdMagic = (byte)'B' | ((byte)'U' << 8) | ((byte)'F' << 16) | ((byte)'D' << 24);
+ private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24);
+
+ private const ushort FileFormatVersionMajor = 1;
+ private const ushort FileFormatVersionMinor = 2;
+ private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; // Major on the upper 16 bits, minor on the lower 16 bits
+ private const uint CodeGenVersion = 4735; // Host code is only loaded from caches whose shared TOC has this same value
+
+ private const string SharedTocFileName = "shared.toc";
+ private const string SharedDataFileName = "shared.data";
+
+ private readonly string _basePath;
+
+ public bool CacheEnabled => !string.IsNullOrEmpty(_basePath); // The cache is enabled only when a base path was supplied
+
+ /// <summary>
+ /// TOC (Table of contents) file header, used by both the shared and the host TOC files.
+ /// </summary>
+ private struct TocHeader
+ {
+ /// <summary>
+ /// Magic value, for validation and identification.
+ /// </summary>
+ public uint Magic;
+
+ /// <summary>
+ /// File format version. Loading fails with an incompatible version result if it is not FileFormatVersionPacked.
+ /// </summary>
+ public uint FormatVersion;
+
+ /// <summary>
+ /// Generated shader code version. On the shared TOC, host code is only loaded if this matches CodeGenVersion.
+ /// </summary>
+ public uint CodeGenVersion;
+
+ /// <summary>
+ /// Header padding.
+ /// </summary>
+ public uint Padding;
+
+ /// <summary>
+ /// Timestamp of when the file was first created.
+ /// </summary>
+ public ulong Timestamp;
+
+ /// <summary>
+ /// Reserved space, to be used in the future. Write as zero.
+ /// </summary>
+ public ulong Reserved;
+ }
+
+ /// <summary>
+ /// Offset and size pair, stored on the host TOC file once per program.
+ /// </summary>
+ private struct OffsetAndSize
+ {
+ /// <summary>
+ /// Offset of the compressed host code on the host data file.
+ /// </summary>
+ public ulong Offset;
+
+ /// <summary>
+ /// Size of uncompressed data, in bytes.
+ /// </summary>
+ public uint UncompressedSize;
+
+ /// <summary>
+ /// Size of compressed data, in bytes, as stored on the data file.
+ /// </summary>
+ public uint CompressedSize;
+ }
+
+ /// <summary>
+ /// Per-stage data entry, written once for each shader of a program.
+ /// </summary>
+ private struct DataEntryPerStage
+ {
+ /// <summary>
+ /// Index of the guest code on the guest code cache TOC file.
+ /// </summary>
+ public int GuestCodeIndex;
+ }
+
+ /// <summary>
+ /// Per-program data entry, written before the per-stage entries.
+ /// </summary>
+ private struct DataEntry
+ {
+ /// <summary>
+ /// Bit mask where each bit set is a used shader stage. Should be zero for compute shaders. Only the 6 lower bits are accepted on load.
+ /// </summary>
+ public uint StagesBitMask;
+ }
+
+ /// <summary>
+ /// Per-stage shader information, returned by the translator.
+ /// </summary>
+ private struct DataShaderInfo
+ {
+ /// <summary>
+ /// Number of constant buffers used.
+ /// </summary>
+ public ushort CBuffersCount;
+
+ /// <summary>
+ /// Number of storage buffers used.
+ /// </summary>
+ public ushort SBuffersCount;
+
+ /// <summary>
+ /// Number of textures used.
+ /// </summary>
+ public ushort TexturesCount;
+
+ /// <summary>
+ /// Number of images used.
+ /// </summary>
+ public ushort ImagesCount;
+
+ /// <summary>
+ /// Shader stage.
+ /// </summary>
+ public ShaderStage Stage;
+
+ /// <summary>
+ /// Indicates if the shader accesses the Instance ID built-in variable.
+ /// </summary>
+ public bool UsesInstanceId;
+
+ /// <summary>
+ /// Indicates if the shader modifies the Layer built-in variable.
+ /// </summary>
+ public bool UsesRtLayer;
+
+ /// <summary>
+ /// Bit mask with the clip distances written on the vertex stage.
+ /// </summary>
+ public byte ClipDistancesWritten;
+
+ /// <summary>
+ /// Bit mask of the render target components written by the fragment stage.
+ /// </summary>
+ public int FragmentOutputMap;
+
+ /// <summary>
+ /// Indicates if the vertex shader accesses draw parameters.
+ /// </summary>
+ public bool UsesDrawParameters;
+ }
+
+ private readonly DiskCacheGuestStorage _guestStorage;
+
+ /// <summary>
+ /// Initializes the host storage, along with the guest storage sharing the same base path.
+ /// The cache directory is created if the cache is enabled.
+ /// </summary>
+ /// <param name="basePath">Base path of the shader cache</param>
+ public DiskCacheHostStorage(string basePath)
+ {
+     _basePath = basePath;
+     _guestStorage = new DiskCacheGuestStorage(basePath);
+
+     if (!CacheEnabled)
+     {
+         return;
+     }
+
+     Directory.CreateDirectory(basePath);
+ }
+
+ /// <summary>
+ /// Counts the programs on the cache, from the length of the shared TOC file.
+ /// </summary>
+ /// <returns>Host programs count, or zero if the shared TOC file does not exist</returns>
+ public int GetProgramCount()
+ {
+     string tocFilePath = Path.Combine(_basePath, SharedTocFileName);
+
+     if (File.Exists(tocFilePath))
+     {
+         // After the header, the shared TOC has one 64-bit data offset per program.
+         long entriesLength = new FileInfo(tocFilePath).Length - Unsafe.SizeOf<TocHeader>();
+         int programCount = (int)(entriesLength / sizeof(ulong));
+
+         return Math.Max(programCount, 0);
+     }
+
+     return 0;
+ }
+
+ /// <summary>
+ /// Gets the name of the host program cache file, without extension.
+ /// The name is made of the lower case API name and filtered vendor name, separated by an underscore.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <returns>Name of the file, without extension</returns>
+ private static string GetHostFileName(GpuContext context)
+ {
+     string api = context.Capabilities.Api.ToString().ToLowerInvariant();
+     string vendor = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant());
+
+     return api + "_" + vendor;
+ }
+
+ /// <summary>
+ /// Makes a string usable as file name: everything from the first space on is cut off,
+ /// and characters that are not valid on file names are removed from what is left.
+ /// </summary>
+ /// <param name="fileName">File name</param>
+ /// <returns>Filtered file name</returns>
+ private static string RemoveInvalidCharacters(string fileName)
+ {
+     // The first element of the split is the text before the first space (or everything, if there is none).
+     string firstWord = fileName.Split(' ')[0];
+
+     string[] validParts = firstWord.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries);
+
+     return string.Join(string.Empty, validParts);
+ }
+
+ /// <summary>
+ /// Gets the name of the TOC host file, which is the host file name with the ".toc" extension.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <returns>File name</returns>
+ private static string GetHostTocFileName(GpuContext context) => $"{GetHostFileName(context)}.toc";
+
+ /// <summary>
+ /// Gets the name of the data host file, which is the host file name with the ".data" extension.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <returns>File name</returns>
+ private static string GetHostDataFileName(GpuContext context) => $"{GetHostFileName(context)}.data";
+
+ /// <summary>
+ /// Checks if a disk cache exists for the current application.
+ /// All four files (shared TOC, shared data, guest TOC and guest data) must be present.
+ /// </summary>
+ /// <returns>True if a disk cache exists, false otherwise</returns>
+ public bool CacheExists()
+ {
+     string sharedTocPath = Path.Combine(_basePath, SharedTocFileName);
+     string sharedDataPath = Path.Combine(_basePath, SharedDataFileName);
+
+     return File.Exists(sharedTocPath)
+         && File.Exists(sharedDataPath)
+         && _guestStorage.TocFileExists()
+         && _guestStorage.DataFileExists();
+ }
+
+ /// <summary>
+ /// Loads all shaders from the cache, queueing each program on the loader as host or guest program.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="loader">Parallel disk cache loader</param>
+ public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader)
+ {
+ if (!CacheExists())
+ {
+ return;
+ }
+
+ Stream hostTocFileStream = null; // Opened on demand by ReadHostCode, disposed on the finally block
+ Stream hostDataFileStream = null; // Opened on demand by ReadHostCode, disposed on the finally block
+
+ try
+ {
+ using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: false);
+ using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: false);
+
+ using var guestTocFileStream = _guestStorage.OpenTocFileStream();
+ using var guestDataFileStream = _guestStorage.OpenDataFileStream();
+
+ BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+ BinarySerializer dataReader = new BinarySerializer(dataFileStream);
+
+ TocHeader header = new TocHeader();
+
+ if (!tocReader.TryRead(ref header) || header.Magic != TocsMagic)
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+ }
+
+ if (header.FormatVersion != FileFormatVersionPacked)
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.IncompatibleVersion);
+ }
+
+ bool loadHostCache = header.CodeGenVersion == CodeGenVersion; // On mismatch, every program is queued as guest program instead
+
+ int programIndex = 0;
+
+ DataEntry entry = new DataEntry();
+
+ while (tocFileStream.Position < tocFileStream.Length && loader.Active)
+ {
+ ulong dataOffset = 0;
+ tocReader.Read(ref dataOffset); // Each shared TOC entry is the offset of the program on the shared data file
+
+ if ((ulong)dataOffset >= (ulong)dataFileStream.Length)
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+ }
+
+ dataFileStream.Seek((long)dataOffset, SeekOrigin.Begin);
+
+ dataReader.BeginCompression();
+ dataReader.Read(ref entry);
+ uint stagesBitMask = entry.StagesBitMask;
+
+ if ((stagesBitMask & ~0x3fu) != 0) // Only the 6 lower bits may be set
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+ }
+
+ bool isCompute = stagesBitMask == 0; // An empty mask identifies a compute program
+ if (isCompute)
+ {
+ stagesBitMask = 1; // Compute programs have a single shader, read into index 0
+ }
+
+ GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1];
+
+ DataEntryPerStage stageEntry = new DataEntryPerStage();
+
+ while (stagesBitMask != 0)
+ {
+ int stageIndex = BitOperations.TrailingZeroCount(stagesBitMask); // Lowest stage still pending
+
+ dataReader.Read(ref stageEntry);
+
+ guestShaders[stageIndex] = _guestStorage.LoadShader(
+ guestTocFileStream,
+ guestDataFileStream,
+ stageEntry.GuestCodeIndex);
+
+ stagesBitMask &= ~(1u << stageIndex); // Clear the bit of the stage that was just read
+ }
+
+ ShaderSpecializationState specState = ShaderSpecializationState.Read(ref dataReader);
+ dataReader.EndCompression();
+
+ if (loadHostCache)
+ {
+ (byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode(
+ context,
+ ref hostTocFileStream,
+ ref hostDataFileStream,
+ guestShaders,
+ programIndex,
+ header.Timestamp);
+
+ if (hostCode != null)
+ {
+ bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null; // Index 5 is treated as the fragment stage here
+ int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1;
+
+ ShaderInfo shaderInfo = specState.PipelineState.HasValue
+ ? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value, fromCache: true)
+ : new ShaderInfo(fragmentOutputMap, fromCache: true);
+
+ IProgram hostProgram;
+
+ if (context.Capabilities.Api == TargetApi.Vulkan)
+ {
+ ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode);
+
+ hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo);
+ }
+ else
+ {
+ hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo);
+ }
+
+ CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);
+
+ loader.QueueHostProgram(program, hostCode, programIndex, isCompute);
+ }
+ else
+ {
+ loadHostCache = false; // No host code for this program; this one and all the next are queued as guest programs
+ }
+ }
+
+ if (!loadHostCache)
+ {
+ loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute);
+ }
+
+ loader.CheckCompilation();
+ programIndex++;
+ }
+ }
+ finally
+ {
+ _guestStorage.ClearMemoryCache(); // Guest code is only kept in memory for the duration of the load
+
+ hostTocFileStream?.Dispose();
+ hostDataFileStream?.Dispose();
+ }
+ }
+
+ /// <summary>
+ /// Reads the host code for a given shader, if existent.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="tocFileStream">Host TOC file stream, initialized if needed</param>
+ /// <param name="dataFileStream">Host data file stream, initialized if needed</param>
+ /// <param name="guestShaders">Guest shader code for each active stage</param>
+ /// <param name="programIndex">Index of the program on the cache</param>
+ /// <param name="expectedTimestamp">Timestamp of the shared cache file. The host file must not be older than it</param>
+ /// <returns>Host binary code and the cached shader stages, or a pair of nulls if not found</returns>
+ private (byte[], CachedShaderStage[]) ReadHostCode(
+ GpuContext context,
+ ref Stream tocFileStream,
+ ref Stream dataFileStream,
+ GuestCodeAndCbData?[] guestShaders,
+ int programIndex,
+ ulong expectedTimestamp)
+ {
+ if (tocFileStream == null && dataFileStream == null) // First call: the host files still have to be opened
+ {
+ string tocFilePath = Path.Combine(_basePath, GetHostTocFileName(context));
+ string dataFilePath = Path.Combine(_basePath, GetHostDataFileName(context));
+
+ if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath))
+ {
+ return (null, null);
+ }
+
+ tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false);
+ dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: false);
+
+ BinarySerializer tempTocReader = new BinarySerializer(tocFileStream);
+
+ TocHeader header = new TocHeader();
+
+ tempTocReader.Read(ref header);
+
+ if (header.Timestamp < expectedTimestamp) // Host file older than the shared cache file: not used
+ {
+ return (null, null);
+ }
+ }
+
+ int offset = Unsafe.SizeOf<TocHeader>() + programIndex * Unsafe.SizeOf<OffsetAndSize>(); // Position of the program entry on the host TOC
+ if (offset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
+ {
+ return (null, null);
+ }
+
+ if ((ulong)offset >= (ulong)dataFileStream.Length) // NOTE(review): compares a TOC file offset against the data file length; looks unintended, confirm
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+ }
+
+ tocFileStream.Seek(offset, SeekOrigin.Begin);
+
+ BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+
+ OffsetAndSize offsetAndSize = new OffsetAndSize();
+ tocReader.Read(ref offsetAndSize);
+
+ if (offsetAndSize.Offset >= (ulong)dataFileStream.Length)
+ {
+ throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+ }
+
+ dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);
+
+ byte[] hostCode = new byte[offsetAndSize.UncompressedSize];
+
+ BinarySerializer.ReadCompressed(dataFileStream, hostCode);
+
+ CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
+ BinarySerializer dataReader = new BinarySerializer(dataFileStream);
+
+ dataFileStream.Seek((long)(offsetAndSize.Offset + offsetAndSize.CompressedSize), SeekOrigin.Begin); // The per-stage info is read from right after the compressed host code
+
+ dataReader.BeginCompression();
+
+ for (int index = 0; index < guestShaders.Length; index++)
+ {
+ if (!guestShaders[index].HasValue)
+ {
+ continue;
+ }
+
+ GuestCodeAndCbData guestShader = guestShaders[index].Value;
+ ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null; // No info is read for index 0, unless it is the only shader
+
+ shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data);
+ }
+
+ dataReader.EndCompression();
+
+ return (hostCode, shaders);
+ }
+
+ /// <summary>
+ /// Opens all four cache files (shared TOC, shared data, host TOC and host data) as writable,
+ /// so that several shaders can be written without reopening the files each time.
+ /// </summary>
+ /// <param name="context">The GPU context, used to determine the host disk cache</param>
+ /// <returns>A collection of disk cache output streams</returns>
+ public DiskCacheOutputStreams GetOutputStreams(GpuContext context)
+ {
+     // Arguments are evaluated in order, so the files are opened in the order listed here.
+     return new DiskCacheOutputStreams(
+         DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true),
+         DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true),
+         DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true),
+         DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true));
+ }
+
+ /// <summary>
+ /// Adds a shader to the cache.
+ /// </summary>
+ /// <remarks>
+ /// When <paramref name="streams"/> is null, the shared cache files are opened for the duration of the write,
+ /// and are closed again even if the write fails. Streams passed by the caller are never disposed here.
+ /// The host code, if not empty, is written to the host cache after the shared cache entry.
+ /// </remarks>
+ /// <param name="context">GPU context</param>
+ /// <param name="program">Cached program</param>
+ /// <param name="hostCode">Optional host binary code</param>
+ /// <param name="streams">Output streams to use</param>
+ public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlySpan<byte> hostCode, DiskCacheOutputStreams streams = null)
+ {
+     uint stagesBitMask = 0;
+
+     for (int index = 0; index < program.Shaders.Length; index++)
+     {
+         var shader = program.Shaders[index];
+         if (shader == null || (shader.Info != null && shader.Info.Stage == ShaderStage.Compute))
+         {
+             // Compute shaders leave the mask at zero, which is how they are identified on load.
+             continue;
+         }
+
+         stagesBitMask |= 1u << index;
+     }
+
+     bool ownsStreams = streams == null;
+
+     ulong timestamp = (ulong)DateTime.UtcNow.Subtract(DateTime.UnixEpoch).TotalSeconds;
+
+     var tocFileStream = streams != null ? streams.TocFileStream : DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
+
+     try
+     {
+         var dataFileStream = streams != null ? streams.DataFileStream : DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);
+
+         try
+         {
+             if (tocFileStream.Length == 0)
+             {
+                 TocHeader header = new TocHeader();
+                 CreateToc(tocFileStream, ref header, TocsMagic, CodeGenVersion, timestamp);
+             }
+
+             tocFileStream.Seek(0, SeekOrigin.End);
+             dataFileStream.Seek(0, SeekOrigin.End);
+
+             BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
+             BinarySerializer dataWriter = new BinarySerializer(dataFileStream);
+
+             // The TOC entry of a program is just the offset of its data on the data file.
+             ulong dataOffset = (ulong)dataFileStream.Position;
+             tocWriter.Write(ref dataOffset);
+
+             DataEntry entry = new DataEntry();
+
+             entry.StagesBitMask = stagesBitMask;
+
+             dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
+             dataWriter.Write(ref entry);
+
+             DataEntryPerStage stageEntry = new DataEntryPerStage();
+
+             for (int index = 0; index < program.Shaders.Length; index++)
+             {
+                 var shader = program.Shaders[index];
+                 if (shader == null)
+                 {
+                     continue;
+                 }
+
+                 stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);
+
+                 dataWriter.Write(ref stageEntry);
+             }
+
+             program.SpecializationState.Write(ref dataWriter);
+             dataWriter.EndCompression();
+         }
+         finally
+         {
+             // Only close what was opened here, and do it even if the write above failed.
+             if (ownsStreams)
+             {
+                 dataFileStream.Dispose();
+             }
+         }
+     }
+     finally
+     {
+         if (ownsStreams)
+         {
+             tocFileStream.Dispose();
+         }
+     }
+
+     if (hostCode.IsEmpty)
+     {
+         return;
+     }
+
+     WriteHostCode(context, hostCode, program.Shaders, streams, timestamp);
+ }
+
+ /// <summary>
+ /// Clears all content from the guest cache files, by delegating to the guest storage.
+ /// </summary>
+ public void ClearGuestCache() => _guestStorage.ClearCache();
+
+ /// <summary>
+ /// Truncates both shared cache files (TOC and data) to zero length.
+ /// </summary>
+ public void ClearSharedCache()
+ {
+     using (var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true))
+     using (var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true))
+     {
+         tocFileStream.SetLength(0);
+         dataFileStream.SetLength(0);
+     }
+ }
+
+ /// <summary>
+ /// Truncates both host cache files (TOC and data) of the current host to zero length.
+ /// </summary>
+ /// <param name="context">GPU context, used to determine the host cache file names</param>
+ public void ClearHostCache(GpuContext context)
+ {
+     using (var tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true))
+     using (var dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true))
+     {
+         tocFileStream.SetLength(0);
+         dataFileStream.SetLength(0);
+     }
+ }
+
+        /// <summary>
+        /// Writes the host binary code on the host cache.
+        /// </summary>
+        /// <remarks>
+        /// The compressed host code is appended to the host data file, an offset/size record describing it is
+        /// appended to the host TOC file, and the program info of every non-null stage is then appended
+        /// (compressed) to the data file. When <paramref name="streams"/> is null the files are opened by this
+        /// method and are always closed before it returns, including when a write throws.
+        /// </remarks>
+        /// <param name="context">GPU context</param>
+        /// <param name="hostCode">Host binary code</param>
+        /// <param name="shaders">Shader stages to be added to the host cache</param>
+        /// <param name="streams">Output streams to use</param>
+        /// <param name="timestamp">File creation timestamp</param>
+        private void WriteHostCode(
+            GpuContext context,
+            ReadOnlySpan<byte> hostCode,
+            CachedShaderStage[] shaders,
+            DiskCacheOutputStreams streams,
+            ulong timestamp)
+        {
+            // Streams handed in by the caller stay open; streams opened here are owned by this method.
+            bool ownsStreams = streams == null;
+
+            var tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
+
+            try
+            {
+                var dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);
+
+                try
+                {
+                    // A brand new TOC file needs its header before any entry is appended.
+                    if (tocFileStream.Length == 0)
+                    {
+                        TocHeader header = new TocHeader();
+                        CreateToc(tocFileStream, ref header, TochMagic, 0, timestamp);
+                    }
+
+                    tocFileStream.Seek(0, SeekOrigin.End);
+                    dataFileStream.Seek(0, SeekOrigin.End);
+
+                    BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
+                    BinarySerializer dataWriter = new BinarySerializer(dataFileStream);
+
+                    OffsetAndSize offsetAndSize = new OffsetAndSize();
+                    offsetAndSize.Offset = (ulong)dataFileStream.Position;
+                    offsetAndSize.UncompressedSize = (uint)hostCode.Length;
+
+                    long dataStartPosition = dataFileStream.Position;
+
+                    BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());
+
+                    // The compressed size is only known once the data has actually been written.
+                    offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition);
+
+                    tocWriter.Write(ref offsetAndSize);
+
+                    dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
+
+                    for (int index = 0; index < shaders.Length; index++)
+                    {
+                        if (shaders[index] != null)
+                        {
+                            WriteShaderProgramInfo(ref dataWriter, shaders[index].Info);
+                        }
+                    }
+
+                    dataWriter.EndCompression();
+                }
+                finally
+                {
+                    if (ownsStreams)
+                    {
+                        dataFileStream.Dispose();
+                    }
+                }
+            }
+            finally
+            {
+                if (ownsStreams)
+                {
+                    tocFileStream.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Creates a TOC file for the host or shared cache.
+        /// </summary>
+        /// <remarks>
+        /// Any existing content of the stream is discarded, then the header is written at the start.
+        /// </remarks>
+        /// <param name="tocFileStream">TOC file stream</param>
+        /// <param name="header">Set to the TOC file header</param>
+        /// <param name="magic">Magic value to be written</param>
+        /// <param name="codegenVersion">
+        /// Shader codegen version. The host cache writer in this file passes 0 (with TochMagic), so the value
+        /// is only meaningful for the TOC that is created with TocsMagic.
+        /// </param>
+        /// <param name="timestamp">File creation timestamp</param>
+        private void CreateToc(Stream tocFileStream, ref TocHeader header, uint magic, uint codegenVersion, ulong timestamp)
+        {
+            BinarySerializer writer = new BinarySerializer(tocFileStream);
+
+            header.Magic = magic;
+            header.FormatVersion = FileFormatVersionPacked;
+            header.CodeGenVersion = codegenVersion;
+            header.Padding = 0;
+            header.Reserved = 0;
+            header.Timestamp = timestamp;
+
+            // Start from an empty file so the header always ends up at offset 0.
+            if (tocFileStream.Length > 0)
+            {
+                tocFileStream.Seek(0, SeekOrigin.Begin);
+                tocFileStream.SetLength(0);
+            }
+
+            writer.Write(ref header);
+        }
+
+        /// <summary>
+        /// Deserializes one shader program info record from the cache.
+        /// </summary>
+        /// <remarks>
+        /// A header tagged with ShdiMagic is read first and supplies the four descriptor counts. The cBuffers,
+        /// sBuffers, textures and images descriptors are then read in that order, each entry with its own magic.
+        /// </remarks>
+        /// <param name="dataReader">Cache data reader</param>
+        /// <returns>Shader program info</returns>
+        private static ShaderProgramInfo ReadShaderProgramInfo(ref BinarySerializer dataReader)
+        {
+            DataShaderInfo header = new DataShaderInfo();
+            dataReader.ReadWithMagicAndSize(ref header, ShdiMagic);
+
+            var constantBuffers = new BufferDescriptor[header.CBuffersCount];
+            var storageBuffers = new BufferDescriptor[header.SBuffersCount];
+            var textureDescriptors = new TextureDescriptor[header.TexturesCount];
+            var imageDescriptors = new TextureDescriptor[header.ImagesCount];
+
+            // Each array was sized from the header, so iterating up to Length reads exactly the stored count.
+            for (int i = 0; i < constantBuffers.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref constantBuffers[i], BufdMagic);
+            }
+
+            for (int i = 0; i < storageBuffers.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref storageBuffers[i], BufdMagic);
+            }
+
+            for (int i = 0; i < textureDescriptors.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref textureDescriptors[i], TexdMagic);
+            }
+
+            for (int i = 0; i < imageDescriptors.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref imageDescriptors[i], TexdMagic);
+            }
+
+            // The identification and the value after it are not read from the cache; fixed values are passed.
+            return new ShaderProgramInfo(
+                constantBuffers,
+                storageBuffers,
+                textureDescriptors,
+                imageDescriptors,
+                ShaderIdentification.None,
+                0,
+                header.Stage,
+                header.UsesInstanceId,
+                header.UsesDrawParameters,
+                header.UsesRtLayer,
+                header.ClipDistancesWritten,
+                header.FragmentOutputMap);
+        }
+
+        /// <summary>
+        /// Serializes one shader program info record into the cache.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is written when <paramref name="info"/> is null. Otherwise a header tagged with ShdiMagic
+        /// is written first, followed by the CBuffers, SBuffers, Textures and Images descriptors in that order.
+        /// </remarks>
+        /// <param name="dataWriter">Cache data writer</param>
+        /// <param name="info">Program info</param>
+        private static void WriteShaderProgramInfo(ref BinarySerializer dataWriter, ShaderProgramInfo info)
+        {
+            if (info == null)
+            {
+                return;
+            }
+
+            DataShaderInfo header = new DataShaderInfo
+            {
+                CBuffersCount = (ushort)info.CBuffers.Count,
+                SBuffersCount = (ushort)info.SBuffers.Count,
+                TexturesCount = (ushort)info.Textures.Count,
+                ImagesCount = (ushort)info.Images.Count,
+                Stage = info.Stage,
+                UsesInstanceId = info.UsesInstanceId,
+                UsesDrawParameters = info.UsesDrawParameters,
+                UsesRtLayer = info.UsesRtLayer,
+                ClipDistancesWritten = info.ClipDistancesWritten,
+                FragmentOutputMap = info.FragmentOutputMap
+            };
+
+            dataWriter.WriteWithMagicAndSize(ref header, ShdiMagic);
+
+            // The writer takes its argument by reference, so every descriptor is copied to a local first.
+            for (int i = 0; i < info.CBuffers.Count; i++)
+            {
+                var descriptor = info.CBuffers[i];
+                dataWriter.WriteWithMagicAndSize(ref descriptor, BufdMagic);
+            }
+
+            for (int i = 0; i < info.SBuffers.Count; i++)
+            {
+                var descriptor = info.SBuffers[i];
+                dataWriter.WriteWithMagicAndSize(ref descriptor, BufdMagic);
+            }
+
+            for (int i = 0; i < info.Textures.Count; i++)
+            {
+                var descriptor = info.Textures[i];
+                dataWriter.WriteWithMagicAndSize(ref descriptor, TexdMagic);
+            }
+
+            for (int i = 0; i < info.Images.Count; i++)
+            {
+                var descriptor = info.Images[i];
+                dataWriter.WriteWithMagicAndSize(ref descriptor, TexdMagic);
+            }
+        }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
new file mode 100644
index 00000000..d6e23302
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
@@ -0,0 +1,48 @@
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Exception raised when loading the shader disk cache fails.
+    /// </summary>
+    class DiskCacheLoadException : Exception
+    {
+        /// <summary>
+        /// Result code of the failed load. It is only assigned by the constructor that takes a result code;
+        /// the other constructors leave it at the enum's default value.
+        /// </summary>
+        public DiskCacheLoadResult Result { get; }
+
+        /// <summary>
+        /// Creates the exception without a message or a result code.
+        /// </summary>
+        public DiskCacheLoadException()
+            : base()
+        {
+        }
+
+        /// <summary>
+        /// Creates the exception with a custom message.
+        /// </summary>
+        /// <param name="message">Exception message</param>
+        public DiskCacheLoadException(string message)
+            : base(message)
+        {
+        }
+
+        /// <summary>
+        /// Creates the exception with a custom message and the exception that caused it.
+        /// </summary>
+        /// <param name="message">Exception message</param>
+        /// <param name="inner">Inner exception</param>
+        public DiskCacheLoadException(string message, Exception inner)
+            : base(message, inner)
+        {
+        }
+
+        /// <summary>
+        /// Creates the exception from a result code; the message is the text associated with that code.
+        /// </summary>
+        /// <param name="result">Result code</param>
+        public DiskCacheLoadException(DiskCacheLoadResult result)
+            : base(result.GetMessage())
+        {
+            Result = result;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
new file mode 100644
index 00000000..b3ffa4a7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
@@ -0,0 +1,72 @@
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Outcome of a shader cache load operation.
+    /// </summary>
+    enum DiskCacheLoadResult
+    {
+        /// <summary>
+        /// The load completed without error.
+        /// </summary>
+        Success,
+
+        /// <summary>
+        /// The cache file could not be accessed.
+        /// </summary>
+        NoAccess,
+
+        /// <summary>
+        /// The constant buffer 1 data is too short for the translation of the guest shader.
+        /// </summary>
+        InvalidCb1DataLength,
+
+        /// <summary>
+        /// The descriptor of a texture used by the shader is missing from the cache.
+        /// </summary>
+        MissingTextureDescriptor,
+
+        /// <summary>
+        /// The file is corrupted (no more specific cause).
+        /// </summary>
+        FileCorruptedGeneric,
+
+        /// <summary>
+        /// The file is corrupted; a magic value check failed.
+        /// </summary>
+        FileCorruptedInvalidMagic,
+
+        /// <summary>
+        /// The file is corrupted; a length check failed.
+        /// </summary>
+        FileCorruptedInvalidLength,
+
+        /// <summary>
+        /// The file might be valid, but it is incompatible with the current emulator version.
+        /// </summary>
+        IncompatibleVersion
+    }
+
+    static class DiskCacheLoadResultExtensions
+    {
+        /// <summary>
+        /// Maps a result code to its human readable message.
+        /// </summary>
+        /// <param name="result">Result code</param>
+        /// <returns>Message for the code, or "Unknown error." for a value outside the enum</returns>
+        public static string GetMessage(this DiskCacheLoadResult result)
+        {
+            switch (result)
+            {
+                case DiskCacheLoadResult.Success:
+                    return "No error.";
+                case DiskCacheLoadResult.NoAccess:
+                    return "Could not access the cache file.";
+                case DiskCacheLoadResult.InvalidCb1DataLength:
+                    return "Constant buffer 1 data length is too low.";
+                case DiskCacheLoadResult.MissingTextureDescriptor:
+                    return "Texture descriptor missing from the cache file.";
+                case DiskCacheLoadResult.FileCorruptedGeneric:
+                    return "The cache file is corrupted.";
+                case DiskCacheLoadResult.FileCorruptedInvalidMagic:
+                    return "Magic check failed, the cache file is corrupted.";
+                case DiskCacheLoadResult.FileCorruptedInvalidLength:
+                    return "Length check failed, the cache file is corrupted.";
+                case DiskCacheLoadResult.IncompatibleVersion:
+                    return "The version of the disk cache is not compatible with this version of the emulator.";
+                default:
+                    return "Unknown error.";
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
new file mode 100644
index 00000000..1e0df264
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
@@ -0,0 +1,57 @@
+using System;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Output streams for the disk shader cache.
+    /// </summary>
+    /// <remarks>
+    /// The container takes the four streams as given and disposes all of them in <see cref="Dispose"/>.
+    /// </remarks>
+    class DiskCacheOutputStreams : IDisposable
+    {
+        /// <summary>
+        /// Shared table of contents (TOC) file stream.
+        /// </summary>
+        public readonly FileStream TocFileStream;
+
+        /// <summary>
+        /// Shared data file stream.
+        /// </summary>
+        public readonly FileStream DataFileStream;
+
+        /// <summary>
+        /// Host table of contents (TOC) file stream.
+        /// </summary>
+        public readonly FileStream HostTocFileStream;
+
+        /// <summary>
+        /// Host data file stream.
+        /// </summary>
+        public readonly FileStream HostDataFileStream;
+
+        /// <summary>
+        /// Creates a new instance of a disk cache output stream container.
+        /// </summary>
+        /// <param name="tocFileStream">Stream for the shared table of contents file</param>
+        /// <param name="dataFileStream">Stream for the shared data file</param>
+        /// <param name="hostTocFileStream">Stream for the host table of contents file</param>
+        /// <param name="hostDataFileStream">Stream for the host data file</param>
+        public DiskCacheOutputStreams(FileStream tocFileStream, FileStream dataFileStream, FileStream hostTocFileStream, FileStream hostDataFileStream)
+        {
+            TocFileStream = tocFileStream;
+            DataFileStream = dataFileStream;
+            HostTocFileStream = hostTocFileStream;
+            HostDataFileStream = hostDataFileStream;
+        }
+
+        /// <summary>
+        /// Disposes the output file streams.
+        /// </summary>
+        /// <remarks>
+        /// Every stream is disposed even when an earlier one is null or throws while being disposed
+        /// (disposing a <see cref="FileStream"/> flushes it, which can fail), so no file handle is left open.
+        /// </remarks>
+        public void Dispose()
+        {
+            try
+            {
+                TocFileStream?.Dispose();
+            }
+            finally
+            {
+                try
+                {
+                    DataFileStream?.Dispose();
+                }
+                finally
+                {
+                    try
+                    {
+                        HostTocFileStream?.Dispose();
+                    }
+                    finally
+                    {
+                        HostDataFileStream?.Dispose();
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
new file mode 100644
index 00000000..959d6e18
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs
@@ -0,0 +1,29 @@
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Pairs the guest shader code with the constant buffer data accessed by the shader.
+    /// </summary>
+    readonly struct GuestCodeAndCbData
+    {
+        /// <summary>
+        /// Maxwell binary shader code.
+        /// </summary>
+        public byte[] Code { get; }
+
+        /// <summary>
+        /// Constant buffer 1 data accessed by the shader.
+        /// </summary>
+        public byte[] Cb1Data { get; }
+
+        /// <summary>
+        /// Stores the given arrays as they are; no copy is made.
+        /// </summary>
+        /// <param name="code">Maxwell binary shader code</param>
+        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
+        public GuestCodeAndCbData(byte[] code, byte[] cb1Data)
+        {
+            (Code, Cb1Data) = (code, cb1Data);
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
new file mode 100644
index 00000000..77fb3ca4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
@@ -0,0 +1,725 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+ class ParallelDiskCacheLoader
+ {
+ // Number of guest translation worker threads started by LoadShaders;
+ // also used as the bounded capacity of the async translation queue.
+ private const int ThreadCount = 8;
+
+ private readonly GpuContext _context;
+ private readonly ShaderCacheHashTable _graphicsCache;
+ private readonly ComputeShaderCacheHashTable _computeCache;
+ private readonly DiskCacheHostStorage _hostStorage;
+ private readonly CancellationToken _cancellationToken;
+ private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;
+
+ /// <summary>
+ /// Indicates if the cache should be loaded.
+ /// </summary>
+ /// <remarks>
+ /// True for as long as cancellation has not been requested on the token given to the constructor.
+ /// </remarks>
+ public bool Active => !_cancellationToken.IsCancellationRequested;
+
+ // Set when the cache files should be rebuilt after loading: either loading threw an error other than
+ // NoAccess, or a program that did not come from a host binary compiled successfully.
+ private bool _needsHostRegen;
+
+ /// <summary>
+ /// Number of shaders that failed to compile from the cache.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): this is also incremented from the translation worker threads without synchronization,
+ /// so the count may be inexact when several translations fail concurrently.
+ /// </remarks>
+ public int ErrorCount { get; private set; }
+
+        /// <summary>
+        /// Entry of the validation queue: a program whose host compilation result still has to be checked.
+        /// </summary>
+        private readonly struct ProgramEntry
+        {
+            /// <summary>
+            /// Cached shader program.
+            /// </summary>
+            public readonly CachedShaderProgram CachedProgram;
+
+            /// <summary>
+            /// Optional binary code. If not null, it is used instead of the backend host binary.
+            /// </summary>
+            public readonly byte[] BinaryCode;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Indicates if the program is a host binary shader.
+            /// </summary>
+            public readonly bool IsBinary;
+
+            /// <summary>
+            /// Initializes every field of the entry from the matching argument.
+            /// </summary>
+            /// <param name="cachedProgram">Cached shader program</param>
+            /// <param name="binaryCode">Optional binary code. If not null, it is used instead of the backend host binary</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            /// <param name="isBinary">Indicates if the program is a host binary shader</param>
+            public ProgramEntry(
+                CachedShaderProgram cachedProgram,
+                byte[] binaryCode,
+                int programIndex,
+                bool isCompute,
+                bool isBinary)
+            {
+                (CachedProgram, BinaryCode) = (cachedProgram, binaryCode);
+                (ProgramIndex, IsCompute, IsBinary) = (programIndex, isCompute, isBinary);
+            }
+        }
+
+        /// <summary>
+        /// Entry of the compilation queue: a translated program waiting to be compiled on the host.
+        /// </summary>
+        private readonly struct ProgramCompilation
+        {
+            /// <summary>
+            /// Translated shader stages.
+            /// </summary>
+            public readonly ShaderProgram[] TranslatedStages;
+
+            /// <summary>
+            /// Cached shaders.
+            /// </summary>
+            public readonly CachedShaderStage[] Shaders;
+
+            /// <summary>
+            /// Specialization state.
+            /// </summary>
+            public readonly ShaderSpecializationState SpecializationState;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Initializes every field of the entry from the matching argument.
+            /// </summary>
+            /// <param name="translatedStages">Translated shader stages</param>
+            /// <param name="shaders">Cached shaders</param>
+            /// <param name="specState">Specialization state</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            public ProgramCompilation(
+                ShaderProgram[] translatedStages,
+                CachedShaderStage[] shaders,
+                ShaderSpecializationState specState,
+                int programIndex,
+                bool isCompute)
+            {
+                (TranslatedStages, Shaders, SpecializationState) = (translatedStages, shaders, specState);
+                (ProgramIndex, IsCompute) = (programIndex, isCompute);
+            }
+        }
+
+        /// <summary>
+        /// Entry of the async translation queue: a program to be translated from guest code.
+        /// </summary>
+        private readonly struct AsyncProgramTranslation
+        {
+            /// <summary>
+            /// Guest code for each active stage.
+            /// </summary>
+            public readonly GuestCodeAndCbData?[] GuestShaders;
+
+            /// <summary>
+            /// Specialization state.
+            /// </summary>
+            public readonly ShaderSpecializationState SpecializationState;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Initializes every field of the entry from the matching argument.
+            /// </summary>
+            /// <param name="guestShaders">Guest code for each active stage</param>
+            /// <param name="specState">Specialization state</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            public AsyncProgramTranslation(
+                GuestCodeAndCbData?[] guestShaders,
+                ShaderSpecializationState specState,
+                int programIndex,
+                bool isCompute)
+            {
+                (GuestShaders, SpecializationState) = (guestShaders, specState);
+                (ProgramIndex, IsCompute) = (programIndex, isCompute);
+            }
+        }
+
+ // Programs already submitted to the host backend whose link status still has to be checked.
+ private readonly Queue<ProgramEntry> _validationQueue;
+ // Translated programs waiting for host program creation; drained by ProcessCompilationQueue.
+ // NOTE(review): the producers are outside this part of the file, presumably the guest recompilation paths.
+ private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;
+ // Guest programs waiting for translation; bounded to ThreadCount entries and consumed by the worker threads.
+ private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;
+ // Successfully compiled programs and their binary code, keyed by program index; used to rebuild the cache files.
+ private readonly SortedList<int, (CachedShaderProgram, byte[])> _programList;
+
+ // Validation queue size at which EnqueueForValidation blocks on the oldest entry.
+ private int _backendParallelCompileThreads;
+ // Progress counters, reset by LoadShaders (the total is the program count reported by the host storage).
+ private int _compiledCount;
+ private int _totalCount;
+
+        /// <summary>
+        /// Builds a loader bound to the given caches, storage and cancellation token, with empty work queues.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="graphicsCache">Graphics shader cache</param>
+        /// <param name="computeCache">Compute shader cache</param>
+        /// <param name="hostStorage">Disk cache host storage</param>
+        /// <param name="cancellationToken">Cancellation token</param>
+        /// <param name="stateChangeCallback">Function to be called when there is a state change, reporting state, compiled and total shaders count</param>
+        public ParallelDiskCacheLoader(
+            GpuContext context,
+            ShaderCacheHashTable graphicsCache,
+            ComputeShaderCacheHashTable computeCache,
+            DiskCacheHostStorage hostStorage,
+            CancellationToken cancellationToken,
+            Action<ShaderCacheState, int, int> stateChangeCallback)
+        {
+            // Collaborators supplied by the caller.
+            _context = context;
+            _hostStorage = hostStorage;
+            _graphicsCache = graphicsCache;
+            _computeCache = computeCache;
+            _stateChangeCallback = stateChangeCallback;
+            _cancellationToken = cancellationToken;
+
+            // Work queues; only the async translation queue is bounded (to the worker thread count).
+            _asyncTranslationQueue = new BlockingCollection<AsyncProgramTranslation>(ThreadCount);
+            _compilationQueue = new ConcurrentQueue<ProgramCompilation>();
+            _validationQueue = new Queue<ProgramEntry>();
+            _programList = new SortedList<int, (CachedShaderProgram, byte[])>();
+
+            // Must be kept in sync with the backend code.
+            _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8);
+        }
+
+        /// <summary>
+        /// Loads all shaders from the cache.
+        /// </summary>
+        /// <remarks>
+        /// Starts the translation worker threads, lets the host storage feed programs to this loader, waits for
+        /// the workers and for every pending host compilation, and finally rebuilds the shared and host cache
+        /// files when a regeneration was flagged and the load was not cancelled. The workers are always told to
+        /// stop and are joined, even when the host storage throws an exception that is not handled here.
+        /// </remarks>
+        public void LoadShaders()
+        {
+            Thread[] workThreads = new Thread[ThreadCount];
+
+            for (int index = 0; index < ThreadCount; index++)
+            {
+                workThreads[index] = new Thread(ProcessAsyncQueue)
+                {
+                    Name = $"GPU.AsyncTranslationThread.{index}"
+                };
+            }
+
+            int programCount = _hostStorage.GetProgramCount();
+
+            _compiledCount = 0;
+            _totalCount = programCount;
+
+            _stateChangeCallback(ShaderCacheState.Start, 0, programCount);
+
+            Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache...");
+
+            for (int index = 0; index < ThreadCount; index++)
+            {
+                workThreads[index].Start(_cancellationToken);
+            }
+
+            try
+            {
+                _hostStorage.LoadShaders(_context, this);
+            }
+            catch (DiskCacheLoadException diskCacheLoadException)
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}");
+
+                // If we can't even access the file, then we also can't rebuild.
+                if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess)
+                {
+                    _needsHostRegen = true;
+                }
+            }
+            catch (InvalidDataException invalidDataException)
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}");
+                _needsHostRegen = true;
+            }
+            catch (IOException ioException)
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}");
+                _needsHostRegen = true;
+            }
+            finally
+            {
+                // Without this the workers would block forever on the queue if an unexpected exception
+                // escapes the load above, so it has to run on every path.
+                _asyncTranslationQueue.CompleteAdding();
+
+                for (int index = 0; index < ThreadCount; index++)
+                {
+                    workThreads[index].Join();
+                }
+            }
+
+            CheckCompilationBlocking();
+
+            if (_needsHostRegen && Active)
+            {
+                // Rebuild both shared and host cache files.
+                // Rebuilding shared is required because the shader information returned by the translator
+                // might have changed, and so we have to reconstruct the file with the new information.
+                try
+                {
+                    _hostStorage.ClearSharedCache();
+                    _hostStorage.ClearHostCache(_context);
+
+                    if (_programList.Count != 0)
+                    {
+                        Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders...");
+
+                        using var streams = _hostStorage.GetOutputStreams(_context);
+
+                        foreach (var kv in _programList)
+                        {
+                            if (!Active)
+                            {
+                                break;
+                            }
+
+                            (CachedShaderProgram program, byte[] binaryCode) = kv.Value;
+                            _hostStorage.AddShader(_context, program, binaryCode, streams);
+                        }
+
+                        Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully.");
+                    }
+                    else
+                    {
+                        // Nothing could be loaded, so the guest cache is dropped as well.
+                        _hostStorage.ClearGuestCache();
+
+                        Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption.");
+                    }
+                }
+                catch (DiskCacheLoadException diskCacheLoadException)
+                {
+                    Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}");
+                }
+                catch (IOException ioException)
+                {
+                    Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. {ioException.Message}");
+                }
+            }
+
+            Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");
+
+            _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount);
+        }
+
+        /// <summary>
+        /// Queues a program built from a host binary so that its compilation result gets validated.
+        /// </summary>
+        /// <param name="cachedProgram">Cached program</param>
+        /// <param name="binaryCode">Host binary code</param>
+        /// <param name="programIndex">Program index</param>
+        /// <param name="isCompute">Indicates if the program is a compute shader</param>
+        public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute)
+        {
+            // The entry is flagged as binary so that a failed link can still fall back to the guest code.
+            ProgramEntry hostEntry = new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true);
+
+            EnqueueForValidation(hostEntry);
+        }
+
+        /// <summary>
+        /// Queues a guest program for translation on the worker threads.
+        /// </summary>
+        /// <remarks>
+        /// The add is subject to the loader's cancellation token; if cancellation is requested the
+        /// program is dropped silently.
+        /// </remarks>
+        /// <param name="guestShaders">Guest code for each active stage</param>
+        /// <param name="specState">Specialization state</param>
+        /// <param name="programIndex">Program index</param>
+        /// <param name="isCompute">Indicates if the program is a compute shader</param>
+        public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
+        {
+            try
+            {
+                _asyncTranslationQueue.Add(
+                    new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute),
+                    _cancellationToken);
+            }
+            catch (OperationCanceledException)
+            {
+                // The load was cancelled; there is nothing left to translate.
+            }
+        }
+
+        /// <summary>
+        /// Handles the programs at the head of the validation queue that have already finished compiling,
+        /// adding the successful ones to the cache. Never waits for a compilation that is still running.
+        /// </summary>
+        public void CheckCompilation()
+        {
+            ProcessCompilationQueue();
+
+            while (_validationQueue.Count != 0)
+            {
+                ProgramEntry head = _validationQueue.Peek();
+                ProgramLinkStatus status = head.CachedProgram.HostProgram.CheckProgramLink(false);
+
+                if (status == ProgramLinkStatus.Incomplete)
+                {
+                    // Still compiling; stop at the first unfinished program instead of blocking on it.
+                    return;
+                }
+
+                // The entry is only removed from the queue once it has been processed.
+                ProcessCompiledProgram(ref head, status);
+                _validationQueue.Dequeue();
+            }
+        }
+
+        /// <summary>
+        /// Drains the validation queue, waiting for each program to finish compiling, and adds the
+        /// ones that compiled successfully to the cache. Stops early once the load is no longer active.
+        /// </summary>
+        private void CheckCompilationBlocking()
+        {
+            ProcessCompilationQueue();
+
+            while (true)
+            {
+                // The entry is dequeued before the activity check, exactly one entry per iteration.
+                if (!_validationQueue.TryDequeue(out ProgramEntry pending) || !Active)
+                {
+                    break;
+                }
+
+                ProgramLinkStatus status = pending.CachedProgram.HostProgram.CheckProgramLink(true);
+
+                ProcessCompiledProgram(ref pending, status, asyncCompile: false);
+            }
+        }
+
+        /// <summary>
+        /// Handles the result of a host compilation.
+        /// </summary>
+        /// <remarks>
+        /// A successful program is added to the in-memory cache and to the program list. A failed host binary
+        /// is recompiled from its guest code; any other failure is counted as an error.
+        /// </remarks>
+        /// <param name="entry">Compiled program entry</param>
+        /// <param name="result">Compilation result</param>
+        /// <param name="asyncCompile">For failed host compilations, indicates if a guest compilation should be done asynchronously</param>
+        private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true)
+        {
+            if (result == ProgramLinkStatus.Success)
+            {
+                // Compilation successful, add to memory cache.
+                if (entry.IsCompute)
+                {
+                    _computeCache.Add(entry.CachedProgram);
+                }
+                else
+                {
+                    _graphicsCache.Add(entry.CachedProgram);
+                }
+
+                // A program that did not come from a host binary means the cache files have to be rebuilt.
+                _needsHostRegen |= !entry.IsBinary;
+
+                // Fetch the binary code from the backend if it isn't already present.
+                byte[] hostBinary = entry.BinaryCode ?? entry.CachedProgram.HostProgram.GetBinary();
+
+                _programList.Add(entry.ProgramIndex, (entry.CachedProgram, hostBinary));
+                SignalCompiled();
+
+                return;
+            }
+
+            if (!entry.IsBinary)
+            {
+                // Failed to compile from both host and guest binary.
+                ErrorCount++;
+                SignalCompiled();
+
+                return;
+            }
+
+            // If this is a host binary and compilation failed,
+            // we still have a chance to recompile from the guest binary.
+            CachedShaderProgram failedProgram = entry.CachedProgram;
+
+            // Stages without a shader stay null in the resulting array.
+            GuestCodeAndCbData?[] guestShaders = Array.ConvertAll(
+                failedProgram.Shaders,
+                stage => stage != null ? new GuestCodeAndCbData(stage.Code, stage.Cb1Data) : (GuestCodeAndCbData?)null);
+
+            if (asyncCompile)
+            {
+                QueueGuestProgram(guestShaders, failedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
+
+                return;
+            }
+
+            RecompileFromGuestCode(guestShaders, failedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
+            ProcessCompilationQueue();
+        }
+
+        /// <summary>
+        /// Creates host programs for the translated guest programs waiting in the compilation queue and
+        /// queues each of them for validation. Stops early once the load is no longer active.
+        /// </summary>
+        private void ProcessCompilationQueue()
+        {
+            while (true)
+            {
+                // The entry is dequeued before the activity check, exactly one entry per iteration.
+                if (!_compilationQueue.TryDequeue(out ProgramCompilation pending) || !Active)
+                {
+                    break;
+                }
+
+                ShaderProgram[] stages = pending.TranslatedStages;
+                ShaderSource[] sources = new ShaderSource[stages.Length];
+
+                // Stays at -1 when the program has no fragment stage.
+                int fragmentOutputMap = -1;
+
+                for (int stageIndex = 0; stageIndex < stages.Length; stageIndex++)
+                {
+                    ShaderProgram stage = stages[stageIndex];
+                    sources[stageIndex] = CreateShaderSource(stage);
+
+                    if (stage.Info.Stage == ShaderStage.Fragment)
+                    {
+                        fragmentOutputMap = stage.Info.FragmentOutputMap;
+                    }
+                }
+
+                ShaderSpecializationState specState = pending.SpecializationState;
+
+                // The pipeline state is only passed along when the specialization state has one.
+                ShaderInfo shaderInfo = specState.PipelineState.HasValue
+                    ? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value, fromCache: true)
+                    : new ShaderInfo(fragmentOutputMap, fromCache: true);
+
+                IProgram hostProgram = _context.Renderer.CreateProgram(sources, shaderInfo);
+                CachedShaderProgram cachedProgram = new CachedShaderProgram(hostProgram, specState, pending.Shaders);
+
+                // Vulkan's binary code is the SPIR-V used for compilation, so it is ready immediately. Other APIs get this after compilation.
+                byte[] binaryCode = null;
+
+                if (_context.Capabilities.Api == TargetApi.Vulkan)
+                {
+                    binaryCode = ShaderBinarySerializer.Pack(sources);
+                }
+
+                EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, pending.ProgramIndex, pending.IsCompute, isBinary: false));
+            }
+        }
+
+        /// <summary>
+        /// Adds a program to the validation queue, where its compilation result will be checked later.
+        /// </summary>
+        /// <remarks>
+        /// When the queue reaches the backend's parallel compilation limit, the oldest entry is
+        /// removed and its compilation is waited for before this method returns.
+        /// </remarks>
+        /// <param name="newEntry">Program entry to be validated</param>
+        private void EnqueueForValidation(ProgramEntry newEntry)
+        {
+            _validationQueue.Enqueue(newEntry);
+
+            // Do not allow more than N shader compilation in-flight, where N is the maximum number of threads
+            // the driver will be using for parallel compilation.
+            // Submitting more seems to cause NVIDIA OpenGL driver to crash.
+            if (_validationQueue.Count < _backendParallelCompileThreads)
+            {
+                return;
+            }
+
+            if (_validationQueue.TryDequeue(out ProgramEntry oldest))
+            {
+                ProgramLinkStatus status = oldest.CachedProgram.HostProgram.CheckProgramLink(true);
+
+                ProcessCompiledProgram(ref oldest, status, asyncCompile: false);
+            }
+        }
+
+        /// <summary>
+        /// Worker thread body: translates the queued guest programs until the queue is marked as
+        /// complete or cancellation is requested.
+        /// </summary>
+        /// <param name="state">Cancellation token</param>
+        private void ProcessAsyncQueue(object state)
+        {
+            var cancellationToken = (CancellationToken)state;
+
+            try
+            {
+                foreach (AsyncProgramTranslation request in _asyncTranslationQueue.GetConsumingEnumerable(cancellationToken))
+                {
+                    RecompileFromGuestCode(request.GuestShaders, request.SpecializationState, request.ProgramIndex, request.IsCompute);
+                }
+            }
+            catch (OperationCanceledException)
+            {
+                // Cancellation simply ends the worker.
+            }
+        }
+
+        /// <summary>
+        /// Translates a program from its guest code, using the compute or the graphics path.
+        /// </summary>
+        /// <remarks>
+        /// Any exception thrown by the translation is logged and counted as an error; it does not propagate.
+        /// </remarks>
+        /// <param name="guestShaders">Guest code for each active stage</param>
+        /// <param name="specState">Specialization state</param>
+        /// <param name="programIndex">Program index</param>
+        /// <param name="isCompute">Indicates if the program is a compute shader</param>
+        private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
+        {
+            try
+            {
+                if (!isCompute)
+                {
+                    RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex);
+                }
+                else
+                {
+                    RecompileComputeFromGuestCode(guestShaders, specState, programIndex);
+                }
+            }
+            catch (Exception translationError)
+            {
+                Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {translationError.Message}");
+
+                // NOTE(review): this method also runs on the translation worker threads and the
+                // increment below is not synchronized.
+                ErrorCount++;
+                SignalCompiled();
+            }
+        }
+
+ /// <summary>
+ /// Recompiles a graphics program from guest code.
+ /// </summary>
+ /// <param name="guestShaders">Guest code for each active stage</param>
+ /// <param name="specState">Specialization state</param>
+ /// <param name="programIndex">Program index</param>
+ private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
+ {
+ ShaderSpecializationState newSpecState = new ShaderSpecializationState(
+ ref specState.GraphicsState,
+ specState.PipelineState,
+ specState.TransformFeedbackDescriptors);
+ // The same ResourceCounts instance is handed to the accessor of every stage decoded below.
+ ResourceCounts counts = new ResourceCounts();
+ // Slot 0 receives the context decoded with the VertexA flag; slot (stageIndex + 1) holds each regular stage.
+ TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
+ TranslatorContext nextStage = null;
+
+ TargetApi api = _context.Capabilities.Api;
+ // Decode from the last stage down to the first, so each decoded stage can be given the stage that follows it.
+ for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
+ {
+ if (guestShaders[stageIndex + 1].HasValue)
+ {
+ GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value;
+
+ byte[] guestCode = shader.Code;
+ byte[] cb1Data = shader.Cb1Data;
+
+ DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex);
+ TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0);
+
+ if (nextStage != null)
+ {
+ currentStage.SetNextStage(nextStage);
+ }
+ // When guestShaders[0] is present, it is decoded with the VertexA flag right after stage 0.
+ if (stageIndex == 0 && guestShaders[0].HasValue)
+ {
+ byte[] guestCodeA = guestShaders[0].Value.Code;
+ byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
+
+ DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0);
+ translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
+ }
+
+ translatorContexts[stageIndex + 1] = currentStage;
+ nextStage = currentStage;
+ }
+ }
+ // Without host geometry shader support, ShaderCache is asked to try removing the geometry stage.
+ if (!_context.Capabilities.SupportsGeometryShader)
+ {
+ ShaderCache.TryRemoveGeometryStage(translatorContexts);
+ }
+
+ CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
+ List<ShaderProgram> translatedStages = new List<ShaderProgram>();
+
+ TranslatorContext previousStage = null;
+ // Translate the decoded contexts in ascending stage order.
+ for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
+ {
+ TranslatorContext currentStage = translatorContexts[stageIndex + 1];
+
+ if (currentStage != null)
+ {
+ ShaderProgram program;
+
+ byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
+ byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data;
+ // Stage 0 is translated together with the slot 0 context when guestShaders[0] is present.
+ if (stageIndex == 0 && guestShaders[0].HasValue)
+ {
+ program = currentStage.Translate(translatorContexts[0]);
+
+ byte[] guestCodeA = guestShaders[0].Value.Code;
+ byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
+ // The slot 0 entry is cached with null program info; the translated info is stored on slot 1.
+ shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA);
+ shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
+ }
+ else
+ {
+ program = currentStage.Translate();
+
+ shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
+ }
+ // NOTE(review): program.Info is already dereferenced above, so a null program would throw before this check.
+ if (program != null)
+ {
+ translatedStages.Add(program);
+ }
+
+ previousStage = currentStage;
+ }
+ else if ( // No context at stage index 3: add a geometry passthrough generated from the previous stage when all conditions below hold.
+ previousStage != null &&
+ previousStage.LayerOutputWritten &&
+ stageIndex == 3 &&
+ !_context.Capabilities.SupportsLayerVertexTessellation)
+ {
+ translatedStages.Add(previousStage.GenerateGeometryPassthrough());
+ }
+ }
+ // Queue the translated stages for compilation, flagged as a non-compute program.
+ _compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false));
+ }
+
+ /// <summary>
+ /// Recompiles a compute program from guest code.
+ /// </summary>
+ /// <param name="guestShaders">Guest code for each active stage</param>
+ /// <param name="specState">Specialization state</param>
+ /// <param name="programIndex">Program index</param>
+ private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
+ {
+     // The compute shader is read from the first slot of the array.
+     GuestCodeAndCbData computeShader = guestShaders[0].Value;
+     byte[] guestCode = computeShader.Code;
+     byte[] cb1Data = computeShader.Cb1Data;
+
+     var resourceCounts = new ResourceCounts();
+     var updatedSpecState = new ShaderSpecializationState(ref specState.ComputeState);
+     var accessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, updatedSpecState, resourceCounts, 0);
+
+     // Decode the guest code, then translate it.
+     ShaderProgram translated = DecodeComputeShader(accessor, _context.Capabilities.Api, 0).Translate();
+
+     var cachedStages = new CachedShaderStage[] { new CachedShaderStage(translated.Info, guestCode, cb1Data) };
+     var programs = new ShaderProgram[] { translated };
+
+     _compilationQueue.Enqueue(new ProgramCompilation(programs, cachedStages, updatedSpecState, programIndex, isCompute: true));
+ }
+
+ /// <summary>
+ /// Signals that compilation of a program has been finished successfully,
+ /// or that it failed and guest recompilation has also been attempted.
+ /// </summary>
+ private void SignalCompiled()
+ {
+     // Report the count produced by this increment along with the total.
+     int compiledSoFar = ++_compiledCount;
+
+     _stateChangeCallback(ShaderCacheState.Loading, compiledSoFar, _totalCount);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs
new file mode 100644
index 00000000..77e52667
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs
@@ -0,0 +1,66 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+ static class ShaderBinarySerializer
+ {
+     /// <summary>
+     /// Packs the binary code of several shader sources into a single byte array.
+     /// </summary>
+     /// <remarks>
+     /// The output starts with the number of sources, followed, for each source,
+     /// by its stage, its binary code length and the binary code itself.
+     /// </remarks>
+     /// <param name="sources">Shader sources to pack</param>
+     /// <returns>Packed data</returns>
+     public static byte[] Pack(ShaderSource[] sources)
+     {
+         using MemoryStream packed = MemoryStreamManager.Shared.GetStream();
+
+         packed.Write(sources.Length);
+
+         for (int index = 0; index < sources.Length; index++)
+         {
+             ShaderSource current = sources[index];
+
+             packed.Write((int)current.Stage);
+             packed.Write(current.BinaryCode.Length);
+             packed.Write(current.BinaryCode);
+         }
+
+         return packed.ToArray();
+     }
+
+     /// <summary>
+     /// Rebuilds the shader sources from data in the layout written by <see cref="Pack"/>.
+     /// </summary>
+     /// <param name="stages">Cached stages, searched for the bindings of each unpacked stage</param>
+     /// <param name="code">Packed data</param>
+     /// <returns>Unpacked shader sources, all created with the SPIR-V target language</returns>
+     public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code)
+     {
+         using var stream = new MemoryStream(code);
+         using var reader = new BinaryReader(stream);
+
+         var sources = new List<ShaderSource>();
+
+         // The first value is the number of entries that follow.
+         for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
+         {
+             var stage = (ShaderStage)reader.ReadInt32();
+             int length = reader.ReadInt32();
+             byte[] binary = reader.ReadBytes(length);
+
+             sources.Add(new ShaderSource(binary, GetBindings(stages, stage), stage, TargetLanguage.Spirv));
+         }
+
+         return sources.ToArray();
+     }
+
+     /// <summary>
+     /// Gets the bindings of the first cached stage whose info matches the given shader stage.
+     /// </summary>
+     /// <param name="stages">Cached stages to search; entries that are null or have no info are ignored</param>
+     /// <param name="stage">Shader stage to look for</param>
+     /// <returns>Bindings of the matching stage, or empty bindings if there is no match</returns>
+     private static ShaderBindings GetBindings(CachedShaderStage[] stages, ShaderStage stage)
+     {
+         foreach (CachedShaderStage candidate in stages)
+         {
+             var info = candidate?.Info;
+
+             if (info == null || info.Stage != stage)
+             {
+                 continue;
+             }
+
+             return ShaderCache.GetBindings(info);
+         }
+
+         return new ShaderBindings(Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>());
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
new file mode 100644
index 00000000..3e816733
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -0,0 +1,297 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Represents a GPU state and memory accessor.
+ /// </summary>
+ class GpuAccessor : GpuAccessorBase, IGpuAccessor
+ {
+     private readonly GpuChannel _channel;
+     private readonly GpuAccessorState _state;
+     private readonly int _stageIndex;
+     private readonly bool _compute;
+     private readonly bool _isVulkan;
+
+     /// <summary>
+     /// Creates a GPU state accessor used to translate a graphics shader stage.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="stageIndex">Graphics shader stage index (0 = Vertex, 4 = Fragment)</param>
+     public GpuAccessor(
+         GpuContext context,
+         GpuChannel channel,
+         GpuAccessorState state,
+         int stageIndex) : base(context, state.ResourceCounts, stageIndex)
+     {
+         _channel = channel;
+         _state = state;
+         _stageIndex = stageIndex;
+         _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
+     }
+
+     /// <summary>
+     /// Creates a GPU state accessor used to translate a compute shader.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="state">Current GPU state</param>
+     public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0)
+     {
+         // The stage index and the Vulkan flag are not set here, so they keep their defaults (0 and false).
+         _compute = true;
+         _channel = channel;
+         _state = state;
+     }
+
+     /// <inheritdoc/>
+     public uint ConstantBuffer1Read(int offset)
+     {
+         ulong cb1Address;
+
+         if (_compute)
+         {
+             cb1Address = _channel.BufferManager.GetComputeUniformBufferAddress(1);
+         }
+         else
+         {
+             cb1Address = _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1);
+         }
+
+         return _channel.MemoryManager.Physical.Read<uint>(cb1Address + (ulong)offset);
+     }
+
+     /// <inheritdoc/>
+     public void Log(string message) => Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");
+
+     /// <inheritdoc/>
+     public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize)
+     {
+         // Number of bytes from the address up to the next 0x1000-aligned boundary.
+         int bytesToBoundary = 0x1000 - (int)(address & 0xfff);
+         int size = Math.Max(minimumSize, bytesToBoundary);
+
+         ReadOnlySpan<byte> data = _channel.MemoryManager.GetSpan(address, size);
+
+         return MemoryMarshal.Cast<byte, ulong>(data);
+     }
+
+     /// <inheritdoc/>
+     public bool QueryAlphaToCoverageDitherEnable() =>
+         _state.GraphicsState.AlphaToCoverageEnable && _state.GraphicsState.AlphaToCoverageDitherEnable;
+
+     /// <inheritdoc/>
+     public AlphaTestOp QueryAlphaTestCompare()
+     {
+         // The guest comparison is only reported on Vulkan with alpha test enabled.
+         if (_isVulkan && _state.GraphicsState.AlphaTestEnable)
+         {
+             return _state.GraphicsState.AlphaTestCompare switch
+             {
+                 CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
+                 CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
+                 CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
+                 CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
+                 CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
+                 CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
+                 CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
+                 _ => AlphaTestOp.Always
+             };
+         }
+
+         return AlphaTestOp.Always;
+     }
+
+     /// <inheritdoc/>
+     public float QueryAlphaTestReference() => _state.GraphicsState.AlphaTestReference;
+
+     /// <inheritdoc/>
+     public AttributeType QueryAttributeType(int location) => _state.GraphicsState.AttributeTypes[location];
+
+     /// <inheritdoc/>
+     public AttributeType QueryFragmentOutputType(int location) => _state.GraphicsState.FragmentOutputTypes[location];
+
+     /// <inheritdoc/>
+     public int QueryComputeLocalSizeX()
+     {
+         return _state.ComputeState.LocalSizeX;
+     }
+
+     /// <inheritdoc/>
+     public int QueryComputeLocalSizeY()
+     {
+         return _state.ComputeState.LocalSizeY;
+     }
+
+     /// <inheritdoc/>
+     public int QueryComputeLocalSizeZ()
+     {
+         return _state.ComputeState.LocalSizeZ;
+     }
+
+     /// <inheritdoc/>
+     public int QueryComputeLocalMemorySize()
+     {
+         return _state.ComputeState.LocalMemorySize;
+     }
+
+     /// <inheritdoc/>
+     public int QueryComputeSharedMemorySize()
+     {
+         return _state.ComputeState.SharedMemorySize;
+     }
+
+     /// <inheritdoc/>
+     public uint QueryConstantBufferUse()
+     {
+         uint useMask;
+
+         if (_compute)
+         {
+             useMask = _channel.BufferManager.GetComputeUniformBufferUseMask();
+         }
+         else
+         {
+             useMask = _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);
+         }
+
+         _state.SpecializationState?.RecordConstantBufferUse(_stageIndex, useMask);
+
+         return useMask;
+     }
+
+     /// <inheritdoc/>
+     public bool QueryHasConstantBufferDrawParameters() => _state.GraphicsState.HasConstantBufferDrawParameters;
+
+     /// <inheritdoc/>
+     public bool QueryHasUnalignedStorageBuffer() =>
+         _state.GraphicsState.HasUnalignedStorageBuffer || _state.ComputeState.HasUnalignedStorageBuffer;
+
+     /// <inheritdoc/>
+     public bool QueryDualSourceBlendEnable() => _state.GraphicsState.DualSourceBlendEnable;
+
+     /// <inheritdoc/>
+     public InputTopology QueryPrimitiveTopology()
+     {
+         _state.SpecializationState?.RecordPrimitiveTopology();
+
+         return ConvertToInputTopology(_state.GraphicsState.Topology, _state.GraphicsState.TessellationMode);
+     }
+
+     /// <inheritdoc/>
+     public bool QueryProgramPointSize() => _state.GraphicsState.ProgramPointSizeEnable;
+
+     /// <inheritdoc/>
+     public float QueryPointSize() => _state.GraphicsState.PointSize;
+
+     /// <inheritdoc/>
+     public bool QueryTessCw() => _state.GraphicsState.TessellationMode.UnpackCw();
+
+     /// <inheritdoc/>
+     public TessPatchType QueryTessPatchType() => _state.GraphicsState.TessellationMode.UnpackPatchType();
+
+     /// <inheritdoc/>
+     public TessSpacing QueryTessSpacing() => _state.GraphicsState.TessellationMode.UnpackSpacing();
+
+     /// <inheritdoc/>
+     public TextureFormat QueryTextureFormat(int handle, int cbufSlot)
+     {
+         _state.SpecializationState?.RecordTextureFormat(_stageIndex, handle, cbufSlot);
+
+         Image.TextureDescriptor descriptor = GetTextureDescriptor(handle, cbufSlot);
+
+         return ConvertToTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb());
+     }
+
+     /// <inheritdoc/>
+     public SamplerType QuerySamplerType(int handle, int cbufSlot)
+     {
+         _state.SpecializationState?.RecordTextureSamplerType(_stageIndex, handle, cbufSlot);
+
+         Image.TextureDescriptor descriptor = GetTextureDescriptor(handle, cbufSlot);
+
+         return descriptor.UnpackTextureTarget().ConvertSamplerType();
+     }
+
+     /// <inheritdoc/>
+     public bool QueryTextureCoordNormalized(int handle, int cbufSlot)
+     {
+         _state.SpecializationState?.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot);
+
+         Image.TextureDescriptor descriptor = GetTextureDescriptor(handle, cbufSlot);
+
+         return descriptor.UnpackTextureCoordNormalized();
+     }
+
+     /// <summary>
+     /// Gets the texture descriptor for a given texture on the pool.
+     /// </summary>
+     /// <param name="handle">Index of the texture (this is the word offset of the handle in the constant buffer)</param>
+     /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+     /// <returns>Texture descriptor</returns>
+     private Image.TextureDescriptor GetTextureDescriptor(int handle, int cbufSlot)
+     {
+         GpuChannelPoolState pool = _state.PoolState;
+
+         if (!_compute)
+         {
+             return _channel.TextureManager.GetGraphicsTextureDescriptor(
+                 pool.TexturePoolGpuVa,
+                 pool.TextureBufferIndex,
+                 pool.TexturePoolMaximumId,
+                 _stageIndex,
+                 handle,
+                 cbufSlot);
+         }
+
+         return _channel.TextureManager.GetComputeTextureDescriptor(
+             pool.TexturePoolGpuVa,
+             pool.TextureBufferIndex,
+             pool.TexturePoolMaximumId,
+             handle,
+             cbufSlot);
+     }
+
+     /// <inheritdoc/>
+     public bool QueryTransformDepthMinusOneToOne() => _state.GraphicsState.DepthMode;
+
+     /// <inheritdoc/>
+     public bool QueryTransformFeedbackEnabled() => _state.TransformFeedbackDescriptors != null;
+
+     /// <inheritdoc/>
+     public ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) =>
+         _state.TransformFeedbackDescriptors[bufferIndex].AsSpan();
+
+     /// <inheritdoc/>
+     public int QueryTransformFeedbackStride(int bufferIndex) =>
+         _state.TransformFeedbackDescriptors[bufferIndex].Stride;
+
+     /// <inheritdoc/>
+     public bool QueryEarlyZForce()
+     {
+         _state.SpecializationState?.RecordEarlyZForce();
+
+         return _state.GraphicsState.EarlyZForce;
+     }
+
+     /// <inheritdoc/>
+     public bool QueryViewportTransformDisable() => _state.GraphicsState.ViewportTransformDisable;
+
+     /// <inheritdoc/>
+     public void RegisterTexture(int handle, int cbufSlot)
+     {
+         ShaderSpecializationState specState = _state.SpecializationState;
+
+         // Without a specialization state there is nowhere to register, and the descriptor is not fetched.
+         if (specState is null)
+         {
+             return;
+         }
+
+         specState.RegisterTexture(_stageIndex, handle, cbufSlot, GetTextureDescriptor(handle, cbufSlot));
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
new file mode 100644
index 00000000..d35b8d92
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -0,0 +1,238 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// GPU accessor.
+ /// </summary>
+ class GpuAccessorBase
+ {
+     private readonly GpuContext _context;
+     private readonly ResourceCounts _resourceCounts;
+     private readonly int _stageIndex;
+
+     /// <summary>
+     /// Creates a new GPU accessor.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="resourceCounts">Counters used to hand out bindings when the target API is not Vulkan</param>
+     /// <param name="stageIndex">Shader stage index, used to compute bindings when the target API is Vulkan</param>
+     public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex)
+     {
+         _context = context;
+         _resourceCounts = resourceCounts;
+         _stageIndex = stageIndex;
+     }
+
+     /// <summary>
+     /// Indicates whether the target API reported by the GPU context is Vulkan.
+     /// </summary>
+     private bool IsVulkan => _context.Capabilities.Api == TargetApi.Vulkan;
+
+     public int QueryBindingConstantBuffer(int index)
+     {
+         if (!IsVulkan)
+         {
+             return _resourceCounts.UniformBuffersCount++;
+         }
+
+         // We need to start counting from 1 since binding 0 is reserved for the support uniform buffer.
+         return GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer") + 1;
+     }
+
+     public int QueryBindingStorageBuffer(int index)
+     {
+         if (!IsVulkan)
+         {
+             return _resourceCounts.StorageBuffersCount++;
+         }
+
+         return GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
+     }
+
+     public int QueryBindingTexture(int index, bool isBuffer)
+     {
+         if (!IsVulkan)
+         {
+             return _resourceCounts.TexturesCount++;
+         }
+
+         // Buffer textures are placed after the regular textures of the stage.
+         if (isBuffer)
+         {
+             index += (int)_context.Capabilities.MaximumTexturesPerStage;
+         }
+
+         return GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture");
+     }
+
+     public int QueryBindingImage(int index, bool isBuffer)
+     {
+         if (!IsVulkan)
+         {
+             return _resourceCounts.ImagesCount++;
+         }
+
+         // Buffer images are placed after the regular images of the stage.
+         if (isBuffer)
+         {
+             index += (int)_context.Capabilities.MaximumImagesPerStage;
+         }
+
+         return GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image");
+     }
+
+     /// <summary>
+     /// Computes a binding number from a per-stage resource index.
+     /// </summary>
+     /// <param name="index">Resource index inside the stage</param>
+     /// <param name="maxPerStage">Number of bindings reserved for each stage</param>
+     /// <param name="resourceName">Resource name used on the error message</param>
+     /// <returns>Binding number; an out of range index is logged as an error but still produces a value</returns>
+     private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName)
+     {
+         bool outOfRange = (uint)index >= maxPerStage;
+
+         if (outOfRange)
+         {
+             Logger.Error?.Print(LogClass.Gpu, $"{resourceName} index {index} exceeds per stage limit of {maxPerStage}.");
+         }
+
+         int stageBase = GetStageIndex() * (int)maxPerStage;
+
+         return stageBase + index;
+     }
+
+     /// <summary>
+     /// Gets the remapped index of the current stage, used as the base of its binding range.
+     /// </summary>
+     /// <returns>Remapped stage index</returns>
+     private int GetStageIndex()
+     {
+         // This is just a simple remapping to ensure that most frequently used shader stages
+         // have the lowest binding numbers.
+         // This is useful because if we need to run on a system with a low limit on the bindings,
+         // then we can still get most games working as the most common shaders will have low binding numbers.
+         return _stageIndex switch
+         {
+             1 => 3, // Tessellation control
+             2 => 4, // Tessellation evaluation
+             3 => 2, // Geometry
+             4 => 1, // Fragment
+             _ => 0 // Vertex/Compute
+         };
+     }
+
+     public int QueryHostGatherBiasPrecision() => _context.Capabilities.GatherBiasPrecision;
+
+     public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;
+
+     public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;
+
+     public bool QueryHostHasVectorIndexingBug() => _context.Capabilities.HasVectorIndexingBug;
+
+     public int QueryHostStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment;
+
+     public bool QueryHostSupportsBgraFormat() => _context.Capabilities.SupportsBgraFormat;
+
+     public bool QueryHostSupportsFragmentShaderInterlock() => _context.Capabilities.SupportsFragmentShaderInterlock;
+
+     public bool QueryHostSupportsFragmentShaderOrderingIntel() => _context.Capabilities.SupportsFragmentShaderOrderingIntel;
+
+     public bool QueryHostSupportsGeometryShader() => _context.Capabilities.SupportsGeometryShader;
+
+     public bool QueryHostSupportsGeometryShaderPassthrough() => _context.Capabilities.SupportsGeometryShaderPassthrough;
+
+     public bool QueryHostSupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted;
+
+     public bool QueryHostSupportsLayerVertexTessellation() => _context.Capabilities.SupportsLayerVertexTessellation;
+
+     public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;
+
+     public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
+
+     public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
+
+     public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
+
+     public bool QueryHostSupportsViewportIndexVertexTessellation() => _context.Capabilities.SupportsViewportIndexVertexTessellation;
+
+     public bool QueryHostSupportsViewportMask() => _context.Capabilities.SupportsViewportMask;
+
+     /// <summary>
+     /// Converts a packed Maxwell texture format to the shader translator texture format.
+     /// </summary>
+     /// <param name="format">Packed maxwell format</param>
+     /// <param name="formatSrgb">Indicates if the format is sRGB</param>
+     /// <returns>Shader translator texture format, or Unknown if the format is not found or has no mapping</returns>
+     protected static TextureFormat ConvertToTextureFormat(uint format, bool formatSrgb)
+     {
+         if (FormatTable.TryGetTextureFormat(format, formatSrgb, out FormatInfo info))
+         {
+             return info.Format switch
+             {
+                 Format.R8Unorm => TextureFormat.R8Unorm,
+                 Format.R8Snorm => TextureFormat.R8Snorm,
+                 Format.R8Uint => TextureFormat.R8Uint,
+                 Format.R8Sint => TextureFormat.R8Sint,
+                 Format.R16Float => TextureFormat.R16Float,
+                 Format.R16Unorm => TextureFormat.R16Unorm,
+                 Format.R16Snorm => TextureFormat.R16Snorm,
+                 Format.R16Uint => TextureFormat.R16Uint,
+                 Format.R16Sint => TextureFormat.R16Sint,
+                 Format.R32Float => TextureFormat.R32Float,
+                 Format.R32Uint => TextureFormat.R32Uint,
+                 Format.R32Sint => TextureFormat.R32Sint,
+                 Format.R8G8Unorm => TextureFormat.R8G8Unorm,
+                 Format.R8G8Snorm => TextureFormat.R8G8Snorm,
+                 Format.R8G8Uint => TextureFormat.R8G8Uint,
+                 Format.R8G8Sint => TextureFormat.R8G8Sint,
+                 Format.R16G16Float => TextureFormat.R16G16Float,
+                 Format.R16G16Unorm => TextureFormat.R16G16Unorm,
+                 Format.R16G16Snorm => TextureFormat.R16G16Snorm,
+                 Format.R16G16Uint => TextureFormat.R16G16Uint,
+                 Format.R16G16Sint => TextureFormat.R16G16Sint,
+                 Format.R32G32Float => TextureFormat.R32G32Float,
+                 Format.R32G32Uint => TextureFormat.R32G32Uint,
+                 Format.R32G32Sint => TextureFormat.R32G32Sint,
+                 Format.R8G8B8A8Unorm => TextureFormat.R8G8B8A8Unorm,
+                 Format.R8G8B8A8Snorm => TextureFormat.R8G8B8A8Snorm,
+                 Format.R8G8B8A8Uint => TextureFormat.R8G8B8A8Uint,
+                 Format.R8G8B8A8Sint => TextureFormat.R8G8B8A8Sint,
+                 Format.R8G8B8A8Srgb => TextureFormat.R8G8B8A8Unorm,
+                 Format.R16G16B16A16Float => TextureFormat.R16G16B16A16Float,
+                 Format.R16G16B16A16Unorm => TextureFormat.R16G16B16A16Unorm,
+                 Format.R16G16B16A16Snorm => TextureFormat.R16G16B16A16Snorm,
+                 Format.R16G16B16A16Uint => TextureFormat.R16G16B16A16Uint,
+                 Format.R16G16B16A16Sint => TextureFormat.R16G16B16A16Sint,
+                 Format.R32G32B32A32Float => TextureFormat.R32G32B32A32Float,
+                 Format.R32G32B32A32Uint => TextureFormat.R32G32B32A32Uint,
+                 Format.R32G32B32A32Sint => TextureFormat.R32G32B32A32Sint,
+                 Format.R10G10B10A2Unorm => TextureFormat.R10G10B10A2Unorm,
+                 Format.R10G10B10A2Uint => TextureFormat.R10G10B10A2Uint,
+                 Format.R11G11B10Float => TextureFormat.R11G11B10Float,
+                 _ => TextureFormat.Unknown
+             };
+         }
+
+         return TextureFormat.Unknown;
+     }
+
+     /// <summary>
+     /// Converts the Maxwell primitive topology to the shader translator topology.
+     /// </summary>
+     /// <param name="topology">Maxwell primitive topology</param>
+     /// <param name="tessellationMode">Maxwell tessellation mode</param>
+     /// <returns>Shader translator topology</returns>
+     protected static InputTopology ConvertToInputTopology(PrimitiveTopology topology, TessMode tessellationMode)
+     {
+         switch (topology)
+         {
+             case PrimitiveTopology.Points:
+                 return InputTopology.Points;
+
+             case PrimitiveTopology.Lines:
+             case PrimitiveTopology.LineLoop:
+             case PrimitiveTopology.LineStrip:
+                 return InputTopology.Lines;
+
+             case PrimitiveTopology.LinesAdjacency:
+             case PrimitiveTopology.LineStripAdjacency:
+                 return InputTopology.LinesAdjacency;
+
+             case PrimitiveTopology.Triangles:
+             case PrimitiveTopology.TriangleStrip:
+             case PrimitiveTopology.TriangleFan:
+                 return InputTopology.Triangles;
+
+             case PrimitiveTopology.TrianglesAdjacency:
+             case PrimitiveTopology.TriangleStripAdjacency:
+                 return InputTopology.TrianglesAdjacency;
+
+             case PrimitiveTopology.Patches:
+                 // For patches, the result depends on the patch type of the tessellation mode.
+                 if (tessellationMode.UnpackPatchType() == TessPatchType.Isolines)
+                 {
+                     return InputTopology.Lines;
+                 }
+
+                 return InputTopology.Triangles;
+
+             default:
+                 return InputTopology.Points;
+         }
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
new file mode 100644
index 00000000..0e8e979c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
@@ -0,0 +1,61 @@
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// State used by the <see cref="GpuAccessor"/>.
+ /// </summary>
+ class GpuAccessorState
+ {
+     /// <summary>
+     /// State of the GPU texture pool.
+     /// </summary>
+     public readonly GpuChannelPoolState PoolState;
+
+     /// <summary>
+     /// GPU compute state, used for compute shaders.
+     /// </summary>
+     public readonly GpuChannelComputeState ComputeState;
+
+     /// <summary>
+     /// GPU graphics state, used for vertex, tessellation, geometry and fragment shaders.
+     /// </summary>
+     public readonly GpuChannelGraphicsState GraphicsState;
+
+     /// <summary>
+     /// Shader specialization state, shared by all stages.
+     /// </summary>
+     public readonly ShaderSpecializationState SpecializationState;
+
+     /// <summary>
+     /// Transform feedback information when the shader uses transform feedback, otherwise it should be null.
+     /// </summary>
+     public readonly TransformFeedbackDescriptor[] TransformFeedbackDescriptors;
+
+     /// <summary>
+     /// Shader resource counts, shared by all stages.
+     /// </summary>
+     public readonly ResourceCounts ResourceCounts;
+
+     /// <summary>
+     /// Creates a new GPU accessor state, with a newly created <see cref="ResourceCounts"/> instance.
+     /// </summary>
+     /// <param name="poolState">GPU texture pool state</param>
+     /// <param name="computeState">GPU compute state, for compute shaders</param>
+     /// <param name="graphicsState">GPU graphics state, for vertex, tessellation, geometry and fragment shaders</param>
+     /// <param name="specializationState">Shader specialization state (shared by all stages)</param>
+     /// <param name="transformFeedbackDescriptors">Transform feedback information, if the shader uses transform feedback. Otherwise, should be null</param>
+     public GpuAccessorState(
+         GpuChannelPoolState poolState,
+         GpuChannelComputeState computeState,
+         GpuChannelGraphicsState graphicsState,
+         ShaderSpecializationState specializationState,
+         TransformFeedbackDescriptor[] transformFeedbackDescriptors = null)
+     {
+         // The resource counts are not received from the caller, each state starts with its own instance.
+         ResourceCounts = new ResourceCounts();
+
+         PoolState = poolState;
+         ComputeState = computeState;
+         GraphicsState = graphicsState;
+         SpecializationState = specializationState;
+         TransformFeedbackDescriptors = transformFeedbackDescriptors;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
new file mode 100644
index 00000000..b65dd75e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
@@ -0,0 +1,65 @@
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// State used by the <see cref="GpuAccessor"/>.
+ /// </summary>
+ readonly struct GpuChannelComputeState
+ {
+     // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+     /// <summary>
+     /// Size of the compute shader local group on the X dimension.
+     /// </summary>
+     public readonly int LocalSizeX;
+
+     /// <summary>
+     /// Size of the compute shader local group on the Y dimension.
+     /// </summary>
+     public readonly int LocalSizeY;
+
+     /// <summary>
+     /// Size of the compute shader local group on the Z dimension.
+     /// </summary>
+     public readonly int LocalSizeZ;
+
+     /// <summary>
+     /// Local memory size of the compute shader.
+     /// </summary>
+     public readonly int LocalMemorySize;
+
+     /// <summary>
+     /// Shared memory size of the compute shader.
+     /// </summary>
+     public readonly int SharedMemorySize;
+
+     /// <summary>
+     /// Indicates that any storage buffer use is unaligned.
+     /// </summary>
+     public readonly bool HasUnalignedStorageBuffer;
+
+     /// <summary>
+     /// Creates a new GPU compute state.
+     /// </summary>
+     /// <param name="localSizeX">Local group size X of the compute shader</param>
+     /// <param name="localSizeY">Local group size Y of the compute shader</param>
+     /// <param name="localSizeZ">Local group size Z of the compute shader</param>
+     /// <param name="localMemorySize">Local memory size of the compute shader</param>
+     /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
+     /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
+     public GpuChannelComputeState(
+         int localSizeX,
+         int localSizeY,
+         int localSizeZ,
+         int localMemorySize,
+         int sharedMemorySize,
+         bool hasUnalignedStorageBuffer)
+     {
+         (LocalSizeX, LocalSizeY, LocalSizeZ) = (localSizeX, localSizeY, localSizeZ);
+         (LocalMemorySize, SharedMemorySize) = (localMemorySize, sharedMemorySize);
+         HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
new file mode 100644
index 00000000..5247a096
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
@@ -0,0 +1,158 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// GPU graphics state used by the <see cref="GpuAccessor"/>.
+ /// </summary>
+ struct GpuChannelGraphicsState
+ {
+ // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+ /// <summary>
+ /// Early Z force enable.
+ /// </summary>
+ public bool EarlyZForce;
+
+ /// <summary>
+ /// Primitive topology of current draw.
+ /// </summary>
+ public PrimitiveTopology Topology;
+
+ /// <summary>
+ /// Tessellation mode.
+ /// </summary>
+ public TessMode TessellationMode;
+
+ /// <summary>
+ /// Indicates whether alpha-to-coverage is enabled.
+ /// </summary>
+ public bool AlphaToCoverageEnable;
+
+ /// <summary>
+ /// Indicates whether alpha-to-coverage dithering is enabled.
+ /// </summary>
+ public bool AlphaToCoverageDitherEnable;
+
+ /// <summary>
+ /// Indicates whether the viewport transform is disabled.
+ /// </summary>
+ public bool ViewportTransformDisable;
+
+ /// <summary>
+ /// Depth mode zero to one or minus one to one.
+ /// </summary>
+ public bool DepthMode;
+
+ /// <summary>
+ /// Indicates if the point size is set on the shader or is fixed.
+ /// </summary>
+ public bool ProgramPointSizeEnable;
+
+ /// <summary>
+ /// Point size used if <see cref="ProgramPointSizeEnable" /> is false.
+ /// </summary>
+ public float PointSize;
+
+ /// <summary>
+ /// Indicates whether alpha test is enabled.
+ /// </summary>
+ public bool AlphaTestEnable;
+
+ /// <summary>
+ /// When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded.
+ /// </summary>
+ public CompareOp AlphaTestCompare;
+
+ /// <summary>
+ /// When alpha test is enabled, indicates the value to compare with the fragment output alpha.
+ /// </summary>
+ public float AlphaTestReference;
+
+ /// <summary>
+ /// Type of the vertex attributes consumed by the shader.
+ /// </summary>
+ public Array32<AttributeType> AttributeTypes;
+
+ /// <summary>
+ /// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
+ /// </summary>
+ public bool HasConstantBufferDrawParameters;
+
+ /// <summary>
+ /// Indicates that any storage buffer use is unaligned.
+ /// </summary>
+ public bool HasUnalignedStorageBuffer;
+
+ /// <summary>
+ /// Type of the fragment shader outputs.
+ /// </summary>
+ public Array8<AttributeType> FragmentOutputTypes;
+
+ /// <summary>
+ /// Indicates whether dual source blend is enabled.
+ /// </summary>
+ public bool DualSourceBlendEnable;
+
+ /// <summary>
+ /// Creates a new GPU graphics state.
+ /// </summary>
+ /// <param name="earlyZForce">Early Z force enable</param>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="tessellationMode">Tessellation mode</param>
+ /// <param name="alphaToCoverageEnable">Indicates whether alpha-to-coverage is enabled</param>
+ /// <param name="alphaToCoverageDitherEnable">Indicates whether alpha-to-coverage dithering is enabled</param>
+ /// <param name="viewportTransformDisable">Indicates whether the viewport transform is disabled</param>
+ /// <param name="depthMode">Depth mode zero to one or minus one to one</param>
+ /// <param name="programPointSizeEnable">Indicates if the point size is set on the shader or is fixed</param>
+ /// <param name="pointSize">Point size if not set from shader</param>
+ /// <param name="alphaTestEnable">Indicates whether alpha test is enabled</param>
+ /// <param name="alphaTestCompare">When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded</param>
+ /// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
+ /// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
+ /// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
+ /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
+ /// <param name="fragmentOutputTypes">Type of the fragment shader outputs</param>
+ /// <param name="dualSourceBlendEnable">Indicates whether dual source blend is enabled</param>
+ public GpuChannelGraphicsState(
+ bool earlyZForce,
+ PrimitiveTopology topology,
+ TessMode tessellationMode,
+ bool alphaToCoverageEnable,
+ bool alphaToCoverageDitherEnable,
+ bool viewportTransformDisable,
+ bool depthMode,
+ bool programPointSizeEnable,
+ float pointSize,
+ bool alphaTestEnable,
+ CompareOp alphaTestCompare,
+ float alphaTestReference,
+ ref Array32<AttributeType> attributeTypes,
+ bool hasConstantBufferDrawParameters,
+ bool hasUnalignedStorageBuffer,
+ ref Array8<AttributeType> fragmentOutputTypes,
+ bool dualSourceBlendEnable)
+ {
+ EarlyZForce = earlyZForce;
+ Topology = topology;
+ TessellationMode = tessellationMode;
+ AlphaToCoverageEnable = alphaToCoverageEnable;
+ AlphaToCoverageDitherEnable = alphaToCoverageDitherEnable;
+ ViewportTransformDisable = viewportTransformDisable;
+ DepthMode = depthMode;
+ ProgramPointSizeEnable = programPointSizeEnable;
+ PointSize = pointSize;
+ AlphaTestEnable = alphaTestEnable;
+ AlphaTestCompare = alphaTestCompare;
+ AlphaTestReference = alphaTestReference;
+ AttributeTypes = attributeTypes;
+ HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
+ HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
+ FragmentOutputTypes = fragmentOutputTypes;
+ DualSourceBlendEnable = dualSourceBlendEnable;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs
new file mode 100644
index 00000000..1e34c5de
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs
@@ -0,0 +1,50 @@
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// GPU texture pool state used by the <see cref="GpuAccessor"/>.
+ /// </summary>
+ readonly struct GpuChannelPoolState : IEquatable<GpuChannelPoolState>
+ {
+ /// <summary>
+ /// GPU virtual address of the texture pool.
+ /// </summary>
+ public readonly ulong TexturePoolGpuVa;
+
+ /// <summary>
+ /// Maximum ID of the texture pool.
+ /// </summary>
+ public readonly int TexturePoolMaximumId;
+
+ /// <summary>
+ /// Constant buffer slot where the texture handles are located.
+ /// </summary>
+ public readonly int TextureBufferIndex;
+
+ /// <summary>
+ /// Creates a new GPU texture pool state.
+ /// </summary>
+ /// <param name="texturePoolGpuVa">GPU virtual address of the texture pool</param>
+ /// <param name="texturePoolMaximumId">Maximum ID of the texture pool</param>
+ /// <param name="textureBufferIndex">Constant buffer slot where the texture handles are located</param>
+ public GpuChannelPoolState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex)
+ {
+ TexturePoolGpuVa = texturePoolGpuVa;
+ TexturePoolMaximumId = texturePoolMaximumId;
+ TextureBufferIndex = textureBufferIndex;
+ }
+
+ /// <summary>
+ /// Checks if two pool states are equal, comparing all of their fields.
+ /// </summary>
+ /// <param name="other">Pool state to compare with</param>
+ /// <returns>True if they are equal, false otherwise</returns>
+ public bool Equals(GpuChannelPoolState other)
+ {
+ return TexturePoolGpuVa == other.TexturePoolGpuVa &&
+ TexturePoolMaximumId == other.TexturePoolMaximumId &&
+ TextureBufferIndex == other.TextureBufferIndex;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs
new file mode 100644
index 00000000..584eefdc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs
@@ -0,0 +1,113 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.HashTable
+{
+ /// <summary>
+ /// State of a hash calculation.
+ /// </summary>
+ struct HashState
+ {
+ // This is using a slightly modified implementation of FastHash64.
+ // Reference: https://github.com/ztanml/fast-hash/blob/master/fasthash.c
+ private const ulong M = 0x880355f21e6d1965UL;
+ private ulong _hash;
+ private int _start;
+
+ /// <summary>
+ /// One-shot hash calculation for the given data.
+ /// </summary>
+ /// <param name="data">Data to be hashed</param>
+ /// <returns>Hash of the given data</returns>
+ public static uint CalcHash(ReadOnlySpan<byte> data)
+ {
+ HashState state = new HashState();
+
+ state.Initialize();
+ state.Continue(data);
+ return state.Finalize(data);
+ }
+
+ /// <summary>
+ /// Initializes the hash state.
+ /// </summary>
+ public void Initialize()
+ {
+ _hash = 23;
+ }
+
+ /// <summary>
+ /// Hashes the given data in 8-byte words, accumulating the result into the hash state.
+ /// </summary>
+ /// <remarks>
+ /// The full data must be passed on <paramref name="data"/>.
+ /// If this is not the first time the method is called, then <paramref name="data"/> must start with the data passed on the last call.
+ /// If a smaller slice of the data was already hashed before, only the additional data will be hashed.
+ /// This can be used for additive hashing of data in chunks.
+ /// </remarks>
+ /// <param name="data">Data to be hashed</param>
+ public void Continue(ReadOnlySpan<byte> data)
+ {
+ ulong h = _hash;
+
+ ReadOnlySpan<ulong> dataAsUlong = MemoryMarshal.Cast<byte, ulong>(data.Slice(_start));
+
+ for (int i = 0; i < dataAsUlong.Length; i++)
+ {
+ ulong value = dataAsUlong[i];
+
+ h ^= Mix(value);
+ h *= M;
+ }
+
+ _hash = h;
+ _start = data.Length & ~7;
+ }
+
+ /// <summary>
+ /// Performs the hash finalization step, and returns the calculated hash.
+ /// </summary>
+ /// <remarks>
+ /// The full data must be passed on <paramref name="data"/>.
+ /// <paramref name="data"/> must start with the data passed on the last call to <see cref="Continue"/>.
+ /// No internal state is changed, so one can still continue hashing data with <see cref="Continue"/>
+ /// after calling this method.
+ /// </remarks>
+ /// <param name="data">Full data; only the trailing bytes past the last multiple of 8 are read here</param>
+ /// <returns>Hash of all the data hashed with this <see cref="HashState"/></returns>
+ public uint Finalize(ReadOnlySpan<byte> data)
+ {
+ ulong h = _hash;
+
+ int remainder = data.Length & 7;
+ if (remainder != 0)
+ {
+ ulong v = 0;
+
+ for (int i = data.Length - remainder; i < data.Length; i++)
+ {
+ v |= (ulong)data[i] << ((i - remainder) * 8);
+ }
+
+ h ^= Mix(v);
+ h *= M;
+ }
+
+ h = Mix(h);
+ return (uint)(h - (h >> 32));
+ }
+
+ /// <summary>
+ /// Hash mix function.
+ /// </summary>
+ /// <param name="h">Hash to mix</param>
+ /// <returns>Mixed hash</returns>
+ private static ulong Mix(ulong h)
+ {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37UL;
+ h ^= h >> 47;
+ return h;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs
new file mode 100644
index 00000000..c982cd9f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs
@@ -0,0 +1,27 @@
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader.HashTable
+{
+ /// <summary>
+ /// Data accessor, used by <see cref="PartitionedHashTable{T}"/> to access data of unknown length.
+ /// </summary>
+ /// <remarks>
+ /// This will be used to access chunks of data and try finding a match on the table.
+ /// This is necessary because the data size is assumed to be unknown, and so the
+ /// hash table must try to "guess" the size of the data based on the entries on the table.
+ /// </remarks>
+ public interface IDataAccessor
+ {
+ /// <summary>
+ /// Gets a span of shader code at the specified offset, with at most the specified size.
+ /// </summary>
+ /// <remarks>
+ /// This might return a span smaller than the requested <paramref name="length"/> if there's
+ /// no more code available.
+ /// </remarks>
+ /// <param name="offset">Offset in shader code</param>
+ /// <param name="length">Size in bytes</param>
+ /// <returns>Shader code span</returns>
+ ReadOnlySpan<byte> GetSpan(int offset, int length);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs
new file mode 100644
index 00000000..d7cb3d99
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs
@@ -0,0 +1,451 @@
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Gpu.Shader.HashTable
+{
+ /// <summary>
+ /// Hash table for data of a single size, used as one partition of <see cref="PartitionedHashTable{T}"/>.
+ /// </summary>
+ /// <typeparam name="T">Hash table entry type</typeparam>
+ class PartitionHashTable<T>
+ {
+ /// <summary>
+ /// Hash table entry.
+ /// </summary>
+ private struct Entry
+ {
+ /// <summary>
+ /// Hash of the data of this entry, either full or partial.
+ /// </summary>
+ public readonly uint Hash;
+
+ /// <summary>
+ /// If this entry is only a sub-region of <see cref="Data"/>, this indicates the size in bytes
+ /// of that region. Otherwise, it should be zero.
+ /// </summary>
+ public readonly int OwnSize;
+
+ /// <summary>
+ /// Data used to compute the hash for this entry.
+ /// </summary>
+ /// <remarks>
+ /// To avoid additional allocations, this might be an instance of the full entry data,
+ /// and only a sub-region of it might be actually used by this entry. Such sub-region
+ /// has its size indicated by <see cref="OwnSize"/> in this case.
+ /// </remarks>
+ public readonly byte[] Data;
+
+ /// <summary>
+ /// Item associated with this entry.
+ /// </summary>
+ public T Item;
+
+ /// <summary>
+ /// Indicates if the entry is partial, which means that this entry is only for a sub-region of the data.
+ /// </summary>
+ /// <remarks>
+ /// Partial entries have no items associated with them. They just indicate that the data might be present on
+ /// the table, and one must keep looking for the full entry on other tables of larger data size.
+ /// </remarks>
+ public bool IsPartial => OwnSize != 0;
+
+ /// <summary>
+ /// Creates a new partial hash table entry.
+ /// </summary>
+ /// <param name="hash">Hash of the data</param>
+ /// <param name="ownerData">Full data</param>
+ /// <param name="ownSize">Size of the sub-region of data that belongs to this entry</param>
+ public Entry(uint hash, byte[] ownerData, int ownSize)
+ {
+ Hash = hash;
+ OwnSize = ownSize;
+ Data = ownerData;
+ Item = default;
+ }
+
+ /// <summary>
+ /// Creates a new full hash table entry.
+ /// </summary>
+ /// <param name="hash">Hash of the data</param>
+ /// <param name="data">Data</param>
+ /// <param name="item">Item associated with this entry</param>
+ public Entry(uint hash, byte[] data, T item)
+ {
+ Hash = hash;
+ OwnSize = 0;
+ Data = data;
+ Item = item;
+ }
+
+ /// <summary>
+ /// Gets the data for this entry, either full or partial.
+ /// </summary>
+ /// <returns>Data sub-region</returns>
+ public ReadOnlySpan<byte> GetData()
+ {
+ if (OwnSize != 0)
+ {
+ return new ReadOnlySpan<byte>(Data).Slice(0, OwnSize);
+ }
+
+ return Data;
+ }
+ }
+
+ /// <summary>
+ /// Hash table bucket.
+ /// </summary>
+ private struct Bucket
+ {
+ /// <summary>
+ /// Inline entry, to avoid allocations for the common single entry case.
+ /// </summary>
+ public Entry InlineEntry;
+
+ /// <summary>
+ /// List of additional entries for the not-so-common multiple entries case, null until it is needed.
+ /// </summary>
+ public List<Entry> MoreEntries;
+ }
+
+ private Bucket[] _buckets;
+ private int _count;
+
+ /// <summary>
+ /// Total amount of entries on the hash table.
+ /// </summary>
+ public int Count => _count;
+
+ /// <summary>
+ /// Creates a new, empty instance of the hash table, with no buckets allocated.
+ /// </summary>
+ public PartitionHashTable()
+ {
+ _buckets = Array.Empty<Bucket>();
+ }
+
+ /// <summary>
+ /// Gets an item on the table, or adds a new one if not present.
+ /// </summary>
+ /// <param name="data">Data</param>
+ /// <param name="dataHash">Hash of the data</param>
+ /// <param name="item">Item to be added if not found</param>
+ /// <returns>Item of the existing entry with the same data if one is found, or <paramref name="item"/> if not found</returns>
+ public T GetOrAdd(byte[] data, uint dataHash, T item)
+ {
+ if (TryFindItem(dataHash, data, out T existingItem))
+ {
+ return existingItem;
+ }
+
+ Entry entry = new Entry(dataHash, data, item);
+
+ AddToBucket(dataHash, ref entry);
+
+ return item;
+ }
+
+ /// <summary>
+ /// Adds an item to the hash table.
+ /// </summary>
+ /// <param name="data">Data</param>
+ /// <param name="dataHash">Hash of the data</param>
+ /// <param name="item">Item to be added</param>
+ /// <returns>True if the item was added, false if an entry with the same data was already on the table</returns>
+ public bool Add(byte[] data, uint dataHash, T item)
+ {
+ if (TryFindItem(dataHash, data, out _))
+ {
+ return false;
+ }
+
+ Entry entry = new Entry(dataHash, data, item);
+
+ AddToBucket(dataHash, ref entry);
+
+ return true;
+ }
+
+ /// <summary>
+ /// Adds a partial entry to the hash table, calculating the hash of the data sub-region.
+ /// </summary>
+ /// <param name="ownerData">Full data</param>
+ /// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param>
+ /// <returns>True if added, false if an entry with the same data was already on the table</returns>
+ public bool AddPartial(byte[] ownerData, int ownSize)
+ {
+ ReadOnlySpan<byte> data = new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize);
+
+ return AddPartial(ownerData, HashState.CalcHash(data), ownSize);
+ }
+
+ /// <summary>
+ /// Adds a partial entry to the hash table, using a hash that was already calculated.
+ /// </summary>
+ /// <param name="ownerData">Full data</param>
+ /// <param name="dataHash">Hash of the data sub-region</param>
+ /// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param>
+ /// <returns>True if added, false if an entry with the same data was already on the table</returns>
+ public bool AddPartial(byte[] ownerData, uint dataHash, int ownSize)
+ {
+ ReadOnlySpan<byte> data = new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize);
+
+ if (TryFindItem(dataHash, data, out _))
+ {
+ return false;
+ }
+
+ Entry entry = new Entry(dataHash, ownerData, ownSize);
+
+ AddToBucket(dataHash, ref entry);
+
+ return true;
+ }
+
+ /// <summary>
+ /// Adds an entry with a given hash to the table, rebuilding the buckets first if the power of two count changed.
+ /// </summary>
+ /// <param name="dataHash">Hash of the entry</param>
+ /// <param name="entry">Entry</param>
+ private void AddToBucket(uint dataHash, ref Entry entry)
+ {
+ int pow2Count = GetPow2Count(++_count);
+ if (pow2Count != _buckets.Length)
+ {
+ Rebuild(pow2Count);
+ }
+
+ ref Bucket bucket = ref GetBucketForHash(dataHash);
+
+ AddToBucket(ref bucket, ref entry);
+ }
+
+ /// <summary>
+ /// Adds an entry to a bucket, using the inline slot if it is free, or the list of additional entries otherwise.
+ /// </summary>
+ /// <param name="bucket">Bucket to add the entry into</param>
+ /// <param name="entry">Entry to be added</param>
+ private void AddToBucket(ref Bucket bucket, ref Entry entry)
+ {
+ if (bucket.InlineEntry.Data == null)
+ {
+ bucket.InlineEntry = entry;
+ }
+ else
+ {
+ (bucket.MoreEntries ??= new List<Entry>()).Add(entry);
+ }
+ }
+
+ /// <summary>
+ /// Creates partial entries on a new hash table for all existing full entries of this table.
+ /// </summary>
+ /// <remarks>
+ /// This should be called every time a new hash table is created, and there are hash
+ /// tables with data sizes that are higher than that of the new table.
+ /// This will then fill the new hash table with "partial" entries of full entries
+ /// on the hash tables with higher size.
+ /// </remarks>
+ /// <param name="newTable">New hash table</param>
+ /// <param name="newEntrySize">Size of the data on the new hash table</param>
+ public void FillPartials(PartitionHashTable<T> newTable, int newEntrySize)
+ {
+ for (int i = 0; i < _buckets.Length; i++)
+ {
+ ref Bucket bucket = ref _buckets[i];
+ ref Entry inlineEntry = ref bucket.InlineEntry;
+
+ if (inlineEntry.Data != null)
+ {
+ if (!inlineEntry.IsPartial)
+ {
+ newTable.AddPartial(inlineEntry.Data, newEntrySize);
+ }
+
+ if (bucket.MoreEntries != null)
+ {
+ foreach (Entry entry in bucket.MoreEntries)
+ {
+ if (entry.IsPartial)
+ {
+ continue;
+ }
+
+ newTable.AddPartial(entry.Data, newEntrySize);
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Tries to find an entry, either full or partial, with the given data on the table.
+ /// </summary>
+ /// <param name="dataHash">Hash of <paramref name="data"/></param>
+ /// <param name="data">Data to find</param>
+ /// <param name="item">Item of the entry found; default if none was found, or if the entry found is partial</param>
+ /// <returns>True if an entry was found, false otherwise</returns>
+ private bool TryFindItem(uint dataHash, ReadOnlySpan<byte> data, out T item)
+ {
+ if (_count == 0)
+ {
+ item = default;
+ return false;
+ }
+
+ ref Bucket bucket = ref GetBucketForHash(dataHash);
+
+ if (bucket.InlineEntry.Data != null)
+ {
+ if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(data))
+ {
+ item = bucket.InlineEntry.Item;
+ return true;
+ }
+
+ if (bucket.MoreEntries != null)
+ {
+ foreach (Entry entry in bucket.MoreEntries)
+ {
+ if (entry.Hash == dataHash && entry.GetData().SequenceEqual(data))
+ {
+ item = entry.Item;
+ return true;
+ }
+ }
+ }
+ }
+
+ item = default;
+ return false;
+ }
+
+ /// <summary>
+ /// Indicates the result of a hash table lookup.
+ /// </summary>
+ public enum SearchResult
+ {
+ /// <summary>
+ /// No entry was found; the search must continue on hash tables of lower size.
+ /// </summary>
+ NotFound,
+
+ /// <summary>
+ /// A partial entry was found; the search must continue on hash tables of higher size.
+ /// </summary>
+ FoundPartial,
+
+ /// <summary>
+ /// A full entry was found; the search was concluded and the item can be retrieved.
+ /// </summary>
+ FoundFull
+ }
+
+ /// <summary>
+ /// Tries to find an item on the table.
+ /// </summary>
+ /// <param name="dataAccessor">Data accessor</param>
+ /// <param name="size">Size in bytes of the data stored on this hash table</param>
+ /// <param name="item">The item on the table, if found, otherwise unmodified</param>
+ /// <param name="data">The data on the table, if found, otherwise unmodified</param>
+ /// <returns>Table lookup result; <see cref="SearchResult.NotFound"/> if fewer than <paramref name="size"/> bytes are available</returns>
+ public SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, int size, scoped ref T item, scoped ref byte[] data)
+ {
+ if (_count == 0)
+ {
+ return SearchResult.NotFound;
+ }
+
+ ReadOnlySpan<byte> dataSpan = dataAccessor.GetSpanAndHash(size, out uint dataHash);
+
+ if (dataSpan.Length != size)
+ {
+ return SearchResult.NotFound;
+ }
+
+ ref Bucket bucket = ref GetBucketForHash(dataHash);
+
+ if (bucket.InlineEntry.Data != null)
+ {
+ if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(dataSpan))
+ {
+ item = bucket.InlineEntry.Item;
+ data = bucket.InlineEntry.Data;
+ return bucket.InlineEntry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull;
+ }
+
+ if (bucket.MoreEntries != null)
+ {
+ foreach (Entry entry in bucket.MoreEntries)
+ {
+ if (entry.Hash == dataHash && entry.GetData().SequenceEqual(dataSpan))
+ {
+ item = entry.Item;
+ data = entry.Data;
+ return entry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull;
+ }
+ }
+ }
+ }
+
+ return SearchResult.NotFound;
+ }
+
+ /// <summary>
+ /// Rebuilds the table for a new count, moving all existing entries into a new bucket array.
+ /// </summary>
+ /// <param name="newPow2Count">New power of two count of the table</param>
+ private void Rebuild(int newPow2Count)
+ {
+ Bucket[] newBuckets = new Bucket[newPow2Count];
+
+ uint mask = (uint)newPow2Count - 1;
+
+ for (int i = 0; i < _buckets.Length; i++)
+ {
+ ref Bucket bucket = ref _buckets[i];
+
+ if (bucket.InlineEntry.Data != null)
+ {
+ AddToBucket(ref newBuckets[(int)(bucket.InlineEntry.Hash & mask)], ref bucket.InlineEntry);
+
+ if (bucket.MoreEntries != null)
+ {
+ foreach (Entry entry in bucket.MoreEntries)
+ {
+ Entry entryCopy = entry;
+ AddToBucket(ref newBuckets[(int)(entry.Hash & mask)], ref entryCopy);
+ }
+ }
+ }
+ }
+
+ _buckets = newBuckets;
+ }
+
+ /// <summary>
+ /// Gets the bucket for a given hash, masking the hash with the bucket count minus one.
+ /// </summary>
+ /// <param name="hash">Data hash</param>
+ /// <returns>Bucket for the hash</returns>
+ private ref Bucket GetBucketForHash(uint hash)
+ {
+ int index = (int)(hash & (_buckets.Length - 1));
+
+ return ref _buckets[index];
+ }
+
+ /// <summary>
+ /// Gets a power of two count from a regular count.
+ /// </summary>
+ /// <param name="count">Count</param>
+ /// <returns>Power of two count</returns>
+ private static int GetPow2Count(int count)
+ {
+ // This returns the nearest power of two that is lower than or equal to count.
+ // This was done to optimize memory usage rather than performance.
+ return 1 << BitOperations.Log2((uint)count);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs
new file mode 100644
index 00000000..e9a4f654
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs
@@ -0,0 +1,244 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Gpu.Shader.HashTable
+{
+ /// <summary>
+ /// Partitioned hash table.
+ /// </summary>
+ /// <typeparam name="T">Type of the items stored on the table</typeparam>
+ public class PartitionedHashTable<T>
+ {
+ /// <summary>
+ /// Entry for a given data size, wrapping the hash table that stores data of that size.
+ /// </summary>
+ private readonly struct SizeEntry
+ {
+ /// <summary>
+ /// Size for the data that will be stored on the hash table on this entry.
+ /// </summary>
+ public int Size { get; }
+
+ /// <summary>
+ /// Number of entries on the hash table.
+ /// </summary>
+ public int TableCount => _table.Count;
+
+ private readonly PartitionHashTable<T> _table;
+
+ /// <summary>
+ /// Creates an entry for a given size.
+ /// </summary>
+ /// <param name="size">Size of the data to be stored on this entry</param>
+ public SizeEntry(int size)
+ {
+ Size = size;
+ _table = new PartitionHashTable<T>();
+ }
+
+ /// <summary>
+ /// Gets an item for existing data, or adds a new one.
+ /// </summary>
+ /// <param name="data">Data associated with the item</param>
+ /// <param name="dataHash">Hash of <paramref name="data"/></param>
+ /// <param name="item">Item to be added</param>
+ /// <returns>Existing item, or <paramref name="item"/> if not present</returns>
+ public T GetOrAdd(byte[] data, uint dataHash, T item)
+ {
+ Debug.Assert(data.Length == Size);
+ return _table.GetOrAdd(data, dataHash, item);
+ }
+
+ /// <summary>
+ /// Adds a new item.
+ /// </summary>
+ /// <param name="data">Data associated with the item</param>
+ /// <param name="dataHash">Hash of <paramref name="data"/></param>
+ /// <param name="item">Item to be added</param>
+ /// <returns>True if added, false otherwise</returns>
+ public bool Add(byte[] data, uint dataHash, T item)
+ {
+ Debug.Assert(data.Length == Size);
+ return _table.Add(data, dataHash, item);
+ }
+
+ /// <summary>
+ /// Adds a partial entry.
+ /// </summary>
+ /// <param name="ownerData">Full entry data</param>
+ /// <param name="dataHash">Hash of the sub-region of the data that belongs to this entry</param>
+ /// <returns>True if added, false otherwise</returns>
+ public bool AddPartial(byte[] ownerData, uint dataHash)
+ {
+ return _table.AddPartial(ownerData, dataHash, Size);
+ }
+
+ /// <summary>
+ /// Fills a new hash table with "partials" of existing full entries of higher size.
+ /// </summary>
+ /// <param name="newEntry">Entry with the new hash table</param>
+ public void FillPartials(SizeEntry newEntry)
+ {
+ Debug.Assert(newEntry.Size < Size);
+ _table.FillPartials(newEntry._table, newEntry.Size);
+ }
+
+ /// <summary>
+ /// Tries to find an item on the hash table.
+ /// </summary>
+ /// <param name="dataAccessor">Data accessor</param>
+ /// <param name="item">The item on the table, if found, otherwise unmodified</param>
+ /// <param name="data">The data on the table, if found, otherwise unmodified</param>
+ /// <returns>Table lookup result</returns>
+ public PartitionHashTable<T>.SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, scoped ref T item, scoped ref byte[] data)
+ {
+ return _table.TryFindItem(ref dataAccessor, Size, ref item, ref data);
+ }
+ }
+
+ private readonly List<SizeEntry> _sizeTable;
+
+ /// <summary>
+ /// Creates a new, empty partitioned hash table.
+ /// </summary>
+ public PartitionedHashTable()
+ {
+ _sizeTable = new List<SizeEntry>();
+ }
+
+ /// <summary>
+ /// Adds a new item to the table, ignoring the result of <see cref="GetOrAdd"/>.
+ /// </summary>
+ /// <param name="data">Data</param>
+ /// <param name="item">Item associated with the data</param>
+ public void Add(byte[] data, T item)
+ {
+ GetOrAdd(data, item);
+ }
+
+ /// <summary>
+ /// Gets an existing item from the table, or adds a new one if not present.
+ /// </summary>
+ /// <param name="data">Data; its length selects the hash table where the item is stored</param>
+ /// <param name="item">Item associated with the data</param>
+ /// <returns>Existing item, or <paramref name="item"/> if not present</returns>
+ public T GetOrAdd(byte[] data, T item)
+ {
+ SizeEntry sizeEntry;
+
+ int index = BinarySearch(_sizeTable, data.Length);
+ if (index < _sizeTable.Count && _sizeTable[index].Size == data.Length)
+ {
+ sizeEntry = _sizeTable[index];
+ }
+ else
+ {
+ if (index < _sizeTable.Count && _sizeTable[index].Size < data.Length)
+ {
+ index++;
+ }
+
+ sizeEntry = new SizeEntry(data.Length);
+
+ _sizeTable.Insert(index, sizeEntry);
+
+ for (int i = index + 1; i < _sizeTable.Count; i++)
+ {
+ _sizeTable[i].FillPartials(sizeEntry);
+ }
+ }
+
+ HashState hashState = new HashState();
+ hashState.Initialize();
+
+ for (int i = 0; i < index; i++)
+ {
+ ReadOnlySpan<byte> dataSlice = new ReadOnlySpan<byte>(data).Slice(0, _sizeTable[i].Size);
+ hashState.Continue(dataSlice);
+ _sizeTable[i].AddPartial(data, hashState.Finalize(dataSlice));
+ }
+
+ hashState.Continue(data);
+ return sizeEntry.GetOrAdd(data, hashState.Finalize(data), item);
+ }
+
+ /// <summary>
+ /// Performs binary search on a list of hash tables, each one with a fixed data size.
+ /// </summary>
+ /// <param name="entries">List of hash tables</param>
+ /// <param name="size">Size to search for</param>
+ /// <returns>Index of the hash table with the given size, or the last index checked (0 if the list is empty) otherwise</returns>
+ private static int BinarySearch(List<SizeEntry> entries, int size)
+ {
+ int left = 0;
+ int middle = 0;
+ int right = entries.Count - 1;
+
+ while (left <= right)
+ {
+ middle = left + ((right - left) >> 1);
+
+ SizeEntry entry = entries[middle];
+
+ if (size == entry.Size)
+ {
+ break;
+ }
+
+ if (size < entry.Size)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return middle;
+ }
+
+ /// <summary>
+ /// Tries to find an item on the table, performing a binary search over the hash tables of each size.
+ /// </summary>
+ /// <param name="dataAccessor">Data accessor</param>
+ /// <param name="item">Item, if found</param>
+ /// <param name="data">Data of the full entry, if found, otherwise null</param>
+ /// <returns>True if the item was found on the table, false otherwise</returns>
+ public bool TryFindItem(IDataAccessor dataAccessor, out T item, out byte[] data)
+ {
+ SmartDataAccessor sda = new SmartDataAccessor(dataAccessor);
+
+ item = default;
+ data = null;
+
+ int left = 0;
+ int right = _sizeTable.Count;
+
+ while (left != right)
+ {
+ int index = left + ((right - left) >> 1);
+
+ PartitionHashTable<T>.SearchResult result = _sizeTable[index].TryFindItem(ref sda, ref item, ref data);
+
+ if (result == PartitionHashTable<T>.SearchResult.FoundFull)
+ {
+ return true;
+ }
+
+ if (result == PartitionHashTable<T>.SearchResult.NotFound)
+ {
+ right = index;
+ }
+ else /* if (result == PartitionHashTable<T>.SearchResult.FoundPartial) */
+ {
+ left = index + 1;
+ }
+ }
+
+ data = null;
+ return false;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs
new file mode 100644
index 00000000..0632add6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs
@@ -0,0 +1,96 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Shader.HashTable
+{
+ /// <summary>
+ /// Data accessor wrapper that keeps the last span read and the hash states already computed,
+ /// to avoid reading and re-hashing the same memory regions.
+ /// </summary>
+ ref struct SmartDataAccessor
+ {
+ private readonly IDataAccessor _dataAccessor;
+ private ReadOnlySpan<byte> _data;
+ private readonly SortedList<int, HashState> _cachedHashes;
+
+ /// <summary>
+ /// Creates a new smart data accessor.
+ /// </summary>
+ /// <param name="dataAccessor">Data accessor that provides the underlying data</param>
+ public SmartDataAccessor(IDataAccessor dataAccessor)
+ {
+ _cachedHashes = new SortedList<int, HashState>();
+ _data = ReadOnlySpan<byte>.Empty;
+ _dataAccessor = dataAccessor;
+ }
+
+ /// <summary>
+ /// Gets a span of a given size.
+ /// </summary>
+ /// <remarks>
+ /// The actual length of the span returned depends on the <see cref="IDataAccessor"/>
+ /// and might be less than requested.
+ /// </remarks>
+ /// <param name="length">Size in bytes</param>
+ /// <returns>Span with the requested size</returns>
+ public ReadOnlySpan<byte> GetSpan(int length)
+ {
+ if (_data.Length < length)
+ {
+ // The span held so far is too short, read again from offset 0.
+ _data = _dataAccessor.GetSpan(0, length);
+ return _data;
+ }
+
+ // Enough data is already held, trim it down if it is longer than requested.
+ return _data.Length > length ? _data.Slice(0, length) : _data;
+ }
+
+ /// <summary>
+ /// Gets a span of the requested size, and a hash of its data.
+ /// </summary>
+ /// <param name="length">Length of the span</param>
+ /// <param name="hash">Hash of the span data, or 0 if the span is not exactly <paramref name="length"/> bytes long</param>
+ /// <returns>Span of data</returns>
+ public ReadOnlySpan<byte> GetSpanAndHash(int length, out uint hash)
+ {
+ ReadOnlySpan<byte> span = GetSpan(length);
+
+ if (span.Length == length)
+ {
+ hash = CalcHashCached(span);
+ }
+ else
+ {
+ hash = 0;
+ }
+
+ return span;
+ }
+
+ /// <summary>
+ /// Calculates the hash for a requested span.
+ /// This will try to resume from a cached hash state of a shorter length, to avoid re-hashing.
+ /// </summary>
+ /// <param name="data">Data to be hashed</param>
+ /// <returns>Hash of the data</returns>
+ private uint CalcHashCached(ReadOnlySpan<byte> data)
+ {
+ // Keys are sorted in ascending order, so walking back from the end finds
+ // the largest cached size that is strictly smaller than the data length.
+ int reuseIndex = _cachedHashes.Count - 1;
+
+ while (reuseIndex >= 0 && _cachedHashes.Keys[reuseIndex] >= data.Length)
+ {
+ reuseIndex--;
+ }
+
+ HashState state;
+
+ if (reuseIndex >= 0)
+ {
+ state = _cachedHashes.Values[reuseIndex];
+ }
+ else
+ {
+ state = new HashState();
+ state.Initialize();
+ }
+
+ state.Continue(data);
+
+ // The state is stored under the length rounded down to a multiple of 8
+ // (presumably the amount of data the hash consumes per step; confirm against HashState).
+ _cachedHashes[data.Length & ~7] = state;
+ return state.Finalize(data);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs b/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs
new file mode 100644
index 00000000..b85423cb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs
@@ -0,0 +1,36 @@
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Counters for the resources used by a shader.
+ /// </summary>
+ class ResourceCounts
+ {
+ /// <summary>
+ /// Number of uniform buffer bindings taken before any shader resource is counted.
+ /// The first binding is reserved for the support buffer.
+ /// </summary>
+ private const int ReservedUniformBuffers = 1;
+
+ /// <summary>
+ /// Total of uniform buffers used by the shaders.
+ /// </summary>
+ public int UniformBuffersCount;
+
+ /// <summary>
+ /// Total of storage buffers used by the shaders.
+ /// </summary>
+ public int StorageBuffersCount;
+
+ /// <summary>
+ /// Total of textures used by the shaders.
+ /// </summary>
+ public int TexturesCount;
+
+ /// <summary>
+ /// Total of images used by the shaders.
+ /// </summary>
+ public int ImagesCount;
+
+ /// <summary>
+ /// Creates a new instance of the shader resource counts class.
+ /// The uniform buffer count starts past the reserved support buffer binding.
+ /// </summary>
+ public ResourceCounts()
+ {
+ UniformBuffersCount = ReservedUniformBuffers;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs
new file mode 100644
index 00000000..651dfd26
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs
@@ -0,0 +1,64 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Shader code addresses in memory, one per shader stage.
+ /// </summary>
+ struct ShaderAddresses : IEquatable<ShaderAddresses>
+ {
+#pragma warning disable CS0649
+ public ulong VertexA;
+ public ulong VertexB;
+ public ulong TessControl;
+ public ulong TessEvaluation;
+ public ulong Geometry;
+ public ulong Fragment;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Checks if an object is a <see cref="ShaderAddresses"/> holding the same addresses.
+ /// </summary>
+ /// <param name="other">Object to compare with</param>
+ /// <returns>True if they are equal, false otherwise</returns>
+ public override bool Equals(object other)
+ {
+ if (other is ShaderAddresses addresses)
+ {
+ return Equals(addresses);
+ }
+
+ return false;
+ }
+
+ /// <summary>
+ /// Checks if all the stage addresses are equal.
+ /// </summary>
+ /// <param name="other">Shader addresses structure to compare with</param>
+ /// <returns>True if they are equal, false otherwise</returns>
+ public bool Equals(ShaderAddresses other)
+ {
+ if (VertexA != other.VertexA || VertexB != other.VertexB)
+ {
+ return false;
+ }
+
+ if (TessControl != other.TessControl || TessEvaluation != other.TessEvaluation)
+ {
+ return false;
+ }
+
+ return Geometry == other.Geometry && Fragment == other.Fragment;
+ }
+
+ /// <summary>
+ /// Computes hash code from the addresses.
+ /// </summary>
+ /// <returns>Hash code</returns>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment);
+ }
+
+ /// <summary>
+ /// Gets a view of the structure as a span of addresses.
+ /// </summary>
+ /// <remarks>
+ /// The span starts at <see cref="VertexA"/> and refers to the storage of this instance.
+ /// </remarks>
+ /// <returns>Span of addresses</returns>
+ public Span<ulong> AsSpan()
+ {
+ // One element for each ulong-sized slot in the structure.
+ int count = Unsafe.SizeOf<ShaderAddresses>() / sizeof(ulong);
+
+ return MemoryMarshal.CreateSpan(ref VertexA, count);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
new file mode 100644
index 00000000..e1ab9327
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -0,0 +1,774 @@
+using Ryujinx.Common.Configuration;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Gpu.Shader.DiskCache;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Memory cache of shader code.
+ /// </summary>
+ class ShaderCache : IDisposable
+ {
+ /// <summary>
+ /// Default flags used on the shader translation process.
+ /// </summary>
+ public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;
+
+ /// <summary>
+ /// Pairs a cached shader stage with the program produced by translating it.
+ /// </summary>
+ private readonly struct TranslatedShader
+ {
+ public readonly CachedShaderStage Shader;
+ public readonly ShaderProgram Program;
+
+ /// <summary>
+ /// Creates a new translated shader.
+ /// </summary>
+ /// <param name="shader">Cached shader stage</param>
+ /// <param name="program">Translated program</param>
+ public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
+ {
+ (Shader, Program) = (shader, program);
+ }
+ }
+
+ /// <summary>
+ /// Groups the cached Vertex A and Vertex B stages with the program translated for them.
+ /// </summary>
+ private readonly struct TranslatedShaderVertexPair
+ {
+ public readonly CachedShaderStage VertexA;
+ public readonly CachedShaderStage VertexB;
+ public readonly ShaderProgram Program;
+
+ /// <summary>
+ /// Creates a new translated vertex shader pair.
+ /// </summary>
+ /// <param name="vertexA">Cached Vertex A stage</param>
+ /// <param name="vertexB">Cached Vertex B stage</param>
+ /// <param name="program">Translated program</param>
+ public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
+ {
+ (VertexA, VertexB, Program) = (vertexA, vertexB, program);
+ }
+ }
+
+ private readonly GpuContext _context;
+
+ private readonly ShaderDumper _dumper;
+
+ private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;
+ private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;
+
+ /// <summary>
+ /// Entry of the queue of programs waiting to be written to the disk cache.
+ /// </summary>
+ private readonly struct ProgramToSave
+ {
+ public readonly CachedShaderProgram CachedProgram;
+ public readonly IProgram HostProgram;
+ public readonly byte[] BinaryCode;
+
+ /// <summary>
+ /// Creates a new queue entry.
+ /// </summary>
+ /// <param name="cachedProgram">Cached shader program</param>
+ /// <param name="hostProgram">Host program</param>
+ /// <param name="binaryCode">Binary code to be saved, may be null</param>
+ public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode)
+ {
+ (CachedProgram, HostProgram, BinaryCode) = (cachedProgram, hostProgram, binaryCode);
+ }
+ }
+
+ private Queue<ProgramToSave> _programsToSaveQueue;
+
+ private readonly ComputeShaderCacheHashTable _computeShaderCache;
+ private readonly ShaderCacheHashTable _graphicsShaderCache;
+ private readonly DiskCacheHostStorage _diskCacheHostStorage;
+ private readonly BackgroundDiskCacheWriter _cacheWriter;
+
+ /// <summary>
+ /// Event for signalling shader cache loading progress.
+ /// </summary>
+ public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;
+
+ /// <summary>
+ /// Creates a new instance of the shader cache.
+ /// </summary>
+ /// <param name="context">GPU context that the shader cache belongs to</param>
+ public ShaderCache(GpuContext context)
+ {
+ _context = context;
+ _dumper = new ShaderDumper();
+
+ _cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
+ _gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();
+ _programsToSaveQueue = new Queue<ProgramToSave>();
+
+ _computeShaderCache = new ComputeShaderCacheHashTable();
+ _graphicsShaderCache = new ShaderCacheHashTable();
+
+ // The path is null when the shader cache is disabled or there is no title ID.
+ _diskCacheHostStorage = new DiskCacheHostStorage(GetDiskCachePath());
+
+ // The background writer only exists while the disk cache is enabled.
+ _cacheWriter = _diskCacheHostStorage.CacheEnabled
+ ? new BackgroundDiskCacheWriter(context, _diskCacheHostStorage)
+ : null;
+ }
+
+ /// <summary>
+ /// Gets the path where the disk cache for the current application is stored.
+ /// </summary>
+ /// <returns>Disk cache path, or null if the shader cache is disabled or there is no title ID</returns>
+ private static string GetDiskCachePath()
+ {
+ if (!GraphicsConfig.EnableShaderCache || GraphicsConfig.TitleId == null)
+ {
+ return null;
+ }
+
+ return Path.Combine(AppDataManager.GamesDirPath, GraphicsConfig.TitleId, "cache", "shader");
+ }
+
+ /// <summary>
+ /// Processes the queue of shaders that must save their binaries to the disk cache.
+ /// </summary>
+ /// <remarks>
+ /// Entries are handled in order, and processing stops at the first program whose link is still incomplete.
+ /// </remarks>
+ public void ProcessShaderCacheQueue()
+ {
+ while (_programsToSaveQueue.TryPeek(out ProgramToSave pending))
+ {
+ ProgramLinkStatus status = pending.HostProgram.CheckProgramLink(false);
+
+ if (status == ProgramLinkStatus.Incomplete)
+ {
+ // Not ready yet, leave it on the queue.
+ return;
+ }
+
+ if (status == ProgramLinkStatus.Success)
+ {
+ // Use the binary stored with the entry if there is one, otherwise get it from the host program.
+ _cacheWriter.AddShader(pending.CachedProgram, pending.BinaryCode ?? pending.HostProgram.GetBinary());
+ }
+
+ // Programs that did not link successfully are removed without being saved.
+ _programsToSaveQueue.Dequeue();
+ }
+ }
+
+ /// <summary>
+ /// Initializes the cache, loading the shaders from the disk cache if it is enabled.
+ /// </summary>
+ /// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
+ internal void Initialize(CancellationToken cancellationToken)
+ {
+ if (!_diskCacheHostStorage.CacheEnabled)
+ {
+ return;
+ }
+
+ var loader = new ParallelDiskCacheLoader(
+ _context,
+ _graphicsShaderCache,
+ _computeShaderCache,
+ _diskCacheHostStorage,
+ cancellationToken,
+ ShaderCacheStateUpdate);
+
+ loader.LoadShaders();
+
+ int errorCount = loader.ErrorCount;
+
+ if (errorCount == 0)
+ {
+ return;
+ }
+
+ Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache.");
+ }
+
+ /// <summary>
+ /// Forwards a shader cache state update to the <see cref="ShaderCacheStateChanged"/> subscribers, if any.
+ /// </summary>
+ /// <param name="state">Current state of the shader cache load process</param>
+ /// <param name="current">Number of the current shader being processed</param>
+ /// <param name="total">Total number of shaders to process</param>
+ private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
+ {
+ Action<ShaderCacheState, int, int> handler = ShaderCacheStateChanged;
+
+ if (handler != null)
+ {
+ handler(state, current, total);
+ }
+ }
+
+ /// <summary>
+ /// Gets the compute shader program for the code at a given GPU virtual address.
+ /// </summary>
+ /// <remarks>
+ /// The address dictionary is checked first, then the guest code hash table.
+ /// If neither has a matching program, the code is translated, a host program is created
+ /// and the result is added to the caches.
+ /// </remarks>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="computeState">Compute engine state</param>
+ /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
+ /// <returns>Compiled compute shader code</returns>
+ public CachedShaderProgram GetComputeShader(
+ GpuChannel channel,
+ GpuChannelPoolState poolState,
+ GpuChannelComputeState computeState,
+ ulong gpuVa)
+ {
+ // A program is already associated with this address, use it if it still matches.
+ if (_cpPrograms.TryGetValue(gpuVa, out CachedShaderProgram program) &&
+ IsShaderEqual(channel, poolState, computeState, program, gpuVa))
+ {
+ return program;
+ }
+
+ // Look for the program on the hash table, and remember it for this address if found.
+ if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out program, out byte[] cachedGuestCode))
+ {
+ _cpPrograms[gpuVa] = program;
+ return program;
+ }
+
+ var specState = new ShaderSpecializationState(ref computeState);
+ var accessorState = new GpuAccessorState(poolState, computeState, default, specState);
+ var accessor = new GpuAccessor(_context, channel, accessorState);
+
+ TranslatorContext translatorContext = DecodeComputeShader(accessor, _context.Capabilities.Api, gpuVa);
+
+ // The guest code returned by the failed lookup is passed along (it may be null).
+ TranslatedShader translated = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
+
+ ShaderSource[] sources = new ShaderSource[] { CreateShaderSource(translated.Program) };
+
+ IProgram hostProgram = _context.Renderer.CreateProgram(sources, new ShaderInfo(-1));
+
+ program = new CachedShaderProgram(hostProgram, specState, translated.Shader);
+
+ _computeShaderCache.Add(program);
+ EnqueueProgramToSave(program, hostProgram, sources);
+ _cpPrograms[gpuVa] = program;
+
+ return program;
+ }
+
+ /// <summary>
+ /// Updates the shader pipeline state based on the current GPU state.
+ /// </summary>
+ /// <remarks>
+ /// This fills the sample count, the color and depth-stencil attachment information,
+ /// the vertex buffer count and the topology.
+ /// </remarks>
+ /// <param name="state">Current GPU 3D engine state</param>
+ /// <param name="pipeline">Shader pipeline state to be updated</param>
+ /// <param name="graphicsState">Current graphics state</param>
+ /// <param name="channel">Current GPU channel</param>
+ private void UpdatePipelineInfo(
+ ref ThreedClassState state,
+ ref ProgramPipelineState pipeline,
+ GpuChannelGraphicsState graphicsState,
+ GpuChannel channel)
+ {
+ channel.TextureManager.UpdateRenderTargets();
+
+ var msaaMode = state.RtMsaaMode;
+
+ pipeline.SamplesCount = msaaMode.SamplesInX() * msaaMode.SamplesInY();
+
+ var rtControl = state.RtControl;
+
+ int activeTargets = rtControl.UnpackCount();
+
+ for (int slot = 0; slot < Constants.TotalRenderTargets; slot++)
+ {
+ var colorState = state.RtColorState[rtControl.UnpackPermutationIndex(slot)];
+
+ // A target is enabled only if it is within the active count and has a format and a size.
+ bool enabled = slot < activeTargets && colorState.Format != 0 && colorState.WidthOrStride != 0;
+
+ pipeline.AttachmentEnable[slot] = enabled;
+
+ // Disabled targets get a fixed placeholder format.
+ pipeline.AttachmentFormats[slot] = enabled ? colorState.Format.Convert().Format : Format.R8G8B8A8Unorm;
+ }
+
+ pipeline.DepthStencilEnable = state.RtDepthStencilEnable;
+
+ if (pipeline.DepthStencilEnable)
+ {
+ pipeline.DepthStencilFormat = state.RtDepthStencilState.Format.Convert().Format;
+ }
+ else
+ {
+ pipeline.DepthStencilFormat = Format.D24UnormS8Uint;
+ }
+
+ pipeline.VertexBufferCount = Constants.TotalVertexBuffers;
+ pipeline.Topology = graphicsState.Topology;
+ }
+
+ /// <summary>
+ /// Gets the graphics shader program for a set of stage addresses, creating it if it is not cached.
+ /// The program includes every stage with a non-zero address.
+ /// </summary>
+ /// <remarks>
+ /// Lookup order: address dictionary, then guest code hash table. On a miss, the stages are translated, a host program is created, cached and queued for saving.
+ /// </remarks>
+ /// <param name="state">GPU state, read for transform feedback and render target information on a cache miss</param>
+ /// <param name="pipeline">Pipeline state, updated from the GPU state on a cache miss</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="graphicsState">3D engine state</param>
+ /// <param name="addresses">Addresses of the shaders for each stage</param>
+ /// <returns>Compiled graphics shader code</returns>
+ public CachedShaderProgram GetGraphicsShader(
+ ref ThreedClassState state,
+ ref ProgramPipelineState pipeline,
+ GpuChannel channel,
+ ref GpuChannelPoolState poolState,
+ ref GpuChannelGraphicsState graphicsState,
+ ShaderAddresses addresses)
+ {
+ if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, ref poolState, ref graphicsState, gpShaders, addresses))
+ {
+ return gpShaders;
+ }
+ // Nothing usable is associated with these addresses, try the guest code hash table.
+ if (_graphicsShaderCache.TryFind(channel, ref poolState, ref graphicsState, addresses, out gpShaders, out var cachedGuestCode))
+ {
+ _gpPrograms[addresses] = gpShaders;
+ return gpShaders;
+ }
+ // Cache miss: the program has to be created from the guest code.
+ TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);
+
+ UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel);
+
+ ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors);
+ GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);
+
+ ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
+
+ TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
+ TranslatorContext nextStage = null;
+
+ TargetApi api = _context.Capabilities.Api;
+ // Decode from the last stage to the first, so that each stage can be given the stage that follows it.
+ for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
+ {
+ ulong gpuVa = addressesSpan[stageIndex + 1];
+ // Span entry 0 is Vertex A, so the regular stages start at 1. Stages with a zero address are skipped.
+ if (gpuVa != 0)
+ {
+ GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
+ TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa);
+
+ if (nextStage != null)
+ {
+ currentStage.SetNextStage(nextStage);
+ }
+ // Vertex A is decoded together with the first stage, and kept on slot 0.
+ if (stageIndex == 0 && addresses.VertexA != 0)
+ {
+ translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
+ }
+
+ translatorContexts[stageIndex + 1] = currentStage;
+ nextStage = currentStage;
+ }
+ }
+
+ if (!_context.Capabilities.SupportsGeometryShader)
+ {
+ TryRemoveGeometryStage(translatorContexts);
+ }
+
+ CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
+ List<ShaderSource> shaderSources = new List<ShaderSource>();
+
+ TranslatorContext previousStage = null;
+ // Translate the decoded stages, now from the first to the last.
+ for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
+ {
+ TranslatorContext currentStage = translatorContexts[stageIndex + 1];
+
+ if (currentStage != null)
+ {
+ ShaderProgram program;
+ // With Vertex A present, it is translated along with the first stage, producing a single program.
+ if (stageIndex == 0 && translatorContexts[0] != null)
+ {
+ TranslatedShaderVertexPair translatedShader = TranslateShader(
+ _dumper,
+ channel,
+ currentStage,
+ translatorContexts[0],
+ cachedGuestCode.VertexACode,
+ cachedGuestCode.VertexBCode);
+
+ shaders[0] = translatedShader.VertexA;
+ shaders[1] = translatedShader.VertexB;
+ program = translatedShader.Program;
+ }
+ else
+ {
+ byte[] code = cachedGuestCode.GetByIndex(stageIndex);
+
+ TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
+
+ shaders[stageIndex + 1] = translatedShader.Shader;
+ program = translatedShader.Program;
+ }
+
+ if (program != null)
+ {
+ shaderSources.Add(CreateShaderSource(program));
+ }
+
+ previousStage = currentStage;
+ }
+ else if (
+ previousStage != null &&
+ previousStage.LayerOutputWritten &&
+ stageIndex == 3 && // Stage index 3 is the geometry stage.
+ !_context.Capabilities.SupportsLayerVertexTessellation)
+ {
+ shaderSources.Add(CreateShaderSource(previousStage.GenerateGeometryPassthrough()));
+ }
+ }
+
+ ShaderSource[] shaderSourcesArray = shaderSources.ToArray();
+ // Slot 5 holds the fragment stage (stage index 4, plus 1). -1 is passed when there is no fragment stage.
+ int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
+ IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline));
+
+ gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
+ // Make the new program available to later lookups, and queue it to be saved on the disk cache.
+ _graphicsShaderCache.Add(gpShaders);
+ EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
+ _gpPrograms[addresses] = gpShaders;
+
+ return gpShaders;
+ }
+
+ /// <summary>
+ /// Tries to eliminate the geometry stage from the array of translator contexts.
+ /// </summary>
+ /// <remarks>
+ /// The stage is only removed when it is identified as a layer passthrough geometry shader.
+ /// In that case the layer is set on the closest previous stage instead.
+ /// </remarks>
+ /// <param name="translatorContexts">Array of translator contexts</param>
+ public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts)
+ {
+ // Slot of the geometry stage on the array.
+ const int GeometrySlot = 4;
+
+ TranslatorContext geometry = translatorContexts[GeometrySlot];
+
+ if (geometry == null)
+ {
+ // No geometry shader, nothing to eliminate.
+ return;
+ }
+
+ // We have a geometry shader, but geometry shaders are not supported.
+ // Check if it is one that can be eliminated.
+ ShaderProgramInfo info = geometry.Translate().Info;
+
+ if (info.Identification != ShaderIdentification.GeometryLayerPassthrough)
+ {
+ return;
+ }
+
+ // The geometry shader is only used to set the output Layer value, so the Layer
+ // can be set on the previous stage instead (usually the vertex stage).
+ for (int slot = GeometrySlot - 1; slot >= 1; slot--)
+ {
+ TranslatorContext earlier = translatorContexts[slot];
+
+ if (earlier == null)
+ {
+ continue;
+ }
+
+ earlier.SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute);
+ earlier.SetLastInVertexPipeline();
+ break;
+ }
+
+ translatorContexts[GeometrySlot] = null;
+ }
+
+ /// <summary>
+ /// Creates a shader source for use with the backend from a translated shader program.
+ /// </summary>
+ /// <param name="program">Translated shader program</param>
+ /// <returns>Shader source with the code, bindings, stage and language of the program</returns>
+ public static ShaderSource CreateShaderSource(ShaderProgram program)
+ {
+ ShaderBindings bindings = GetBindings(program.Info);
+
+ return new ShaderSource(
+ program.Code,
+ program.BinaryCode,
+ bindings,
+ program.Info.Stage,
+ program.Language);
+ }
+
+ /// <summary>
+ /// Puts a program on the queue of programs to be saved on the disk cache.
+ /// </summary>
+ /// <remarks>
+ /// This will not do anything if disk shader cache is disabled.
+ /// The shader sources are only packed and stored with the entry when the API is Vulkan.
+ /// </remarks>
+ /// <param name="program">Cached shader program</param>
+ /// <param name="hostProgram">Host program</param>
+ /// <param name="sources">Source for each shader stage</param>
+ private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources)
+ {
+ if (!_diskCacheHostStorage.CacheEnabled)
+ {
+ return;
+ }
+
+ byte[] packedSources = null;
+
+ if (_context.Capabilities.Api == TargetApi.Vulkan)
+ {
+ packedSources = ShaderBinarySerializer.Pack(sources);
+ }
+
+ _programsToSaveQueue.Enqueue(new ProgramToSave(program, hostProgram, packedSources));
+ }
+
+ /// <summary>
+ /// Gets transform feedback state from the current GPU state.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>One descriptor for each transform feedback buffer, or null if transform feedback is disabled</returns>
+ private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
+ {
+ bool enabled = state.TfEnable;
+
+ if (!enabled)
+ {
+ return null;
+ }
+
+ var descriptors = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];
+
+ for (int buffer = 0; buffer < descriptors.Length; buffer++)
+ {
+ var tf = state.TfState[buffer];
+
+ descriptors[buffer] = new TransformFeedbackDescriptor(
+ tf.BufferIndex,
+ tf.Stride,
+ tf.VaryingsCount,
+ ref state.TfVaryingLocations[buffer]);
+ }
+
+ return descriptors;
+ }
+
+ /// <summary>
+ /// Checks if compute shader code in memory is equal to the cached shader,
+ /// and if the cached shader is compatible with the current state.
+ /// </summary>
+ /// <param name="channel">GPU channel using the shader</param>
+ /// <param name="poolState">GPU channel state to verify shader compatibility</param>
+ /// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
+ /// <param name="cpShader">Cached compute shader</param>
+ /// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
+ /// <returns>True if the code is equal and the specialization state matches, false otherwise</returns>
+ private static bool IsShaderEqual(
+ GpuChannel channel,
+ GpuChannelPoolState poolState,
+ GpuChannelComputeState computeState,
+ CachedShaderProgram cpShader,
+ ulong gpuVa)
+ {
+ // The specialization state is only checked once the code is known to be the same.
+ return IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa) &&
+ cpShader.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true);
+ }
+
+ /// <summary>
+ /// Checks if graphics shader code from all stages in memory is equal to the cached shaders,
+ /// and if the cached shaders are compatible with the current state.
+ /// </summary>
+ /// <param name="channel">GPU channel using the shader</param>
+ /// <param name="poolState">GPU channel state to verify shader compatibility</param>
+ /// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param>
+ /// <param name="gpShaders">Cached graphics shaders</param>
+ /// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
+ /// <returns>True if the code of all stages is equal and the specialization state matches, false otherwise</returns>
+ private static bool IsShaderEqual(
+ GpuChannel channel,
+ ref GpuChannelPoolState poolState,
+ ref GpuChannelGraphicsState graphicsState,
+ CachedShaderProgram gpShaders,
+ ShaderAddresses addresses)
+ {
+ ReadOnlySpan<ulong> stageAddresses = addresses.AsSpan();
+
+ for (int stage = 0; stage < gpShaders.Shaders.Length; stage++)
+ {
+ bool sameCode = IsShaderEqual(channel.MemoryManager, gpShaders.Shaders[stage], stageAddresses[stage]);
+
+ if (!sameCode)
+ {
+ return false;
+ }
+ }
+
+ // Entry 1 is the Vertex B stage.
+ CachedShaderStage vertexStage = gpShaders.Shaders[1];
+
+ bool usesDrawParameters = vertexStage != null && vertexStage.Info.UsesDrawParameters;
+
+ return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true);
+ }
+
+ /// <summary>
+ /// Checks if the code of the specified cached shader is equal to the code in memory.
+ /// </summary>
+ /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
+ /// <param name="shader">Cached shader to compare with, a null shader is always considered equal</param>
+ /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
+ /// <returns>True if the code is equal or there is no cached shader, false otherwise</returns>
+ private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
+ {
+ return shader == null || memoryManager.GetSpan(gpuVa, shader.Code.Length).SequenceEqual(shader.Code);
+ }
+
+ /// <summary>
+ /// Decodes the binary Maxwell compute shader code to a translator context.
+ /// </summary>
+ /// <remarks>
+ /// The translation options use the default flags with the compute flag added.
+ /// </remarks>
+ /// <param name="gpuAccessor">GPU state accessor</param>
+ /// <param name="api">Graphics API that will be used with the shader</param>
+ /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
+ /// <returns>The generated translator context</returns>
+ public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, TargetApi api, ulong gpuVa)
+ {
+ TranslationFlags flags = DefaultFlags | TranslationFlags.Compute;
+
+ return Translator.CreateContext(gpuVa, gpuAccessor, CreateTranslationOptions(api, flags));
+ }
+
+ /// <summary>
+ /// Decodes the binary Maxwell graphics shader code to a translator context.
+ /// </summary>
+ /// <param name="gpuAccessor">GPU state accessor</param>
+ /// <param name="api">Graphics API that will be used with the shader</param>
+ /// <param name="flags">Flags that control shader translation</param>
+ /// <param name="gpuVa">GPU virtual address of the shader code</param>
+ /// <returns>The generated translator context</returns>
+ public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TargetApi api, TranslationFlags flags, ulong gpuVa)
+ {
+ return Translator.CreateContext(gpuVa, gpuAccessor, CreateTranslationOptions(api, flags));
+ }
+
+ /// <summary>
+ /// Translates the Vertex A and Vertex B stages into a single program that the host API accepts.
+ /// </summary>
+ /// <param name="dumper">Optional shader code dumper</param>
+ /// <param name="channel">GPU channel using the shader</param>
+ /// <param name="currentStage">Translator context of the stage to be translated</param>
+ /// <param name="vertexA">Translator context of the Vertex A shader that should be combined</param>
+ /// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, read from memory if null</param>
+ /// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, read from memory if null</param>
+ /// <returns>Translated program, along with the cached stages of both shaders</returns>
+ private static TranslatedShaderVertexPair TranslateShader(
+ ShaderDumper dumper,
+ GpuChannel channel,
+ TranslatorContext currentStage,
+ TranslatorContext vertexA,
+ byte[] codeA,
+ byte[] codeB)
+ {
+ ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);
+
+ var memoryManager = channel.MemoryManager;
+
+ if (codeA == null)
+ {
+ codeA = memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
+ }
+
+ if (codeB == null)
+ {
+ codeB = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
+ }
+
+ // Both shaders read from the same constant buffer address, but each one has its own size.
+ byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray();
+ byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray();
+
+ ShaderDumpPaths pathsA = dumper?.Dump(codeA, compute: false) ?? default;
+ ShaderDumpPaths pathsB = dumper?.Dump(codeB, compute: false) ?? default;
+
+ ShaderProgram program = currentStage.Translate(vertexA);
+
+ pathsB.Prepend(program);
+ pathsA.Prepend(program);
+
+ // The program information is only stored on the Vertex B stage.
+ return new TranslatedShaderVertexPair(
+ new CachedShaderStage(null, codeA, cb1DataA),
+ new CachedShaderStage(program.Info, codeB, cb1DataB),
+ program);
+ }
+
+ /// <summary>
+ /// Translates a previously generated translator context to something that the host API accepts.
+ /// </summary>
+ /// <param name="dumper">Optional shader code dumper</param>
+ /// <param name="channel">GPU channel using the shader</param>
+ /// <param name="context">Translator context of the stage to be translated</param>
+ /// <param name="code">Optional Maxwell binary code of the current stage shader, read from memory if null</param>
+ /// <returns>Translated program, along with the cached stage of the shader</returns>
+ private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
+ {
+ var memoryManager = channel.MemoryManager;
+
+ ulong cb1DataAddress;
+
+ if (context.Stage == ShaderStage.Compute)
+ {
+ cb1DataAddress = channel.BufferManager.GetComputeUniformBufferAddress(1);
+ }
+ else
+ {
+ cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);
+ }
+
+ byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray();
+
+ if (code == null)
+ {
+ code = memoryManager.GetSpan(context.Address, context.Size).ToArray();
+ }
+
+ ShaderDumpPaths paths = default;
+
+ if (dumper != null)
+ {
+ paths = dumper.Dump(code, context.Stage == ShaderStage.Compute);
+ }
+
+ ShaderProgram program = context.Translate();
+
+ paths.Prepend(program);
+
+ CachedShaderStage stage = new CachedShaderStage(program.Info, code, cb1Data);
+
+ return new TranslatedShader(stage, program);
+ }
+
+ /// <summary>
+ /// Gets the index of a stage from a <see cref="ShaderStage"/>.
+ /// </summary>
+ /// <param name="stage">Stage to get the index from</param>
+ /// <returns>Stage index, which is 0 for any stage without an index of its own</returns>
+ private static int StageToStageIndex(ShaderStage stage)
+ {
+ switch (stage)
+ {
+ case ShaderStage.TessellationControl:
+ return 1;
+ case ShaderStage.TessellationEvaluation:
+ return 2;
+ case ShaderStage.Geometry:
+ return 3;
+ case ShaderStage.Fragment:
+ return 4;
+ default:
+ return 0;
+ }
+ }
+
+ /// <summary>
+ /// Gets the bindings of the uniform buffers, storage buffers, textures and images used by a shader program.
+ /// </summary>
+ /// <param name="info">Shader program information to get the bindings from</param>
+ /// <returns>Shader bindings</returns>
+ public static ShaderBindings GetBindings(ShaderProgramInfo info)
+ {
+ return new ShaderBindings(
+ info.CBuffers.Select(buffer => buffer.Binding).ToArray(),
+ info.SBuffers.Select(buffer => buffer.Binding).ToArray(),
+ info.Textures.Select(texture => texture.Binding).ToArray(),
+ info.Images.Select(image => image.Binding).ToArray());
+ }
+
+ /// <summary>
+ /// Creates shader translation options with the requested graphics API and flags.
+ /// The shader language is chosen based on the current configuration and graphics API.
+ /// </summary>
+ /// <param name="api">Target graphics API</param>
+ /// <param name="flags">Translation flags</param>
+ /// <returns>Translation options</returns>
+ private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags)
+ {
+ // SPIR-V is only used on Vulkan, and only if enabled on the configuration.
+ bool useSpirv = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan;
+
+ TargetLanguage language = TargetLanguage.Glsl;
+
+ if (useSpirv)
+ {
+ language = TargetLanguage.Spirv;
+ }
+
+ return new TranslationOptions(language, api, flags);
+ }
+
+ /// <summary>
+ /// Disposes the shader cache, disposing all the cached graphics and compute programs
+ /// and the disk cache writer, if there is one.
+ /// It's an error to use the shader cache after disposal.
+ /// </summary>
+ public void Dispose()
+ {
+ foreach (CachedShaderProgram graphicsProgram in _graphicsShaderCache.GetPrograms())
+ {
+ graphicsProgram.Dispose();
+ }
+
+ foreach (CachedShaderProgram computeProgram in _computeShaderCache.GetPrograms())
+ {
+ computeProgram.Dispose();
+ }
+
+ if (_cacheWriter != null)
+ {
+ _cacheWriter.Dispose();
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs
new file mode 100644
index 00000000..e35c06b1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs
@@ -0,0 +1,282 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Gpu.Shader.HashTable;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Guest code of each graphics shader stage that was found on the cache.
+ /// </summary>
+ struct CachedGraphicsGuestCode
+ {
+     public byte[] VertexACode;
+     public byte[] VertexBCode;
+     public byte[] TessControlCode;
+     public byte[] TessEvaluationCode;
+     public byte[] GeometryCode;
+     public byte[] FragmentCode;
+
+     /// <summary>
+     /// Gets the guest code of a shader stage by its index.
+     /// </summary>
+     /// <remarks>
+     /// Indices 1 to 4 select the tessellation control, tessellation evaluation, geometry and
+     /// fragment code respectively. Any other index selects the vertex B code.
+     /// The vertex A code can't be obtained through this method.
+     /// </remarks>
+     /// <param name="stageIndex">Index of the shader stage</param>
+     /// <returns>Guest code, or null if not present</returns>
+     public byte[] GetByIndex(int stageIndex)
+     {
+         switch (stageIndex)
+         {
+             case 1:
+                 return TessControlCode;
+             case 2:
+                 return TessEvaluationCode;
+             case 3:
+                 return GeometryCode;
+             case 4:
+                 return FragmentCode;
+             default:
+                 return VertexBCode;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Graphics shader cache hash table.
+ /// </summary>
+ class ShaderCacheHashTable
+ {
+ /// <summary>
+ /// Shader ID cache, assigns an integer ID to each distinct guest code blob.
+ /// </summary>
+ private struct IdCache
+ {
+     private PartitionedHashTable<int> _cache;
+     private int _id;
+
+     /// <summary>
+     /// Initializes the state, with an empty table and the ID counter at zero.
+     /// </summary>
+     public void Initialize()
+     {
+         _id = 0;
+         _cache = new PartitionedHashTable<int>();
+     }
+
+     /// <summary>
+     /// Adds guest code to the cache.
+     /// </summary>
+     /// <remarks>
+     /// If the code was already cached, it will just return the existing ID.
+     /// The counter only advances when the code is new, so the first ID handed out is 1.
+     /// </remarks>
+     /// <param name="code">Code to add</param>
+     /// <returns>Unique ID for the guest code</returns>
+     public int Add(byte[] code)
+     {
+         int candidateId = _id + 1;
+         _id = candidateId;
+
+         int storedId = _cache.GetOrAdd(code, candidateId);
+
+         if (storedId != candidateId)
+         {
+             // The code was already on the table, give the reserved ID back.
+             _id = candidateId - 1;
+         }
+
+         return storedId;
+     }
+
+     /// <summary>
+     /// Tries to find cached guest code.
+     /// </summary>
+     /// <param name="dataAccessor">Code accessor used to read guest code to find a match on the hash table</param>
+     /// <param name="id">ID of the guest code, if found</param>
+     /// <param name="data">Cached guest code, if found</param>
+     /// <returns>True if found, false otherwise</returns>
+     public bool TryFind(IDataAccessor dataAccessor, out int id, out byte[] data)
+         => _cache.TryFindItem(dataAccessor, out id, out data);
+ }
+
+ /// <summary>
+ /// Guest code IDs of the guest shaders that, when combined, form a single host program.
+ /// </summary>
+ private struct IdTable : IEquatable<IdTable>
+ {
+     public int VertexAId;
+     public int VertexBId;
+     public int TessControlId;
+     public int TessEvaluationId;
+     public int GeometryId;
+     public int FragmentId;
+
+     public override bool Equals(object obj)
+     {
+         if (obj is IdTable other)
+         {
+             return Equals(other);
+         }
+
+         return false;
+     }
+
+     public bool Equals(IdTable other)
+     {
+         // Two tables are equal only if the ID of every stage matches.
+         return (VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId) ==
+             (other.VertexAId, other.VertexBId, other.TessControlId, other.TessEvaluationId, other.GeometryId, other.FragmentId);
+     }
+
+     public override int GetHashCode()
+         => HashCode.Combine(VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId);
+ }
+
+ private IdCache _vertexACache;
+ private IdCache _vertexBCache;
+ private IdCache _tessControlCache;
+ private IdCache _tessEvaluationCache;
+ private IdCache _geometryCache;
+ private IdCache _fragmentCache;
+
+ private readonly Dictionary<IdTable, ShaderSpecializationList> _shaderPrograms;
+
+ /// <summary>
+ /// Creates a new graphics shader cache hash table, with empty per-stage ID caches and no programs.
+ /// </summary>
+ public ShaderCacheHashTable()
+ {
+     _shaderPrograms = new();
+
+     _vertexACache.Initialize();
+     _vertexBCache.Initialize();
+     _tessControlCache.Initialize();
+     _tessEvaluationCache.Initialize();
+     _geometryCache.Initialize();
+     _fragmentCache.Initialize();
+ }
+
+ /// <summary>
+ /// Adds a program to the cache.
+ /// </summary>
+ /// <remarks>
+ /// The guest code of each stage is registered on the ID cache of that stage, and the program
+ /// is stored on the specialization list shared by all programs with the same set of IDs.
+ /// </remarks>
+ /// <param name="program">Program to be added</param>
+ public void Add(CachedShaderProgram program)
+ {
+     IdTable ids = default;
+
+     foreach (var stage in program.Shaders)
+     {
+         if (stage == null)
+         {
+             continue;
+         }
+
+         if (stage.Info == null)
+         {
+             // A stage without shader info is registered as vertex A code.
+             ids.VertexAId = _vertexACache.Add(stage.Code);
+             continue;
+         }
+
+         switch (stage.Info.Stage)
+         {
+             case ShaderStage.Vertex:
+                 ids.VertexBId = _vertexBCache.Add(stage.Code);
+                 break;
+             case ShaderStage.TessellationControl:
+                 ids.TessControlId = _tessControlCache.Add(stage.Code);
+                 break;
+             case ShaderStage.TessellationEvaluation:
+                 ids.TessEvaluationId = _tessEvaluationCache.Add(stage.Code);
+                 break;
+             case ShaderStage.Geometry:
+                 ids.GeometryId = _geometryCache.Add(stage.Code);
+                 break;
+             case ShaderStage.Fragment:
+                 ids.FragmentId = _fragmentCache.Add(stage.Code);
+                 break;
+         }
+     }
+
+     if (!_shaderPrograms.TryGetValue(ids, out ShaderSpecializationList specializations))
+     {
+         specializations = new ShaderSpecializationList();
+         _shaderPrograms.Add(ids, specializations);
+     }
+
+     specializations.Add(program);
+ }
+
+ /// <summary>
+ /// Tries to find a cached program.
+ /// </summary>
+ /// <remarks>
+ /// Even if false is returned, <paramref name="guestCode"/> might still contain cached guest code.
+ /// This can be used to avoid additional allocations for guest code that was already cached.
+ /// Every stage is always looked up, even after an earlier stage was not found.
+ /// </remarks>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="graphicsState">Graphics state</param>
+ /// <param name="addresses">Guest addresses of the shaders to find</param>
+ /// <param name="program">Cached host program for the given state, if found</param>
+ /// <param name="guestCode">Cached guest code, if any found</param>
+ /// <returns>True if a cached host program was found, false otherwise</returns>
+ public bool TryFind(
+     GpuChannel channel,
+     ref GpuChannelPoolState poolState,
+     ref GpuChannelGraphicsState graphicsState,
+     ShaderAddresses addresses,
+     out CachedShaderProgram program,
+     out CachedGraphicsGuestCode guestCode)
+ {
+     var memoryManager = channel.MemoryManager;
+     IdTable idTable = new IdTable();
+     guestCode = new CachedGraphicsGuestCode();
+
+     program = null;
+
+     bool hasVertexA = TryGetId(_vertexACache, memoryManager, addresses.VertexA, out idTable.VertexAId, out guestCode.VertexACode);
+     bool hasVertexB = TryGetId(_vertexBCache, memoryManager, addresses.VertexB, out idTable.VertexBId, out guestCode.VertexBCode);
+     bool hasTessControl = TryGetId(_tessControlCache, memoryManager, addresses.TessControl, out idTable.TessControlId, out guestCode.TessControlCode);
+     bool hasTessEvaluation = TryGetId(_tessEvaluationCache, memoryManager, addresses.TessEvaluation, out idTable.TessEvaluationId, out guestCode.TessEvaluationCode);
+     bool hasGeometry = TryGetId(_geometryCache, memoryManager, addresses.Geometry, out idTable.GeometryId, out guestCode.GeometryCode);
+     bool hasFragment = TryGetId(_fragmentCache, memoryManager, addresses.Fragment, out idTable.FragmentId, out guestCode.FragmentCode);
+
+     bool allStagesFound = hasVertexA & hasVertexB & hasTessControl & hasTessEvaluation & hasGeometry & hasFragment;
+
+     if (!allStagesFound || !_shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList))
+     {
+         return false;
+     }
+
+     return specList.TryFindForGraphics(channel, ref poolState, ref graphicsState, out program);
+ }
+
+ /// <summary>
+ /// Tries to get the ID of a single cached shader stage.
+ /// </summary>
+ /// <remarks>
+ /// A base address of zero is reported as found, with ID 0 and no guest code.
+ /// </remarks>
+ /// <param name="idCache">ID cache of the stage</param>
+ /// <param name="memoryManager">GPU memory manager</param>
+ /// <param name="baseAddress">Base address of the shader</param>
+ /// <param name="id">ID, if found</param>
+ /// <param name="data">Cached guest code, if found</param>
+ /// <returns>True if a cached shader is found, false otherwise</returns>
+ private static bool TryGetId(IdCache idCache, MemoryManager memoryManager, ulong baseAddress, out int id, out byte[] data)
+ {
+     if (baseAddress != 0)
+     {
+         return idCache.TryFind(new ShaderCodeAccessor(memoryManager, baseAddress), out id, out data);
+     }
+
+     id = 0;
+     data = null;
+
+     return true;
+ }
+
+ /// <summary>
+ /// Enumerates all programs that have been added to the table, one specialization list at a time.
+ /// </summary>
+ /// <returns>Programs added to the table</returns>
+ public IEnumerable<CachedShaderProgram> GetPrograms()
+ {
+     foreach (ShaderSpecializationList specializations in _shaderPrograms.Values)
+     {
+         foreach (CachedShaderProgram cachedProgram in specializations)
+         {
+             yield return cachedProgram;
+         }
+     }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheState.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheState.cs
new file mode 100644
index 00000000..623b73d7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCacheState.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>States reported while the shader cache is being loaded</summary>
+ public enum ShaderCacheState
+ {
+ /// <summary>Shader cache loading has started</summary>
+ Start,
+ /// <summary>Shader cache loading is in progress</summary>
+ Loading,
+ /// <summary>Shader cache loading has finished</summary>
+ Loaded
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs
new file mode 100644
index 00000000..e896493c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs
@@ -0,0 +1,32 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Gpu.Shader.HashTable;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Shader code accessor, reads shader code through the memory manager relative to a base address.
+ /// </summary>
+ readonly struct ShaderCodeAccessor : IDataAccessor
+ {
+     private readonly MemoryManager _memoryManager;
+     private readonly ulong _baseAddress;
+
+     /// <summary>
+     /// Creates a new shader code accessor.
+     /// </summary>
+     /// <param name="memoryManager">Memory manager used to access the shader code</param>
+     /// <param name="baseAddress">Base address of the shader in memory</param>
+     public ShaderCodeAccessor(MemoryManager memoryManager, ulong baseAddress)
+     {
+         (_memoryManager, _baseAddress) = (memoryManager, baseAddress);
+     }
+
+     /// <inheritdoc/>
+     public ReadOnlySpan<byte> GetSpan(int offset, int length)
+     {
+         // The offset is relative to the base address of the shader.
+         ulong address = _baseAddress + (ulong)offset;
+
+         return _memoryManager.GetSpanMapped(address, length);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs
new file mode 100644
index 00000000..6ca7daef
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs
@@ -0,0 +1,49 @@
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Pair of paths where the code of a shader was dumped on disk.
+ /// </summary>
+ readonly struct ShaderDumpPaths
+ {
+     /// <summary>
+     /// Path where the full shader code with header was dumped, or null if not dumped.
+     /// </summary>
+     public string FullPath { get; }
+
+     /// <summary>
+     /// Path where the shader code without header was dumped, or null if not dumped.
+     /// </summary>
+     public string CodePath { get; }
+
+     /// <summary>
+     /// True if both paths are set, false if any of them is null.
+     /// </summary>
+     public bool HasPath => !(FullPath == null || CodePath == null);
+
+     /// <summary>
+     /// Creates a new shader dumps path structure.
+     /// </summary>
+     /// <param name="fullPath">Path where the full shader code with header was dumped, or null if not dumped</param>
+     /// <param name="codePath">Path where the shader code without header was dumped, or null if not dumped</param>
+     public ShaderDumpPaths(string fullPath, string codePath)
+     {
+         (FullPath, CodePath) = (fullPath, codePath);
+     }
+
+     /// <summary>
+     /// Prepends the shader paths on the program source, as comments.
+     /// Nothing is prepended when the shader was not dumped.
+     /// </summary>
+     /// <param name="program">Program to prepend into</param>
+     public void Prepend(ShaderProgram program)
+     {
+         if (!HasPath)
+         {
+             return;
+         }
+
+         // The code path is prepended first and the full path second.
+         program.Prepend("// " + CodePath);
+         program.Prepend("// " + FullPath);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs
new file mode 100644
index 00000000..93eeb8d7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs
@@ -0,0 +1,129 @@
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Shader dumper, writes binary shader code to disk.
+ /// </summary>
+ class ShaderDumper
+ {
+     /// <summary>
+     /// Size in bytes of the header that is stripped from graphics shader code.
+     /// </summary>
+     private const int GraphicsHeaderSize = 0x50;
+
+     /// <summary>
+     /// Alignment in bytes of the file with the shader code without header.
+     /// </summary>
+     private const int CodeAlignment = 0x20;
+
+     private string _runtimeDir;
+     private string _dumpPath;
+
+     /// <summary>
+     /// Current index of the shader dump binary file.
+     /// This is incremented after each save, in order to give unique names to the files.
+     /// </summary>
+     public int CurrentDumpIndex { get; private set; }
+
+     /// <summary>
+     /// Creates a new instance of the shader dumper.
+     /// </summary>
+     public ShaderDumper()
+     {
+         CurrentDumpIndex = 1;
+     }
+
+     /// <summary>
+     /// Dumps shader code to disk.
+     /// </summary>
+     /// <remarks>
+     /// Two files are written: one with the code exactly as supplied, and one with the header removed
+     /// (compute code has no header) and zero padded to a multiple of 0x20 bytes.
+     /// Nothing is written when no shader dump path is configured.
+     /// </remarks>
+     /// <param name="code">Code to be dumped</param>
+     /// <param name="compute">True for compute shader code, false for graphics shader code</param>
+     /// <returns>Paths where the shader code was dumped, or the default value if dumping is disabled</returns>
+     /// <exception cref="System.ArgumentNullException"><paramref name="code"/> is null and dumping is enabled</exception>
+     public ShaderDumpPaths Dump(byte[] code, bool compute)
+     {
+         _dumpPath = GraphicsConfig.ShadersDumpPath;
+
+         if (string.IsNullOrWhiteSpace(_dumpPath))
+         {
+             return default;
+         }
+
+         if (code == null)
+         {
+             throw new System.ArgumentNullException(nameof(code));
+         }
+
+         string fileName = "Shader" + CurrentDumpIndex.ToString("d4") + ".bin";
+
+         string fullPath = Path.Combine(FullDir(), fileName);
+         string codePath = Path.Combine(CodeDir(), fileName);
+
+         CurrentDumpIndex++;
+
+         // Clamp the header size, so that code shorter than the header can't produce a negative length.
+         int headerSize = System.Math.Min(compute ? 0 : GraphicsHeaderSize, code.Length);
+         int codeSize = code.Length - headerSize;
+
+         using FileStream fullFile = File.Create(fullPath);
+         using FileStream codeFile = File.Create(codePath);
+
+         fullFile.Write(code, 0, code.Length);
+         codeFile.Write(code, headerSize, codeSize);
+
+         // Align to meet nvdisasm requirements.
+         // The padding is written with byte granularity, so the aligned size is always reached,
+         // even when the code size is not a multiple of 4.
+         int misalignment = codeSize % CodeAlignment;
+
+         if (misalignment != 0)
+         {
+             int paddingSize = CodeAlignment - misalignment;
+
+             codeFile.Write(new byte[paddingSize], 0, paddingSize);
+         }
+
+         return new ShaderDumpPaths(fullPath, codePath);
+     }
+
+     /// <summary>
+     /// Returns the output directory for shader code with header.
+     /// </summary>
+     /// <returns>Directory path</returns>
+     private string FullDir()
+     {
+         return CreateAndReturn(Path.Combine(DumpDir(), "Full"));
+     }
+
+     /// <summary>
+     /// Returns the output directory for shader code without header.
+     /// </summary>
+     /// <returns>Directory path</returns>
+     private string CodeDir()
+     {
+         return CreateAndReturn(Path.Combine(DumpDir(), "Code"));
+     }
+
+     /// <summary>
+     /// Returns the full output directory for the current shader dump.
+     /// </summary>
+     /// <remarks>
+     /// On first use, this picks the first "DumpsNN" directory name that does not exist yet
+     /// under the dump path, creates it, and reuses it for all following dumps.
+     /// </remarks>
+     /// <returns>Directory path</returns>
+     private string DumpDir()
+     {
+         if (string.IsNullOrEmpty(_runtimeDir))
+         {
+             int index = 1;
+
+             do
+             {
+                 _runtimeDir = Path.Combine(_dumpPath, "Dumps" + index.ToString("d2"));
+
+                 index++;
+             }
+             while (Directory.Exists(_runtimeDir));
+
+             Directory.CreateDirectory(_runtimeDir);
+         }
+
+         return _runtimeDir;
+     }
+
+     /// <summary>
+     /// Creates a new specified directory if needed.
+     /// </summary>
+     /// <param name="dir">The directory to create</param>
+     /// <returns>The same directory passed to the method</returns>
+     private static string CreateAndReturn(string dir)
+     {
+         Directory.CreateDirectory(dir);
+
+         return dir;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
new file mode 100644
index 00000000..7d61332e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
@@ -0,0 +1,84 @@
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// List of cached shader programs that differ only by specialization state.
+ /// </summary>
+ class ShaderSpecializationList : IEnumerable<CachedShaderProgram>
+ {
+     private readonly List<CachedShaderProgram> _entries = new List<CachedShaderProgram>();
+
+     /// <summary>
+     /// Appends a program to the list.
+     /// </summary>
+     /// <param name="program">Program to be added</param>
+     public void Add(CachedShaderProgram program) => _entries.Add(program);
+
+     /// <summary>
+     /// Tries to find an existing 3D program on the cache.
+     /// </summary>
+     /// <remarks>
+     /// The first program, in insertion order, with a matching specialization state is returned.
+     /// </remarks>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="poolState">Texture pool state</param>
+     /// <param name="graphicsState">Graphics state</param>
+     /// <param name="program">Cached program, if found</param>
+     /// <returns>True if a compatible program is found, false otherwise</returns>
+     public bool TryFindForGraphics(
+         GpuChannel channel,
+         ref GpuChannelPoolState poolState,
+         ref GpuChannelGraphicsState graphicsState,
+         out CachedShaderProgram program)
+     {
+         program = default;
+
+         foreach (var candidate in _entries)
+         {
+             // The stage at index 1 might be absent, in which case draw parameters are not used.
+             bool usesDrawParameters = candidate.Shaders[1]?.Info.UsesDrawParameters == true;
+
+             if (!candidate.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true))
+             {
+                 continue;
+             }
+
+             program = candidate;
+             return true;
+         }
+
+         return false;
+     }
+
+     /// <summary>
+     /// Tries to find an existing compute program on the cache.
+     /// </summary>
+     /// <remarks>
+     /// The first program, in insertion order, with a matching specialization state is returned.
+     /// </remarks>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="poolState">Texture pool state</param>
+     /// <param name="computeState">Compute state</param>
+     /// <param name="program">Cached program, if found</param>
+     /// <returns>True if a compatible program is found, false otherwise</returns>
+     public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
+     {
+         program = default;
+
+         foreach (var candidate in _entries)
+         {
+             if (!candidate.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true))
+             {
+                 continue;
+             }
+
+             program = candidate;
+             return true;
+         }
+
+         return false;
+     }
+
+     public IEnumerator<CachedShaderProgram> GetEnumerator() => _entries.GetEnumerator();
+
+     IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
new file mode 100644
index 00000000..b2c4fccd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
@@ -0,0 +1,874 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Gpu.Shader.DiskCache;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ class ShaderSpecializationState
+ {
+ private const uint ComsMagic = (byte)'C' | ((byte)'O' << 8) | ((byte)'M' << 16) | ((byte)'S' << 24);
+ private const uint GfxsMagic = (byte)'G' | ((byte)'F' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24);
+ private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24);
+ private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24);
+ private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24);
+ private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | ((byte)'P' << 16) | ((byte)'S' << 24);
+
+ /// <summary>
+ /// Flags indicating GPU state that is used by the shader.
+ /// </summary>
+ [Flags]
+ private enum QueriedStateFlags
+ {
+ EarlyZForce = 1 << 0, // Set by RecordEarlyZForce.
+ PrimitiveTopology = 1 << 1, // Set by RecordPrimitiveTopology, tested by IsPrimitiveTopologyQueried.
+ TessellationMode = 1 << 2, // Set by RecordTessellationMode.
+ TransformFeedback = 1 << 3 // Set by the graphics state constructor when transform feedback descriptors are supplied.
+ }
+
+ private QueriedStateFlags _queriedState;
+ private bool _compute;
+ private byte _constantBufferUsePerStage;
+
+ /// <summary>
+ /// Compute engine state.
+ /// </summary>
+ public GpuChannelComputeState ComputeState;
+
+ /// <summary>
+ /// 3D engine state.
+ /// </summary>
+ public GpuChannelGraphicsState GraphicsState;
+
+ /// <summary>
+ /// Constant buffers bound at the time the shader was compiled, per stage.
+ /// </summary>
+ public Array5<uint> ConstantBufferUse;
+
+ /// <summary>
+ /// Pipeline state captured at the time of shader use.
+ /// </summary>
+ public ProgramPipelineState? PipelineState;
+
+ /// <summary>
+ /// Transform feedback buffers active at the time the shader was compiled.
+ /// </summary>
+ public TransformFeedbackDescriptor[] TransformFeedbackDescriptors;
+
+ /// <summary>
+ /// Flags indicating texture state that is used by the shader.
+ /// </summary>
+ [Flags]
+ private enum QueriedTextureStateFlags
+ {
+ TextureFormat = 1 << 0, // Set by RecordTextureFormat.
+ SamplerType = 1 << 1, // Set by RecordTextureSamplerType.
+ CoordNormalized = 1 << 2 // Set by RecordTextureCoordNormalized.
+ }
+
+ /// <summary>
+ /// Reference type wrapping a value, so that a struct value can be modified in place through any reference to the box.
+ /// </summary>
+ private class Box<T>
+ {
+ /// <summary>
+ /// Wrapped value, exposed as a field so that its members can be assigned directly.
+ /// </summary>
+ public T Value;
+ }
+
+ /// <summary>
+ /// State of a texture or image that is accessed by the shader.
+ /// </summary>
+ private struct TextureSpecializationState
+ {
+ // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+ /// <summary>
+ /// Flags indicating which state of the texture the shader depends on (set by the RecordTexture* methods).
+ /// </summary>
+ public QueriedTextureStateFlags QueriedFlags;
+
+ /// <summary>
+ /// Encoded texture format value, as supplied to RegisterTexture.
+ /// </summary>
+ public uint Format;
+
+ /// <summary>
+ /// True if the texture format is sRGB, false otherwise.
+ /// </summary>
+ public bool FormatSrgb;
+
+ /// <summary>
+ /// Texture target.
+ /// </summary>
+ public TextureTarget TextureTarget;
+
+ /// <summary>
+ /// Indicates if the coordinates used to sample the texture are normalized or not (0.0..1.0 or 0..Width/Height).
+ /// </summary>
+ public bool CoordNormalized;
+ }
+
+ /// <summary>
+ /// Key that identifies a texture accessed by the shader: stage, handle offset and texture buffer slot.
+ /// </summary>
+ private readonly record struct TextureKey
+ {
+     // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+     /// <summary>
+     /// Shader stage where the texture is used.
+     /// </summary>
+     public readonly int StageIndex;
+
+     /// <summary>
+     /// Texture handle offset in words on the texture buffer.
+     /// </summary>
+     public readonly int Handle;
+
+     /// <summary>
+     /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register).
+     /// </summary>
+     public readonly int CbufSlot;
+
+     /// <summary>
+     /// Creates a new texture key.
+     /// </summary>
+     /// <param name="stageIndex">Shader stage where the texture is used</param>
+     /// <param name="handle">Texture handle offset in words on the texture buffer</param>
+     /// <param name="cbufSlot">Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register)</param>
+     public TextureKey(int stageIndex, int handle, int cbufSlot)
+     {
+         (StageIndex, Handle, CbufSlot) = (stageIndex, handle, cbufSlot);
+     }
+ }
+
+ private readonly Dictionary<TextureKey, Box<TextureSpecializationState>> _textureSpecialization;
+ private KeyValuePair<TextureKey, Box<TextureSpecializationState>>[] _allTextures;
+ private Box<TextureSpecializationState>[][] _textureByBinding;
+ private Box<TextureSpecializationState>[][] _imageByBinding;
+
+ /// <summary>
+ /// Creates a new instance of the shader specialization state, with no texture state registered.
+ /// </summary>
+ private ShaderSpecializationState()
+ {
+     _textureSpecialization = new();
+ }
+
+ /// <summary>
+ /// Creates a new instance of the shader specialization state, for a compute shader.
+ /// </summary>
+ /// <param name="state">Current compute engine state</param>
+ public ShaderSpecializationState(ref GpuChannelComputeState state) : this()
+ {
+     _compute = true;
+     ComputeState = state;
+ }
+
+ /// <summary>
+ /// Creates a new instance of the shader specialization state, for graphics shaders.
+ /// Transform feedback is flagged as queried when descriptors are supplied.
+ /// </summary>
+ /// <param name="state">Current 3D engine state</param>
+ /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
+ private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this()
+ {
+     _compute = false;
+     GraphicsState = state;
+
+     if (descriptors == null)
+     {
+         return;
+     }
+
+     TransformFeedbackDescriptors = descriptors;
+     _queriedState |= QueriedStateFlags.TransformFeedback;
+ }
+
+ /// <summary>
+ /// Prepare the shader specialization state for quick binding lookups.
+ /// </summary>
+ /// <remarks>
+ /// For each stage with shader info, this builds one array with the specialization state of
+ /// every texture and one with the state of every image, in the order they appear on the info.
+ /// Stages without info are left with null arrays.
+ /// </remarks>
+ /// <param name="stages">The shader stages</param>
+ public void Prepare(CachedShaderStage[] stages)
+ {
+     _allTextures = _textureSpecialization.ToArray();
+
+     int stageCount = stages.Length;
+
+     _textureByBinding = new Box<TextureSpecializationState>[stageCount][];
+     _imageByBinding = new Box<TextureSpecializationState>[stageCount][];
+
+     for (int index = 0; index < stageCount; index++)
+     {
+         var info = stages[index]?.Info;
+
+         if (info == null)
+         {
+             continue;
+         }
+
+         // Don't count VertexA for looking up spec state. No-Op for compute.
+         int stageIndex = index > 0 ? index - 1 : 0;
+
+         var textures = info.Textures;
+         var texBindings = new Box<TextureSpecializationState>[textures.Count];
+
+         for (int t = 0; t < textures.Count; t++)
+         {
+             texBindings[t] = GetTextureSpecState(stageIndex, textures[t].HandleIndex, textures[t].CbufSlot);
+         }
+
+         var images = info.Images;
+         var imageBindings = new Box<TextureSpecializationState>[images.Count];
+
+         for (int m = 0; m < images.Count; m++)
+         {
+             imageBindings[m] = GetTextureSpecState(stageIndex, images[m].HandleIndex, images[m].CbufSlot);
+         }
+
+         _textureByBinding[index] = texBindings;
+         _imageByBinding[index] = imageBindings;
+     }
+ }
+
+ /// <summary>
+ /// Creates a new instance of the shader specialization state for graphics, taking the pipeline state by reference.
+ /// </summary>
+ /// <param name="state">Current 3D engine state</param>
+ /// <param name="pipelineState">Current program pipeline state, copied into <see cref="PipelineState"/></param>
+ /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
+ public ShaderSpecializationState(
+ ref GpuChannelGraphicsState state,
+ ref ProgramPipelineState pipelineState,
+ TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
+ {
+ PipelineState = pipelineState;
+ }
+
+ /// <summary>
+ /// Creates a new instance of the shader specialization state for graphics, with an optional pipeline state.
+ /// </summary>
+ /// <param name="state">Current 3D engine state</param>
+ /// <param name="pipelineState">Program pipeline state stored on <see cref="PipelineState"/>, the type allows null</param>
+ /// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
+ public ShaderSpecializationState(
+ ref GpuChannelGraphicsState state,
+ ProgramPipelineState? pipelineState,
+ TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
+ {
+ PipelineState = pipelineState;
+ }
+
+ /// <summary>
+ /// Flags the early Z force state as accessed by the shader.
+ /// </summary>
+ public void RecordEarlyZForce() => _queriedState |= QueriedStateFlags.EarlyZForce;
+
+ /// <summary>
+ /// Flags the primitive topology state as accessed by the shader.
+ /// </summary>
+ public void RecordPrimitiveTopology() => _queriedState |= QueriedStateFlags.PrimitiveTopology;
+
+ /// <summary>
+ /// Flags the tessellation mode state as accessed by the shader.
+ /// </summary>
+ public void RecordTessellationMode() => _queriedState |= QueriedStateFlags.TessellationMode;
+
+ /// <summary>
+ /// Stores the constant buffer use mask of a stage, and flags that stage as having its mask recorded.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage index</param>
+ /// <param name="useMask">Mask indicating the constant buffers bound at the time of the shader compilation</param>
+ public void RecordConstantBufferUse(int stageIndex, uint useMask)
+ {
+     // One bit per stage tells which entries of ConstantBufferUse were recorded.
+     byte stageBit = (byte)(1 << stageIndex);
+
+     ConstantBufferUse[stageIndex] = useMask;
+     _constantBufferUsePerStage |= stageBit;
+ }
+
+ /// <summary>
+ /// Registers a texture accessed by the shader, taking its state from the texture descriptor.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage where the texture is used</param>
+ /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+ /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+ /// <param name="descriptor">Descriptor of the texture</param>
+ public void RegisterTexture(int stageIndex, int handle, int cbufSlot, Image.TextureDescriptor descriptor)
+ {
+     // The state lives inside a box, so it is updated in place through a reference.
+     ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;
+
+     spec.Format = descriptor.UnpackFormat();
+     spec.FormatSrgb = descriptor.UnpackSrgb();
+     spec.TextureTarget = descriptor.UnpackTextureTarget();
+     spec.CoordNormalized = descriptor.UnpackTextureCoordNormalized();
+ }
+
+ /// <summary>
+ /// Registers a texture accessed by the shader, with explicitly supplied state.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage where the texture is used</param>
+ /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+ /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+ /// <param name="format">Maxwell texture format value</param>
+ /// <param name="formatSrgb">Whether the texture format is a sRGB format</param>
+ /// <param name="target">Texture target type</param>
+ /// <param name="coordNormalized">Whether the texture coordinates used on the shader are considered normalized</param>
+ public void RegisterTexture(
+     int stageIndex,
+     int handle,
+     int cbufSlot,
+     uint format,
+     bool formatSrgb,
+     TextureTarget target,
+     bool coordNormalized)
+ {
+     // The state lives inside a box, so it is updated in place through a reference.
+     ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;
+
+     spec.Format = format;
+     spec.FormatSrgb = formatSrgb;
+     spec.TextureTarget = target;
+     spec.CoordNormalized = coordNormalized;
+ }
+
+ /// <summary>
+ /// Flags the format of a given texture as used during the shader translation process.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage where the texture is used</param>
+ /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+ /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+ public void RecordTextureFormat(int stageIndex, int handle, int cbufSlot)
+ {
+     GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.TextureFormat;
+ }
+
+ /// <summary>
+ /// Flags the target of a given texture as used during the shader translation process.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage where the texture is used</param>
+ /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+ /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+ public void RecordTextureSamplerType(int stageIndex, int handle, int cbufSlot)
+ {
+     GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.SamplerType;
+ }
+
+ /// <summary>
+ /// Flags the coordinate normalization state of a given texture as used during the shader translation process.
+ /// </summary>
+ /// <param name="stageIndex">Shader stage where the texture is used</param>
+ /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+ /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+ public void RecordTextureCoordNormalized(int stageIndex, int handle, int cbufSlot)
+ {
+     GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.CoordNormalized;
+ }
+
+ /// <summary>
+ /// Checks if the primitive topology flag is set on the queried state.
+ /// </summary>
+ /// <returns>True if queried, false otherwise</returns>
+ public bool IsPrimitiveTopologyQueried()
+     => (_queriedState & QueriedStateFlags.PrimitiveTopology) != 0;
+
+ /// <summary>
+ /// Checks if a given texture was registered on this specialization state.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>True if a specialization entry exists for the texture, false otherwise</returns>
+ public bool TextureRegistered(int stageIndex, int handle, int cbufSlot)
+     => GetTextureSpecState(stageIndex, handle, cbufSlot) is not null;
+
+ /// <summary>
+ /// Retrieves the format recorded for a given texture.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>Tuple with the recorded format value and its sRGB flag</returns>
+ public (uint, bool) GetFormat(int stageIndex, int handle, int cbufSlot)
+ {
+     // The lookup returns null for a texture that was never registered, so the dereference below throws in that case.
+     Box<TextureSpecializationState> entry = GetTextureSpecState(stageIndex, handle, cbufSlot);
+
+     return (entry.Value.Format, entry.Value.FormatSrgb);
+ }
+
+ /// <summary>
+ /// Retrieves the target recorded for a given texture.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>The recorded texture target</returns>
+ public TextureTarget GetTextureTarget(int stageIndex, int handle, int cbufSlot)
+ {
+     // The lookup returns null for a texture that was never registered, so the dereference below throws in that case.
+     Box<TextureSpecializationState> entry = GetTextureSpecState(stageIndex, handle, cbufSlot);
+
+     return entry.Value.TextureTarget;
+ }
+
+ /// <summary>
+ /// Retrieves the coordinate normalization state recorded for a given texture.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>The recorded coordinate normalization flag</returns>
+ public bool GetCoordNormalized(int stageIndex, int handle, int cbufSlot)
+ {
+     // The lookup returns null for a texture that was never registered, so the dereference below throws in that case.
+     Box<TextureSpecializationState> entry = GetTextureSpecState(stageIndex, handle, cbufSlot);
+
+     return entry.Value.CoordNormalized;
+ }
+
+ /// <summary>
+ /// Returns the specialization state entry of a texture, adding a fresh entry first when none exists yet.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>The existing or newly added texture specialization state</returns>
+ private Box<TextureSpecializationState> GetOrCreateTextureSpecState(int stageIndex, int handle, int cbufSlot)
+ {
+     TextureKey lookupKey = new TextureKey(stageIndex, handle, cbufSlot);
+
+     if (_textureSpecialization.TryGetValue(lookupKey, out Box<TextureSpecializationState> existing))
+     {
+         return existing;
+     }
+
+     // First time this texture is seen: register an empty entry for it.
+     Box<TextureSpecializationState> created = new Box<TextureSpecializationState>();
+     _textureSpecialization.Add(lookupKey, created);
+
+     return created;
+ }
+
+ /// <summary>
+ /// Looks up the specialization state entry of a texture without creating it.
+ /// </summary>
+ /// <param name="stageIndex">Index of the shader stage that uses the texture</param>
+ /// <param name="handle">Word offset of the texture handle inside the texture buffer</param>
+ /// <param name="cbufSlot">Constant buffer slot that holds the texture buffer</param>
+ /// <returns>The texture specialization state, or null when the texture has no entry</returns>
+ private Box<TextureSpecializationState> GetTextureSpecState(int stageIndex, int handle, int cbufSlot)
+ {
+     TextureKey lookupKey = new TextureKey(stageIndex, handle, cbufSlot);
+
+     return _textureSpecialization.TryGetValue(lookupKey, out Box<TextureSpecializationState> found) ? found : null;
+ }
+
+ /// <summary>
+ /// Compares the recorded graphics state against the current GPU 3D engine state.
+ /// </summary>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="graphicsState">Current graphics state to compare the recorded one against</param>
+ /// <param name="usesDrawParameters">Indicates whether the vertex shader accesses draw parameters</param>
+ /// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
+ /// <returns>True if the state matches, false otherwise</returns>
+ public bool MatchesGraphics(
+     GpuChannel channel,
+     ref GpuChannelPoolState poolState,
+     ref GpuChannelGraphicsState graphicsState,
+     bool usesDrawParameters,
+     bool checkTextures)
+ {
+     // Alpha to coverage dithering only counts as enabled when alpha to coverage itself is enabled.
+     bool recordedA2cDither = GraphicsState.AlphaToCoverageEnable && GraphicsState.AlphaToCoverageDitherEnable;
+     bool currentA2cDither = graphicsState.AlphaToCoverageEnable && graphicsState.AlphaToCoverageDitherEnable;
+
+     bool fixedStateDiffers =
+         graphicsState.ViewportTransformDisable != GraphicsState.ViewportTransformDisable ||
+         currentA2cDither != recordedA2cDither ||
+         graphicsState.DepthMode != GraphicsState.DepthMode;
+
+     if (fixedStateDiffers)
+     {
+         return false;
+     }
+
+     // The compare function and reference value are only compared while the alpha test is enabled.
+     bool alphaTestDiffers =
+         graphicsState.AlphaTestEnable != GraphicsState.AlphaTestEnable ||
+         (graphicsState.AlphaTestEnable &&
+             (graphicsState.AlphaTestCompare != GraphicsState.AlphaTestCompare ||
+              graphicsState.AlphaTestReference != GraphicsState.AlphaTestReference));
+
+     if (alphaTestDiffers)
+     {
+         return false;
+     }
+
+     // The draw parameters flag is only compared when the vertex shader actually reads draw parameters.
+     bool vertexInputDiffers =
+         !graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan()) ||
+         (usesDrawParameters && graphicsState.HasConstantBufferDrawParameters != GraphicsState.HasConstantBufferDrawParameters);
+
+     if (vertexInputDiffers)
+     {
+         return false;
+     }
+
+     if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
+     {
+         return false;
+     }
+
+     // Fragment output types are only compared when the channel capabilities request that specialization.
+     bool fragmentOutputDiffers =
+         (channel.Capabilities.NeedsFragmentOutputSpecialization &&
+             !graphicsState.FragmentOutputTypes.AsSpan().SequenceEqual(GraphicsState.FragmentOutputTypes.AsSpan())) ||
+         graphicsState.DualSourceBlendEnable != GraphicsState.DualSourceBlendEnable;
+
+     if (fragmentOutputDiffers)
+     {
+         return false;
+     }
+
+     return Matches(channel, ref poolState, checkTextures, isCompute: false);
+ }
+
+ /// <summary>
+ /// Compares the recorded compute state against the current GPU compute engine state.
+ /// </summary>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="computeState">Current compute state to compare the recorded one against</param>
+ /// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
+ /// <returns>True if the state matches, false otherwise</returns>
+ public bool MatchesCompute(GpuChannel channel, ref GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
+ {
+     bool storageAlignmentMatches = computeState.HasUnalignedStorageBuffer == ComputeState.HasUnalignedStorageBuffer;
+
+     // The shared checks only run once the compute specific flag agrees.
+     return storageAlignmentMatches && Matches(channel, ref poolState, checkTextures, isCompute: true);
+ }
+
+ /// <summary>
+ /// Refreshes the cached texture and sampler constant buffer spans when the requested stage or
+ /// buffer indices differ from the ones currently cached.
+ /// </summary>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
+ /// <param name="cachedTextureBufferIndex">The currently cached texture buffer index</param>
+ /// <param name="cachedSamplerBufferIndex">The currently cached sampler buffer index</param>
+ /// <param name="cachedTextureBuffer">The currently cached texture buffer data</param>
+ /// <param name="cachedSamplerBuffer">The currently cached sampler buffer data</param>
+ /// <param name="cachedStageIndex">The currently cached stage</param>
+ /// <param name="textureBufferIndex">The new texture buffer index</param>
+ /// <param name="samplerBufferIndex">The new sampler buffer index</param>
+ /// <param name="stageIndex">Stage index of the constant buffer</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void UpdateCachedBuffer(
+ GpuChannel channel,
+ bool isCompute,
+ scoped ref int cachedTextureBufferIndex,
+ scoped ref int cachedSamplerBufferIndex,
+ scoped ref ReadOnlySpan<int> cachedTextureBuffer,
+ scoped ref ReadOnlySpan<int> cachedSamplerBuffer,
+ scoped ref int cachedStageIndex,
+ int textureBufferIndex,
+ int samplerBufferIndex,
+ int stageIndex)
+ {
+ // A different stage invalidates both cached spans, whatever the buffer indices are.
+ bool stageChange = stageIndex != cachedStageIndex;
+
+ if (stageChange || textureBufferIndex != cachedTextureBufferIndex)
+ {
+ ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, textureBufferIndex);
+
+ // View the constant buffer bytes as 32-bit words.
+ cachedTextureBuffer = MemoryMarshal.Cast<byte, int>(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size));
+ cachedTextureBufferIndex = textureBufferIndex;
+
+ // When both handles live in the same buffer, share the span that was just fetched.
+ if (samplerBufferIndex == textureBufferIndex)
+ {
+ cachedSamplerBuffer = cachedTextureBuffer;
+ cachedSamplerBufferIndex = samplerBufferIndex;
+ }
+ }
+
+ // NOTE(review): on a stage change this branch runs even if the sampler span was just shared above,
+ // so the sampler buffer is fetched a second time in that case.
+ if (stageChange || samplerBufferIndex != cachedSamplerBufferIndex)
+ {
+ ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, samplerBufferIndex);
+
+ cachedSamplerBuffer = MemoryMarshal.Cast<byte, int>(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size));
+ cachedSamplerBufferIndex = samplerBufferIndex;
+ }
+
+ cachedStageIndex = stageIndex;
+ }
+
+ /// <summary>
+ /// Checks if the recorded state matches the current GPU state.
+ /// Compares the per-stage constant buffer use masks first and then, when requested,
+ /// the texture descriptors of every entry in the texture list.
+ /// </summary>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="poolState">Texture pool state</param>
+ /// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
+ /// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
+ /// <returns>True if the state matches, false otherwise</returns>
+ private bool Matches(GpuChannel channel, ref GpuChannelPoolState poolState, bool checkTextures, bool isCompute)
+ {
+ // Each set bit marks a stage index that has a recorded constant buffer use mask.
+ int constantBufferUsePerStageMask = _constantBufferUsePerStage;
+
+ while (constantBufferUsePerStageMask != 0)
+ {
+ // Visit the set bits from lowest to highest.
+ int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+
+ uint useMask = isCompute
+ ? channel.BufferManager.GetComputeUniformBufferUseMask()
+ : channel.BufferManager.GetGraphicsUniformBufferUseMask(index);
+
+ if (ConstantBufferUse[index] != useMask)
+ {
+ return false;
+ }
+
+ // Clear the bit that was just processed.
+ constantBufferUsePerStageMask &= ~(1 << index);
+ }
+
+ if (checkTextures)
+ {
+ TexturePool pool = channel.TextureManager.GetTexturePool(poolState.TexturePoolGpuVa, poolState.TexturePoolMaximumId);
+
+ // -1 matches no real index, so the first iteration always fetches the buffers.
+ int cachedTextureBufferIndex = -1;
+ int cachedSamplerBufferIndex = -1;
+ int cachedStageIndex = -1;
+ ReadOnlySpan<int> cachedTextureBuffer = Span<int>.Empty;
+ ReadOnlySpan<int> cachedSamplerBuffer = Span<int>.Empty;
+
+ foreach (var kv in _allTextures)
+ {
+ TextureKey textureKey = kv.Key;
+
+ (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(textureKey.CbufSlot, poolState.TextureBufferIndex);
+
+ // Only refetches the constant buffer spans when the stage or buffer indices changed.
+ UpdateCachedBuffer(channel,
+ isCompute,
+ ref cachedTextureBufferIndex,
+ ref cachedSamplerBufferIndex,
+ ref cachedTextureBuffer,
+ ref cachedSamplerBuffer,
+ ref cachedStageIndex,
+ textureBufferIndex,
+ samplerBufferIndex,
+ textureKey.StageIndex);
+
+ int packedId = TextureHandle.ReadPackedId(textureKey.Handle, cachedTextureBuffer, cachedSamplerBuffer);
+ int textureId = TextureHandle.UnpackTextureId(packedId);
+
+ // Textures whose ID is not valid for the pool are skipped rather than treated as a mismatch.
+ if (pool.IsValidId(textureId))
+ {
+ ref readonly Image.TextureDescriptor descriptor = ref pool.GetDescriptorRef(textureId);
+
+ if (!MatchesTexture(kv.Value, descriptor))
+ {
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// Compares recorded texture state against a texture descriptor.
+ /// Only the coordinate normalization state is compared, and only when the shader queried it.
+ /// </summary>
+ /// <param name="specializationState">Texture specialization state, may be null</param>
+ /// <param name="descriptor">Texture descriptor</param>
+ /// <returns>True if the state matches or nothing was recorded, false otherwise</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private bool MatchesTexture(Box<TextureSpecializationState> specializationState, in Image.TextureDescriptor descriptor)
+ {
+     // No recorded state for this texture means there is nothing that could mismatch.
+     if (specializationState == null)
+     {
+         return true;
+     }
+
+     bool coordNormalizedQueried = specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized);
+
+     // The descriptor is only unpacked when the shader actually queried the normalization state.
+     return !coordNormalizedQueried || specializationState.Value.CoordNormalized == descriptor.UnpackTextureCoordNormalized();
+ }
+
+ /// <summary>
+ /// Compares the texture state recorded for a texture binding against a texture descriptor.
+ /// </summary>
+ /// <param name="stage">Shader stage of the binding</param>
+ /// <param name="index">Index of the texture binding within the stage</param>
+ /// <param name="descriptor">Texture descriptor</param>
+ /// <returns>True if the state matches, false otherwise</returns>
+ public bool MatchesTexture(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
+ {
+     // The per-binding table is indexed by stage first, then by binding index.
+     return MatchesTexture(_textureByBinding[(int)stage][index], descriptor);
+ }
+
+ /// <summary>
+ /// Compares the texture state recorded for an image binding against a texture descriptor.
+ /// </summary>
+ /// <param name="stage">Shader stage of the binding</param>
+ /// <param name="index">Index of the image binding within the stage</param>
+ /// <param name="descriptor">Texture descriptor</param>
+ /// <returns>True if the state matches, false otherwise</returns>
+ public bool MatchesImage(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
+ {
+     // Images have their own per-binding table, but share the texture comparison logic.
+     return MatchesTexture(_imageByBinding[(int)stage][index], descriptor);
+ }
+
+ /// <summary>
+ /// Reads shader specialization state that has been serialized.
+ /// The fields are read in the same order in which <see cref="Write"/> emits them.
+ /// </summary>
+ /// <param name="dataReader">Data reader</param>
+ /// <returns>Shader specialization state</returns>
+ public static ShaderSpecializationState Read(ref BinarySerializer dataReader)
+ {
+ ShaderSpecializationState specState = new ShaderSpecializationState();
+
+ dataReader.Read(ref specState._queriedState);
+ dataReader.Read(ref specState._compute);
+
+ // The compute flag selects which of the two state blocks follows.
+ if (specState._compute)
+ {
+ dataReader.ReadWithMagicAndSize(ref specState.ComputeState, ComsMagic);
+ }
+ else
+ {
+ dataReader.ReadWithMagicAndSize(ref specState.GraphicsState, GfxsMagic);
+ }
+
+ dataReader.Read(ref specState._constantBufferUsePerStage);
+
+ // One use mask follows for every bit set in the per-stage mask, lowest bit first.
+ int constantBufferUsePerStageMask = specState._constantBufferUsePerStage;
+
+ while (constantBufferUsePerStageMask != 0)
+ {
+ int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+ dataReader.Read(ref specState.ConstantBufferUse[index]);
+ constantBufferUsePerStageMask &= ~(1 << index);
+ }
+
+ // The pipeline state is optional; a boolean tells whether it is present.
+ bool hasPipelineState = false;
+
+ dataReader.Read(ref hasPipelineState);
+
+ if (hasPipelineState)
+ {
+ ProgramPipelineState pipelineState = default;
+ dataReader.ReadWithMagicAndSize(ref pipelineState, PgpsMagic);
+ specState.PipelineState = pipelineState;
+ }
+
+ // Transform feedback descriptors are only present when the corresponding flag was recorded.
+ if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
+ {
+ ushort tfCount = 0;
+ dataReader.Read(ref tfCount);
+ specState.TransformFeedbackDescriptors = new TransformFeedbackDescriptor[tfCount];
+
+ for (int index = 0; index < tfCount; index++)
+ {
+ dataReader.ReadWithMagicAndSize(ref specState.TransformFeedbackDescriptors[index], TfbdMagic);
+ }
+ }
+
+ // Texture entries: a 16-bit count followed by that many key/state pairs.
+ ushort count = 0;
+ dataReader.Read(ref count);
+
+ for (int index = 0; index < count; index++)
+ {
+ TextureKey textureKey = default;
+ Box<TextureSpecializationState> textureState = new Box<TextureSpecializationState>();
+
+ dataReader.ReadWithMagicAndSize(ref textureKey, TexkMagic);
+ dataReader.ReadWithMagicAndSize(ref textureState.Value, TexsMagic);
+
+ // The indexer assignment means a repeated key overwrites the earlier entry instead of throwing.
+ specState._textureSpecialization[textureKey] = textureState;
+ }
+
+ return specState;
+ }
+
+ /// <summary>
+ /// Serializes the shader specialization state.
+ /// The fields are written in the same order in which <see cref="Read"/> consumes them.
+ /// </summary>
+ /// <param name="dataWriter">Data writer</param>
+ public void Write(ref BinarySerializer dataWriter)
+ {
+ dataWriter.Write(ref _queriedState);
+ dataWriter.Write(ref _compute);
+
+ // Only the state block that applies to this shader kind is written.
+ if (_compute)
+ {
+ dataWriter.WriteWithMagicAndSize(ref ComputeState, ComsMagic);
+ }
+ else
+ {
+ dataWriter.WriteWithMagicAndSize(ref GraphicsState, GfxsMagic);
+ }
+
+ dataWriter.Write(ref _constantBufferUsePerStage);
+
+ // One use mask is written for every bit set in the per-stage mask, lowest bit first.
+ int constantBufferUsePerStageMask = _constantBufferUsePerStage;
+
+ while (constantBufferUsePerStageMask != 0)
+ {
+ int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+ dataWriter.Write(ref ConstantBufferUse[index]);
+ constantBufferUsePerStageMask &= ~(1 << index);
+ }
+
+ // The pipeline state is optional; a boolean records whether it follows.
+ bool hasPipelineState = PipelineState.HasValue;
+
+ dataWriter.Write(ref hasPipelineState);
+
+ if (hasPipelineState)
+ {
+ ProgramPipelineState pipelineState = PipelineState.Value;
+ dataWriter.WriteWithMagicAndSize(ref pipelineState, PgpsMagic);
+ }
+
+ // Transform feedback descriptors are only written when the corresponding flag was recorded.
+ if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
+ {
+ // NOTE(review): the count is stored in 16 bits while the loop below writes every descriptor;
+ // presumably the array never exceeds ushort.MaxValue entries - confirm.
+ ushort tfCount = (ushort)TransformFeedbackDescriptors.Length;
+ dataWriter.Write(ref tfCount);
+
+ for (int index = 0; index < TransformFeedbackDescriptors.Length; index++)
+ {
+ dataWriter.WriteWithMagicAndSize(ref TransformFeedbackDescriptors[index], TfbdMagic);
+ }
+ }
+
+ // NOTE(review): same 16-bit count convention as above for the texture entries - confirm the dictionary stays within range.
+ ushort count = (ushort)_textureSpecialization.Count;
+ dataWriter.Write(ref count);
+
+ foreach (var kv in _textureSpecialization)
+ {
+ // Copies are taken into locals because the writer takes its arguments by reference.
+ var textureKey = kv.Key;
+ var textureState = kv.Value;
+
+ dataWriter.WriteWithMagicAndSize(ref textureKey, TexkMagic);
+ dataWriter.WriteWithMagicAndSize(ref textureState.Value, TexsMagic);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs b/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs
new file mode 100644
index 00000000..5baf2a1a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs
@@ -0,0 +1,58 @@
+using Ryujinx.Common.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+ /// <summary>
+ /// Describes one transform feedback buffer: its index, per-vertex stride and the varyings written to it.
+ /// </summary>
+ struct TransformFeedbackDescriptor
+ {
+     // Only append new fields at the end of the struct, otherwise disk shader cache compatibility breaks.
+
+     /// <summary>
+     /// Transform feedback buffer index.
+     /// </summary>
+     public readonly int BufferIndex;
+
+     /// <summary>
+     /// Number of bytes consumed by each vertex.
+     /// </summary>
+     public readonly int Stride;
+
+     /// <summary>
+     /// How many varyings are written into the buffer.
+     /// </summary>
+     public readonly int VaryingCount;
+
+     /// <summary>
+     /// Varying locations written into the buffer, packed one location per byte.
+     /// </summary>
+     public Array32<uint> VaryingLocations; // Left mutable on purpose: marking it readonly breaks AsSpan
+
+     /// <summary>
+     /// Initializes a transform feedback descriptor.
+     /// </summary>
+     /// <param name="bufferIndex">Transform feedback buffer index</param>
+     /// <param name="stride">Number of bytes consumed by each vertex</param>
+     /// <param name="varyingCount">How many varyings are written into the buffer, which is also the used size in bytes of <paramref name="varyingLocations"/></param>
+     /// <param name="varyingLocations">Varying locations written into the buffer, one location per byte (copied into the descriptor)</param>
+     public TransformFeedbackDescriptor(int bufferIndex, int stride, int varyingCount, ref Array32<uint> varyingLocations)
+     {
+         (BufferIndex, Stride, VaryingCount) = (bufferIndex, stride, varyingCount);
+         VaryingLocations = varyingLocations;
+     }
+
+     /// <summary>
+     /// Exposes the used part of <see cref="VaryingLocations"/> as bytes.
+     /// </summary>
+     /// <returns>Read-only span of varying locations, holding at most 128 bytes</returns>
+     public ReadOnlySpan<byte> AsSpan()
+     {
+         // Upper bound on the returned length; presumably 32 uint elements * 4 bytes, the byte size of the location array.
+         const int MaxLocationBytes = 128;
+
+         ReadOnlySpan<byte> locationBytes = MemoryMarshal.Cast<uint, byte>(VaryingLocations.AsSpan());
+         int usedBytes = Math.Min(MaxLocationBytes, VaryingCount);
+
+         return locationBytes.Slice(0, usedBytes);
+     }
+ }
+}