about summary refs log tree commit diff
path: root/src/Ryujinx.Graphics.Gpu/Engine
diff options
context:
space:
mode:
author TSR Berry <20988865+TSRBerry@users.noreply.github.com> 2023-04-08 01:22:00 +0200
committer Mary <thog@protonmail.com> 2023-04-27 23:51:14 +0200
commit cee712105850ac3385cd0091a923438167433f9f (patch)
tree 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine
parent cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine')
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs 219
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs 435
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs 275
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs 12
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs 96
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs 635
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs 271
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs 20
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs 41
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs 55
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs 248
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs 233
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs 262
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs 331
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs 273
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs 181
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs 15
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs 18
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs 17
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs 52
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs 101
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs 341
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs 16
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs 113
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs 400
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs 39
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs 517
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs 55
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs 18
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs 13
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs 111
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs 4226
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs 115
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs 273
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs 126
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs 305
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs 130
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs 183
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs 856
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs 65
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs 194
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs 38
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs 41
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs 190
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs 346
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs 177
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs 1448
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs 620
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs 1048
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs 379
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs 816
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs 22
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs 17
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs 165
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs 22
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs 37
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs 99
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs 11
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs 20
-rw-r--r-- src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs 42
60 files changed, 17424 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
new file mode 100644
index 00000000..2ac738fd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -0,0 +1,219 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+ /// <summary>
+ /// Compute engine class, holding the compute register state and handling compute dispatches.
+ /// </summary>
+ class ComputeClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly ThreedClass _3dEngine;
+ private readonly DeviceState<ComputeClassState> _state;
+
+ private readonly InlineToMemoryClass _i2mClass;
+
+ /// <summary>
+ /// Creates the compute engine, registering its method callbacks and its inline-to-memory helper.
+ /// </summary>
+ /// <param name="context">GPU context providing the renderer and host capabilities</param>
+ /// <param name="channel">GPU channel providing the memory, buffer and texture managers</param>
+ /// <param name="threedEngine">3D engine that is synchronized before and after each dispatch</param>
+ public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
+ {
+ _context = context;
+ _channel = channel;
+ _3dEngine = threedEngine;
+
+ _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
+ {
+ [nameof(ComputeClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
+ [nameof(ComputeClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null),
+ [nameof(ComputeClassState.SendSignalingPcasB)] = new RwCallback(SendSignalingPcasB, null)
+ });
+ _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
+ }
+
+ /// <summary>
+ /// Reads a value from the compute class registers.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register to read</param>
+ /// <returns>Value read from the register state at <paramref name="offset"/></returns>
+ public int Read(int offset) => _state.Read(offset);
+
+ /// <summary>
+ /// Writes a value to the compute class registers.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register to write</param>
+ /// <param name="data">Value to store at <paramref name="offset"/></param>
+ public void Write(int offset, int data) => _state.Write(offset, data);
+
+ /// <summary>
+ /// Launches the Inline-to-Memory DMA copy operation.
+ /// </summary>
+ /// <remarks>
+ /// The compute register state is reinterpreted in place as inline-to-memory state and passed by reference.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ private void LaunchDma(int argument) => _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
+
+ /// <summary>
+ /// Pushes a block of data to the Inline-to-Memory engine.
+ /// </summary>
+ /// <remarks>
+ /// The data is forwarded as-is to the inline-to-memory helper created by this class.
+ /// </remarks>
+ /// <param name="data">Data to push</param>
+ public void LoadInlineData(ReadOnlySpan<int> data) => _i2mClass.LoadInlineData(data);
+
+ /// <summary>
+ /// Pushes a word of data to the Inline-to-Memory engine.
+ /// </summary>
+ /// <remarks>
+ /// The word is forwarded as-is to the inline-to-memory helper created by this class.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ private void LoadInlineData(int argument) => _i2mClass.LoadInlineData(argument);
+
+ /// <summary>
+ /// Performs the compute dispatch described by the QMD (Queue Meta Data) at the address set by SendPcasA.
+ /// </summary>
+ /// <param name="argument">Method call argument (not used by this method)</param>
+ private void SendSignalingPcasB(int argument)
+ {
+ var memoryManager = _channel.MemoryManager;
+
+ // Pending instanced draws must be performed before this dispatch changes the state.
+ _3dEngine.PerformDeferredDraws();
+
+ // Uniform buffer data that is still pending must be written to memory first.
+ _3dEngine.FlushUboDirty();
+
+ // SendPcasA holds the QMD address shifted right by 8 bits.
+ ulong qmdGpuVa = (ulong)_state.State.SendPcasA << 8;
+
+ ComputeQmd qmd = _channel.MemoryManager.Read<ComputeQmd>(qmdGpuVa);
+
+ // The shader is located at the QMD program offset inside the program region.
+ ulong programRegionGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
+
+ ulong shaderGpuVa = programRegionGpuVa + (uint)qmd.ProgramOffset;
+
+ int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+ int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
+
+ // Bind every constant buffer that the QMD marks as valid.
+ for (int slot = 0; slot < Constants.TotalCpUniformBuffers; slot++)
+ {
+ if (qmd.ConstantBufferValid(slot))
+ {
+ ulong cbGpuVa = (uint)qmd.ConstantBufferAddrLower(slot) | (ulong)qmd.ConstantBufferAddrUpper(slot) << 32;
+ ulong cbSize = (ulong)qmd.ConstantBufferSize(slot);
+
+ _channel.BufferManager.SetComputeUniformBuffer(slot, cbGpuVa, cbSize);
+ }
+ }
+
+ ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
+ ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
+
+ // Texture pool and bindless texture state used to look up the shader.
+ var poolState = new GpuChannelPoolState(
+ texturePoolGpuVa,
+ _state.State.SetTexHeaderPoolCMaximumIndex,
+ _state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+ // Thread dimensions and memory sizes from the QMD, also used to look up the shader.
+ var computeState = new GpuChannelComputeState(
+ qmd.CtaThreadDimension0,
+ qmd.CtaThreadDimension1,
+ qmd.CtaThreadDimension2,
+ localMemorySize,
+ sharedMemorySize,
+ _channel.BufferManager.HasUnalignedStorageBuffers);
+
+ // Fetch the compute shader for the current pool and compute state.
+ CachedShaderProgram shader = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+ _context.Renderer.Pipeline.SetProgram(shader.HostProgram);
+
+ // Point the texture manager at the pools and texture buffer index used by this dispatch.
+ _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
+ _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
+ _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+ ShaderProgramInfo info = shader.Shaders[0].Info;
+
+ // Remember the alignment state, to detect whether binding the storage buffers below changes it.
+ bool hadUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
+
+ for (int i = 0; i < info.SBuffers.Count; i++)
+ {
+ BufferDescriptor sb = info.SBuffers[i];
+
+ // Storage buffer descriptors are read from compute uniform buffer 0, starting at offset 0x310.
+ ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0) + (ulong)(0x310 + sb.Slot * 0x10);
+
+ SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
+
+ _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+ }
+
+ if (hadUnaligned != _channel.BufferManager.HasUnalignedStorageBuffers)
+ {
+ // Assumptions about storage buffer alignment have changed, so the shader is fetched again.
+ shader = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+ _context.Renderer.Pipeline.SetProgram(shader.HostProgram);
+
+ info = shader.Shaders[0].Info;
+ }
+
+ for (int i = 0; i < info.CBuffers.Count; i++)
+ {
+ BufferDescriptor cb = info.CBuffers[i];
+
+ // NVN uses the "hardware" constant buffers for slots below 8, and those are already bound above.
+ // Slots greater than or equal to 8 are emulated constant buffers, accessed using global memory loads.
+ // Their descriptors are read from compute uniform buffer 0, starting at offset 0x260.
+ if (cb.Slot >= 8)
+ {
+ ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0) + (ulong)(0x260 + (cb.Slot - 8) * 0x10);
+
+ SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
+
+ _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
+ }
+ }
+
+ // Apply the bindings of the shader, then commit them before dispatching.
+ _channel.BufferManager.SetComputeBufferBindings(shader.Bindings);
+
+ _channel.TextureManager.SetComputeBindings(shader.Bindings);
+
+ // Should never return false for mismatching spec state, since the shader was fetched above.
+ _channel.TextureManager.CommitComputeBindings(shader.SpecializationState);
+
+ _channel.BufferManager.CommitComputeBindings();
+
+ // Dispatch using the raster dimensions given by the QMD.
+ _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
+
+ // NOTE(review): presumably needed because the host program bound above replaced the 3D one;
+ // confirm against ThreedClass.ForceShaderUpdate.
+ _3dEngine.ForceShaderUpdate();
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
new file mode 100644
index 00000000..5d81de5d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
@@ -0,0 +1,435 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+ /// <summary>
+ /// Notify type, as encoded in the <c>Notify</c> register of <see cref="ComputeClassState"/>.
+ /// </summary>
+ enum NotifyType
+ {
+ WriteOnly = 0x0,
+ WriteThenAwaken = 0x1
+ }
+
+ /// <summary>
+ /// CWD control SM selection, as encoded in bit 0 of the <c>SetCwdControl</c> register.
+ /// </summary>
+ enum SetCwdControlSmSelection
+ {
+ LoadBalanced = 0x0,
+ RoundRobin = 0x1
+ }
+
+ /// <summary>
+ /// Cache lines to invalidate, as encoded in bit 0 of the sampler and texture cache invalidation registers.
+ /// </summary>
+ enum InvalidateCacheLines
+ {
+ All = 0x0,
+ One = 0x1
+ }
+
+ /// <summary>
+ /// GWC SCG type, as encoded in bit 0 of the <c>SetGwcScgType</c> register.
+ /// </summary>
+ enum SetGwcScgTypeScgType
+ {
+ GraphicsCompute0 = 0x0,
+ Compute1 = 0x1
+ }
+
+ /// <summary>
+ /// Render enable override mode, as encoded in bits 0-1 of the <c>SetRenderEnableOverride</c> register.
+ /// </summary>
+ enum SetRenderEnableOverrideMode
+ {
+ UseRenderEnable = 0x0,
+ AlwaysRender = 0x1,
+ NeverRender = 0x2
+ }
+
+ /// <summary>
+ /// Semaphore report operation, as encoded in bits 0-1 of the <c>SetReportSemaphoreD</c> register.
+ /// </summary>
+ enum SetReportSemaphoreDOperation
+ {
+ Release = 0x0,
+ Trap = 0x3
+ }
+
+ /// <summary>
+ /// Semaphore report structure size, as encoded in bit 28 of the <c>SetReportSemaphoreD</c> register.
+ /// </summary>
+ enum SetReportSemaphoreDStructureSize
+ {
+ FourWords = 0x0,
+ OneWord = 0x1
+ }
+
+ /// <summary>
+ /// Semaphore report reduction operation, as encoded in bits 9-11 of the <c>SetReportSemaphoreD</c> register.
+ /// </summary>
+ enum SetReportSemaphoreDReductionOp
+ {
+ RedAdd = 0x0,
+ RedMin = 0x1,
+ RedMax = 0x2,
+ RedInc = 0x3,
+ RedDec = 0x4,
+ RedAnd = 0x5,
+ RedOr = 0x6,
+ RedXor = 0x7
+ }
+
+ /// <summary>
+ /// Semaphore report reduction format, as encoded in bits 17-18 of the <c>SetReportSemaphoreD</c> register.
+ /// </summary>
+ enum SetReportSemaphoreDReductionFormat
+ {
+ Unsigned32 = 0x0,
+ Signed32 = 0x1
+ }
+
+ /// <summary>
+ /// Compute class register state, with accessors that decode the packed fields of each register.
+ /// </summary>
+ unsafe struct ComputeClassState
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)(SetObject & 0xFFFF);
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+ public fixed uint Reserved04[63];
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)(SetNotifyA & 0xFF);
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)Notify;
+ public uint WaitForIdle;
+ public fixed uint Reserved114[7];
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)(SetGlobalRenderEnableA & 0xFF);
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)(SetGlobalRenderEnableC & 0x7);
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public uint PmTriggerWfi;
+ public fixed uint Reserved148[2];
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[10];
+ public uint LineLengthIn;
+ public uint LineCount;
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperValue => (int)(OffsetOutUpper & 0xFF);
+ public uint OffsetOut;
+ public uint PitchOut;
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)(SetDstBlockSize & 0xF);
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOriginBytesX;
+ public int SetDstOriginBytesXV => (int)(SetDstOriginBytesX & 0xFFFFF);
+ public uint SetDstOriginSamplesY;
+ public int SetDstOriginSamplesYV => (int)(SetDstOriginSamplesY & 0xFFFF);
+ public uint LaunchDma;
+ public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)(LaunchDma & 0x1);
+ public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+ public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+ public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+ public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+ public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+ public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+ public uint LoadInlineData;
+ public fixed uint Reserved1B8[9];
+ public uint SetI2mSemaphoreA;
+ public int SetI2mSemaphoreAOffsetUpper => (int)(SetI2mSemaphoreA & 0xFF);
+ public uint SetI2mSemaphoreB;
+ public uint SetI2mSemaphoreC;
+ public fixed uint Reserved1E8[2];
+ public uint SetI2mSpareNoop00;
+ public uint SetI2mSpareNoop01;
+ public uint SetI2mSpareNoop02;
+ public uint SetI2mSpareNoop03;
+ public uint SetValidSpanOverflowAreaA;
+ public int SetValidSpanOverflowAreaAAddressUpper => (int)(SetValidSpanOverflowAreaA & 0xFF);
+ public uint SetValidSpanOverflowAreaB;
+ public uint SetValidSpanOverflowAreaC;
+ public uint SetCoalesceWaitingPeriodUnit;
+ public uint PerfmonTransfer;
+ public uint SetShaderSharedMemoryWindow;
+ public uint SetSelectMaxwellTextureHeaders;
+ public bool SetSelectMaxwellTextureHeadersV => (SetSelectMaxwellTextureHeaders & 0x1) != 0;
+ public uint InvalidateShaderCaches;
+ public bool InvalidateShaderCachesInstruction => (InvalidateShaderCaches & 0x1) != 0;
+ public bool InvalidateShaderCachesData => (InvalidateShaderCaches & 0x10) != 0;
+ public bool InvalidateShaderCachesConstant => (InvalidateShaderCaches & 0x1000) != 0;
+ public bool InvalidateShaderCachesLocks => (InvalidateShaderCaches & 0x2) != 0;
+ public bool InvalidateShaderCachesFlushData => (InvalidateShaderCaches & 0x4) != 0;
+ public uint SetReservedSwMethod00;
+ public uint SetReservedSwMethod01;
+ public uint SetReservedSwMethod02;
+ public uint SetReservedSwMethod03;
+ public uint SetReservedSwMethod04;
+ public uint SetReservedSwMethod05;
+ public uint SetReservedSwMethod06;
+ public uint SetReservedSwMethod07;
+ public uint SetCwdControl;
+ public SetCwdControlSmSelection SetCwdControlSmSelection => (SetCwdControlSmSelection)(SetCwdControl & 0x1);
+ public uint InvalidateTextureHeaderCacheNoWfi;
+ public InvalidateCacheLines InvalidateTextureHeaderCacheNoWfiLines => (InvalidateCacheLines)(InvalidateTextureHeaderCacheNoWfi & 0x1);
+ public int InvalidateTextureHeaderCacheNoWfiTag => (int)((InvalidateTextureHeaderCacheNoWfi >> 4) & 0x3FFFFF);
+ public uint SetCwdRefCounter;
+ public int SetCwdRefCounterSelect => (int)(SetCwdRefCounter & 0x3F);
+ public int SetCwdRefCounterValue => (int)((SetCwdRefCounter >> 8) & 0xFFFF);
+ public uint SetReservedSwMethod08;
+ public uint SetReservedSwMethod09;
+ public uint SetReservedSwMethod10;
+ public uint SetReservedSwMethod11;
+ public uint SetReservedSwMethod12;
+ public uint SetReservedSwMethod13;
+ public uint SetReservedSwMethod14;
+ public uint SetReservedSwMethod15;
+ public uint SetGwcScgType;
+ public SetGwcScgTypeScgType SetGwcScgTypeScgType => (SetGwcScgTypeScgType)(SetGwcScgType & 0x1);
+ public uint SetScgControl;
+ public int SetScgControlCompute1MaxSmCount => (int)(SetScgControl & 0x1FF);
+ public uint InvalidateConstantBufferCacheA;
+ public int InvalidateConstantBufferCacheAAddressUpper => (int)(InvalidateConstantBufferCacheA & 0xFF);
+ public uint InvalidateConstantBufferCacheB;
+ public uint InvalidateConstantBufferCacheC;
+ public int InvalidateConstantBufferCacheCByteCount => (int)(InvalidateConstantBufferCacheC & 0x1FFFF);
+ public bool InvalidateConstantBufferCacheCThruL2 => (InvalidateConstantBufferCacheC & 0x80000000) != 0;
+ public uint SetComputeClassVersion;
+ public int SetComputeClassVersionCurrent => (int)(SetComputeClassVersion & 0xFFFF);
+ public int SetComputeClassVersionOldestSupported => (int)((SetComputeClassVersion >> 16) & 0xFFFF);
+ public uint CheckComputeClassVersion;
+ public int CheckComputeClassVersionCurrent => (int)(CheckComputeClassVersion & 0xFFFF);
+ public int CheckComputeClassVersionOldestSupported => (int)((CheckComputeClassVersion >> 16) & 0xFFFF);
+ public uint SetQmdVersion;
+ public int SetQmdVersionCurrent => (int)(SetQmdVersion & 0xFFFF);
+ public int SetQmdVersionOldestSupported => (int)((SetQmdVersion >> 16) & 0xFFFF);
+ public uint SetWfiConfig;
+ public bool SetWfiConfigEnableScgTypeWfi => (SetWfiConfig & 0x1) != 0;
+ public uint CheckQmdVersion;
+ public int CheckQmdVersionCurrent => (int)(CheckQmdVersion & 0xFFFF);
+ public int CheckQmdVersionOldestSupported => (int)((CheckQmdVersion >> 16) & 0xFFFF);
+ public uint WaitForIdleScgType;
+ public uint InvalidateSkedCaches;
+ public bool InvalidateSkedCachesV => (InvalidateSkedCaches & 0x1) != 0;
+ public uint SetScgRenderEnableControl;
+ public bool SetScgRenderEnableControlCompute1UsesRenderEnable => (SetScgRenderEnableControl & 0x1) != 0;
+ public fixed uint Reserved2A0[4];
+ public uint SetCwdSlotCount;
+ public int SetCwdSlotCountV => (int)(SetCwdSlotCount & 0xFF);
+ public uint SendPcasA;
+ public uint SendPcasB;
+ public int SendPcasBFrom => (int)(SendPcasB & 0xFFFFFF);
+ public int SendPcasBDelta => (int)((SendPcasB >> 24) & 0xFF);
+ public uint SendSignalingPcasB;
+ public bool SendSignalingPcasBInvalidate => (SendSignalingPcasB & 0x1) != 0;
+ public bool SendSignalingPcasBSchedule => (SendSignalingPcasB & 0x2) != 0;
+ public fixed uint Reserved2C0[9];
+ public uint SetShaderLocalMemoryNonThrottledA;
+ public int SetShaderLocalMemoryNonThrottledASizeUpper => (int)(SetShaderLocalMemoryNonThrottledA & 0xFF);
+ public uint SetShaderLocalMemoryNonThrottledB;
+ public uint SetShaderLocalMemoryNonThrottledC;
+ public int SetShaderLocalMemoryNonThrottledCMaxSmCount => (int)(SetShaderLocalMemoryNonThrottledC & 0x1FF);
+ public uint SetShaderLocalMemoryThrottledA;
+ public int SetShaderLocalMemoryThrottledASizeUpper => (int)(SetShaderLocalMemoryThrottledA & 0xFF);
+ public uint SetShaderLocalMemoryThrottledB;
+ public uint SetShaderLocalMemoryThrottledC;
+ public int SetShaderLocalMemoryThrottledCMaxSmCount => (int)(SetShaderLocalMemoryThrottledC & 0x1FF);
+ public fixed uint Reserved2FC[5];
+ public uint SetSpaVersion;
+ public int SetSpaVersionMinor => (int)(SetSpaVersion & 0xFF);
+ public int SetSpaVersionMajor => (int)((SetSpaVersion >> 8) & 0xFF);
+ public fixed uint Reserved314[123];
+ public uint SetFalcon00;
+ public uint SetFalcon01;
+ public uint SetFalcon02;
+ public uint SetFalcon03;
+ public uint SetFalcon04;
+ public uint SetFalcon05;
+ public uint SetFalcon06;
+ public uint SetFalcon07;
+ public uint SetFalcon08;
+ public uint SetFalcon09;
+ public uint SetFalcon10;
+ public uint SetFalcon11;
+ public uint SetFalcon12;
+ public uint SetFalcon13;
+ public uint SetFalcon14;
+ public uint SetFalcon15;
+ public uint SetFalcon16;
+ public uint SetFalcon17;
+ public uint SetFalcon18;
+ public uint SetFalcon19;
+ public uint SetFalcon20;
+ public uint SetFalcon21;
+ public uint SetFalcon22;
+ public uint SetFalcon23;
+ public uint SetFalcon24;
+ public uint SetFalcon25;
+ public uint SetFalcon26;
+ public uint SetFalcon27;
+ public uint SetFalcon28;
+ public uint SetFalcon29;
+ public uint SetFalcon30;
+ public uint SetFalcon31;
+ public fixed uint Reserved580[127];
+ public uint SetShaderLocalMemoryWindow;
+ public fixed uint Reserved780[4];
+ public uint SetShaderLocalMemoryA;
+ public int SetShaderLocalMemoryAAddressUpper => (int)(SetShaderLocalMemoryA & 0xFF);
+ public uint SetShaderLocalMemoryB;
+ public fixed uint Reserved798[383];
+ public uint SetShaderCacheControl;
+ public bool SetShaderCacheControlIcachePrefetchEnable => (SetShaderCacheControl & 0x1) != 0;
+ public fixed uint ReservedD98[19];
+ public uint SetSmTimeoutInterval;
+ public int SetSmTimeoutIntervalCounterBit => (int)(SetSmTimeoutInterval & 0x3F);
+ public fixed uint ReservedDE8[87];
+ public uint SetSpareNoop12;
+ public uint SetSpareNoop13;
+ public uint SetSpareNoop14;
+ public uint SetSpareNoop15;
+ public fixed uint ReservedF54[59];
+ public uint SetSpareNoop00;
+ public uint SetSpareNoop01;
+ public uint SetSpareNoop02;
+ public uint SetSpareNoop03;
+ public uint SetSpareNoop04;
+ public uint SetSpareNoop05;
+ public uint SetSpareNoop06;
+ public uint SetSpareNoop07;
+ public uint SetSpareNoop08;
+ public uint SetSpareNoop09;
+ public uint SetSpareNoop10;
+ public uint SetSpareNoop11;
+ public fixed uint Reserved1070[103];
+ public uint InvalidateSamplerCacheAll;
+ public bool InvalidateSamplerCacheAllV => (InvalidateSamplerCacheAll & 0x1) != 0;
+ public uint InvalidateTextureHeaderCacheAll;
+ public bool InvalidateTextureHeaderCacheAllV => (InvalidateTextureHeaderCacheAll & 0x1) != 0;
+ public fixed uint Reserved1214[29];
+ public uint InvalidateTextureDataCacheNoWfi;
+ public InvalidateCacheLines InvalidateTextureDataCacheNoWfiLines => (InvalidateCacheLines)(InvalidateTextureDataCacheNoWfi & 0x1);
+ public int InvalidateTextureDataCacheNoWfiTag => (int)((InvalidateTextureDataCacheNoWfi >> 4) & 0x3FFFFF);
+ public fixed uint Reserved128C[7];
+ public uint ActivatePerfSettingsForComputeContext;
+ public bool ActivatePerfSettingsForComputeContextAll => (ActivatePerfSettingsForComputeContext & 0x1) != 0;
+ public fixed uint Reserved12AC[33];
+ public uint InvalidateSamplerCache;
+ public InvalidateCacheLines InvalidateSamplerCacheLines => (InvalidateCacheLines)(InvalidateSamplerCache & 0x1);
+ public int InvalidateSamplerCacheTag => (int)((InvalidateSamplerCache >> 4) & 0x3FFFFF);
+ public uint InvalidateTextureHeaderCache;
+ public InvalidateCacheLines InvalidateTextureHeaderCacheLines => (InvalidateCacheLines)(InvalidateTextureHeaderCache & 0x1);
+ public int InvalidateTextureHeaderCacheTag => (int)((InvalidateTextureHeaderCache >> 4) & 0x3FFFFF);
+ public uint InvalidateTextureDataCache;
+ public InvalidateCacheLines InvalidateTextureDataCacheLines => (InvalidateCacheLines)(InvalidateTextureDataCache & 0x1);
+ public int InvalidateTextureDataCacheTag => (int)((InvalidateTextureDataCache >> 4) & 0x3FFFFF);
+ public fixed uint Reserved133C[58];
+ public uint InvalidateSamplerCacheNoWfi;
+ public InvalidateCacheLines InvalidateSamplerCacheNoWfiLines => (InvalidateCacheLines)(InvalidateSamplerCacheNoWfi & 0x1);
+ public int InvalidateSamplerCacheNoWfiTag => (int)((InvalidateSamplerCacheNoWfi >> 4) & 0x3FFFFF);
+ public fixed uint Reserved1428[64];
+ public uint SetShaderExceptions;
+ public bool SetShaderExceptionsEnable => (SetShaderExceptions & 0x1) != 0;
+ public fixed uint Reserved152C[9];
+ public uint SetRenderEnableA;
+ public int SetRenderEnableAOffsetUpper => (int)(SetRenderEnableA & 0xFF);
+ public uint SetRenderEnableB;
+ public uint SetRenderEnableC;
+ public int SetRenderEnableCMode => (int)(SetRenderEnableC & 0x7);
+ public uint SetTexSamplerPoolA;
+ public int SetTexSamplerPoolAOffsetUpper => (int)(SetTexSamplerPoolA & 0xFF);
+ public uint SetTexSamplerPoolB;
+ public uint SetTexSamplerPoolC;
+ public int SetTexSamplerPoolCMaximumIndex => (int)(SetTexSamplerPoolC & 0xFFFFF);
+ public fixed uint Reserved1568[3];
+ public uint SetTexHeaderPoolA;
+ public int SetTexHeaderPoolAOffsetUpper => (int)(SetTexHeaderPoolA & 0xFF);
+ public uint SetTexHeaderPoolB;
+ public uint SetTexHeaderPoolC;
+ public int SetTexHeaderPoolCMaximumIndex => (int)(SetTexHeaderPoolC & 0x3FFFFF);
+ public fixed uint Reserved1580[34];
+ public uint SetProgramRegionA;
+ public int SetProgramRegionAAddressUpper => (int)(SetProgramRegionA & 0xFF);
+ public uint SetProgramRegionB;
+ public fixed uint Reserved1610[34];
+ public uint InvalidateShaderCachesNoWfi;
+ public bool InvalidateShaderCachesNoWfiInstruction => (InvalidateShaderCachesNoWfi & 0x1) != 0;
+ public bool InvalidateShaderCachesNoWfiGlobalData => (InvalidateShaderCachesNoWfi & 0x10) != 0;
+ public bool InvalidateShaderCachesNoWfiConstant => (InvalidateShaderCachesNoWfi & 0x1000) != 0;
+ public fixed uint Reserved169C[170];
+ public uint SetRenderEnableOverride;
+ public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)(SetRenderEnableOverride & 0x3);
+ public fixed uint Reserved1948[57];
+ public uint PipeNop;
+ public uint SetSpare00;
+ public uint SetSpare01;
+ public uint SetSpare02;
+ public uint SetSpare03;
+ public fixed uint Reserved1A40[48];
+ public uint SetReportSemaphoreA;
+ public int SetReportSemaphoreAOffsetUpper => (int)(SetReportSemaphoreA & 0xFF);
+ public uint SetReportSemaphoreB;
+ public uint SetReportSemaphoreC;
+ public uint SetReportSemaphoreD;
+ public SetReportSemaphoreDOperation SetReportSemaphoreDOperation => (SetReportSemaphoreDOperation)(SetReportSemaphoreD & 0x3);
+ public bool SetReportSemaphoreDAwakenEnable => (SetReportSemaphoreD & 0x100000) != 0;
+ public SetReportSemaphoreDStructureSize SetReportSemaphoreDStructureSize => (SetReportSemaphoreDStructureSize)((SetReportSemaphoreD >> 28) & 0x1);
+ public bool SetReportSemaphoreDFlushDisable => (SetReportSemaphoreD & 0x4) != 0;
+ public bool SetReportSemaphoreDReductionEnable => (SetReportSemaphoreD & 0x8) != 0;
+ public SetReportSemaphoreDReductionOp SetReportSemaphoreDReductionOp => (SetReportSemaphoreDReductionOp)((SetReportSemaphoreD >> 9) & 0x7);
+ public SetReportSemaphoreDReductionFormat SetReportSemaphoreDReductionFormat => (SetReportSemaphoreDReductionFormat)((SetReportSemaphoreD >> 17) & 0x3);
+ public fixed uint Reserved1B10[702];
+ public uint SetBindlessTexture;
+ public int SetBindlessTextureConstantBufferSlotSelect => (int)(SetBindlessTexture & 0x7);
+ public uint SetTrapHandler;
+ public fixed uint Reserved2610[843];
+ public Array8<uint> SetShaderPerformanceCounterValueUpper;
+ public Array8<uint> SetShaderPerformanceCounterValue;
+ public Array8<uint> SetShaderPerformanceCounterEvent;
+ public int SetShaderPerformanceCounterEventEvent(int i) => (int)(SetShaderPerformanceCounterEvent[i] & 0xFF);
+ public Array8<uint> SetShaderPerformanceCounterControlA;
+ public int SetShaderPerformanceCounterControlAEvent0(int i) => (int)(SetShaderPerformanceCounterControlA[i] & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 2) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 5) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 7) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 10) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 12) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 15) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 17) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 20) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 22) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 25) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 27) & 0x7);
+ public int SetShaderPerformanceCounterControlASpare(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 30) & 0x3);
+ public Array8<uint> SetShaderPerformanceCounterControlB;
+ public bool SetShaderPerformanceCounterControlBEdge(int i) => (SetShaderPerformanceCounterControlB[i] & 0x1) != 0;
+ public int SetShaderPerformanceCounterControlBMode(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 1) & 0x3);
+ public bool SetShaderPerformanceCounterControlBWindowed(int i) => (SetShaderPerformanceCounterControlB[i] & 0x8) != 0;
+ public int SetShaderPerformanceCounterControlBFunc(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 4) & 0xFFFF);
+ public uint SetShaderPerformanceCounterTrapControl;
+ public int SetShaderPerformanceCounterTrapControlMask => (int)(SetShaderPerformanceCounterTrapControl & 0xFF);
+ public uint StartShaderPerformanceCounter;
+ public int StartShaderPerformanceCounterCounterMask => (int)(StartShaderPerformanceCounter & 0xFF);
+ public uint StopShaderPerformanceCounter;
+ public int StopShaderPerformanceCounterCounterMask => (int)(StopShaderPerformanceCounter & 0xFF);
+ public fixed uint Reserved33E8[6];
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
new file mode 100644
index 00000000..1b20e41c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Type of the dependent QMD (Queue Meta Data).
+    /// Decoded from bit 206 of the QMD by <see cref="ComputeQmd.DependentQmdType"/>.
+    /// </summary>
+    enum DependentQmdType
+    {
+        Queue = 0,
+        Grid = 1
+    }
+
+    /// <summary>
+    /// Release memory barrier type.
+    /// Decoded from bit 366 of the QMD by <see cref="ComputeQmd.ReleaseMembarType"/>.
+    /// </summary>
+    enum ReleaseMembarType
+    {
+        FeNone = 0,
+        FeSysmembar = 1
+    }
+
+    /// <summary>
+    /// CWD memory barrier type.
+    /// Decoded from bits 368 to 369 of the QMD by <see cref="ComputeQmd.CwdMembarType"/>.
+    /// </summary>
+    enum CwdMembarType
+    {
+        L1None = 0,
+        L1Sysmembar = 1,
+        L1Membar = 2
+    }
+
+    /// <summary>
+    /// NaN behavior of 32-bit float operations on the shader.
+    /// Decoded from bit 376 of the QMD by <see cref="ComputeQmd.Fp32NanBehavior"/>.
+    /// </summary>
+    enum Fp32NanBehavior
+    {
+        Legacy = 0,
+        Fp64Compatible = 1
+    }
+
+    /// <summary>
+    /// NaN behavior of 32-bit float to integer conversions on the shader.
+    /// Decoded from bit 377 of the QMD by <see cref="ComputeQmd.Fp32F2iNanBehavior"/>.
+    /// </summary>
+    enum Fp32F2iNanBehavior
+    {
+        PassZero = 0,
+        PassIndefinite = 1
+    }
+
+    /// <summary>
+    /// Limit of calls.
+    /// Decoded from bit 378 of the QMD by <see cref="ComputeQmd.ApiVisibleCallLimit"/>.
+    /// </summary>
+    enum ApiVisibleCallLimit
+    {
+        _32 = 0,
+        NoCheck = 1
+    }
+
+    /// <summary>
+    /// Shared memory bank mapping mode.
+    /// Decoded from bit 379 of the QMD by <see cref="ComputeQmd.SharedMemoryBankMapping"/>.
+    /// </summary>
+    enum SharedMemoryBankMapping
+    {
+        FourBytesPerBank = 0,
+        EightBytesPerBank = 1
+    }
+
+    /// <summary>
+    /// Denormal behavior of 32-bit float narrowing instructions.
+    /// Decoded from bit 383 of the QMD by <see cref="ComputeQmd.Fp32NarrowInstruction"/>.
+    /// </summary>
+    enum Fp32NarrowInstruction
+    {
+        KeepDenorms = 0,
+        FlushDenorms = 1
+    }
+
+    /// <summary>
+    /// Configuration of the L1 cache.
+    /// Decoded from bits 669 to 671 of the QMD by <see cref="ComputeQmd.L1Configuration"/>.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): the values here start at zero; NVIDIA's QMD definitions may number these
+    /// settings starting at one. Confirm against the hardware headers before relying on them.
+    /// </remarks>
+    enum L1Configuration
+    {
+        DirectlyAddressableMemorySize16kb = 0,
+        DirectlyAddressableMemorySize32kb = 1,
+        DirectlyAddressableMemorySize48kb = 2
+    }
+
+    /// <summary>
+    /// Reduction operation.
+    /// Decoded by <see cref="ComputeQmd"/> from QMD bits 788 to 790 (release 0) and 884 to 886 (release 1).
+    /// </summary>
+    enum ReductionOp
+    {
+        RedAdd = 0,
+        RedMin = 1,
+        RedMax = 2,
+        RedInc = 3,
+        RedDec = 4,
+        RedAnd = 5,
+        RedOr = 6,
+        RedXor = 7
+    }
+
+    /// <summary>
+    /// Reduction format.
+    /// Decoded by <see cref="ComputeQmd"/> from QMD bits 792 to 793 (release 0) and 888 to 889 (release 1).
+    /// </summary>
+    enum ReductionFormat
+    {
+        Unsigned32 = 0,
+        Signed32 = 1
+    }
+
+    /// <summary>
+    /// Size of a structure in words.
+    /// Decoded by <see cref="ComputeQmd"/> from QMD bit 799 (release 0) and bit 895 (release 1).
+    /// </summary>
+    enum StructureSize
+    {
+        FourWords = 0,
+        OneWord = 1
+    }
+
+ /// <summary>
+ /// Compute Queue Meta Data.
+ /// </summary>
+ unsafe struct ComputeQmd
+ {
+ private fixed int _words[64];
+
+ public int OuterPut => BitRange(30, 0);
+ public bool OuterOverflow => Bit(31);
+ public int OuterGet => BitRange(62, 32);
+ public bool OuterStickyOverflow => Bit(63);
+ public int InnerGet => BitRange(94, 64);
+ public bool InnerOverflow => Bit(95);
+ public int InnerPut => BitRange(126, 96);
+ public bool InnerStickyOverflow => Bit(127);
+ public int QmdReservedAA => BitRange(159, 128);
+ public int DependentQmdPointer => BitRange(191, 160);
+ public int QmdGroupId => BitRange(197, 192);
+ public bool SmGlobalCachingEnable => Bit(198);
+ public bool RunCtaInOneSmPartition => Bit(199);
+ public bool IsQueue => Bit(200);
+ public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
+ public bool SemaphoreReleaseEnable0 => Bit(202);
+ public bool SemaphoreReleaseEnable1 => Bit(203);
+ public bool RequireSchedulingPcas => Bit(204);
+ public bool DependentQmdScheduleEnable => Bit(205);
+ public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
+ public bool DependentQmdFieldCopy => Bit(207);
+ public int QmdReservedB => BitRange(223, 208);
+ public int CircularQueueSize => BitRange(248, 224);
+ public bool QmdReservedC => Bit(249);
+ public bool InvalidateTextureHeaderCache => Bit(250);
+ public bool InvalidateTextureSamplerCache => Bit(251);
+ public bool InvalidateTextureDataCache => Bit(252);
+ public bool InvalidateShaderDataCache => Bit(253);
+ public bool InvalidateInstructionCache => Bit(254);
+ public bool InvalidateShaderConstantCache => Bit(255);
+ public int ProgramOffset => BitRange(287, 256);
+ public int CircularQueueAddrLower => BitRange(319, 288);
+ public int CircularQueueAddrUpper => BitRange(327, 320);
+ public int QmdReservedD => BitRange(335, 328);
+ public int CircularQueueEntrySize => BitRange(351, 336);
+ public int CwdReferenceCountId => BitRange(357, 352);
+ public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
+ public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
+ public bool CwdReferenceCountIncrEnable => Bit(367);
+ public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
+ public bool SequentiallyRunCtas => Bit(370);
+ public bool CwdReferenceCountDecrEnable => Bit(371);
+ public bool Throttled => Bit(372);
+ public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
+ public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
+ public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
+ public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
+ public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
+ public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
+ public int CtaRasterWidth => BitRange(415, 384);
+ public int CtaRasterHeight => BitRange(431, 416);
+ public int CtaRasterDepth => BitRange(447, 432);
+ public int CtaRasterWidthResume => BitRange(479, 448);
+ public int CtaRasterHeightResume => BitRange(495, 480);
+ public int CtaRasterDepthResume => BitRange(511, 496);
+ public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
+ public int CoalesceWaitingPeriod => BitRange(529, 522);
+ public int SharedMemorySize => BitRange(561, 544);
+ public int QmdReservedG => BitRange(575, 562);
+ public int QmdVersion => BitRange(579, 576);
+ public int QmdMajorVersion => BitRange(583, 580);
+ public int QmdReservedH => BitRange(591, 584);
+ public int CtaThreadDimension0 => BitRange(607, 592);
+ public int CtaThreadDimension1 => BitRange(623, 608);
+ public int CtaThreadDimension2 => BitRange(639, 624);
+ public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
+ public int QmdReservedI => BitRange(668, 648);
+ public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
+ public int SmDisableMaskLower => BitRange(703, 672);
+ public int SmDisableMaskUpper => BitRange(735, 704);
+ public int Release0AddressLower => BitRange(767, 736);
+ public int Release0AddressUpper => BitRange(775, 768);
+ public int QmdReservedJ => BitRange(783, 776);
+ public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
+ public bool QmdReservedK => Bit(791);
+ public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
+ public bool Release0ReductionEnable => Bit(794);
+ public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
+ public int Release0Payload => BitRange(831, 800);
+ public int Release1AddressLower => BitRange(863, 832);
+ public int Release1AddressUpper => BitRange(871, 864);
+ public int QmdReservedL => BitRange(879, 872);
+ public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
+ public bool QmdReservedM => Bit(887);
+ public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
+ public bool Release1ReductionEnable => Bit(890);
+ public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
+ public int Release1Payload => BitRange(927, 896);
+ public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
+ public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
+ public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
+ public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
+ public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
+ public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
+ public int QmdReservedN => BitRange(1466, 1464);
+ public int BarrierCount => BitRange(1471, 1467);
+ public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
+ public int RegisterCount => BitRange(1503, 1496);
+ public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
+ public int SassVersion => BitRange(1535, 1528);
+ public int HwOnlyInnerGet => BitRange(1566, 1536);
+ public bool HwOnlyRequireSchedulingPcas => Bit(1567);
+ public int HwOnlyInnerPut => BitRange(1598, 1568);
+ public bool HwOnlyScgType => Bit(1599);
+ public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
+ public bool QmdReservedQ => Bit(1630);
+ public bool HwOnlySpanListHeadIndexValid => Bit(1631);
+ public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
+ public int QmdSpareE => BitRange(1695, 1664);
+ public int QmdSpareF => BitRange(1727, 1696);
+ public int QmdSpareG => BitRange(1759, 1728);
+ public int QmdSpareH => BitRange(1791, 1760);
+ public int QmdSpareI => BitRange(1823, 1792);
+ public int QmdSpareJ => BitRange(1855, 1824);
+ public int QmdSpareK => BitRange(1887, 1856);
+ public int QmdSpareL => BitRange(1919, 1888);
+ public int QmdSpareM => BitRange(1951, 1920);
+ public int QmdSpareN => BitRange(1983, 1952);
+ public int DebugIdUpper => BitRange(2015, 1984);
+ public int DebugIdLower => BitRange(2047, 2016);
+
+        /// <summary>
+        /// Reads a single bit of the QMD.
+        /// </summary>
+        /// <param name="bit">Zero-based bit index, counted across all 64 words (0 to 2047)</param>
+        /// <returns>True if the bit is set, false otherwise</returns>
+        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="bit"/> is negative or not below 2048</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool Bit(int bit)
+        {
+            const int TotalBits = 64 * 32;
+
+            // The unsigned comparison also rejects negative indices.
+            if ((uint)bit >= TotalBits)
+            {
+                throw new ArgumentOutOfRangeException(nameof(bit));
+            }
+
+            int word = _words[bit >> 5];
+            int bitMask = 1 << (bit & 31);
+
+            return (word & bitMask) != 0;
+        }
+
+        /// <summary>
+        /// Reads a bit field of the QMD.
+        /// </summary>
+        /// <remarks>
+        /// Both bounds are inclusive. Only <paramref name="lower"/> is range checked, and the value is read from
+        /// the single 32-bit word that contains <paramref name="lower"/>, so the field must not cross a word boundary.
+        /// </remarks>
+        /// <param name="upper">Index of the most significant bit of the field</param>
+        /// <param name="lower">Index of the least significant bit of the field (0 to 2047)</param>
+        /// <returns>The field value, shifted down so that bit <paramref name="lower"/> becomes bit 0</returns>
+        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="lower"/> is negative or not below 2048</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private int BitRange(int upper, int lower)
+        {
+            const int TotalBits = 64 * 32;
+
+            // The unsigned comparison also rejects negative indices.
+            if ((uint)lower >= TotalBits)
+            {
+                throw new ArgumentOutOfRangeException(nameof(lower));
+            }
+
+            int width = upper - lower + 1;
+            int mask = (int)(uint.MaxValue >> (32 - width));
+            int shifted = _words[lower >> 5] >> (lower & 31);
+
+            // The mask also discards the sign bits brought in by the arithmetic shift.
+            return shifted & mask;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs
new file mode 100644
index 00000000..5581b5cc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+    /// <summary>
+    /// Conditional rendering enable state.
+    /// </summary>
+    enum ConditionalRenderEnabled
+    {
+        False = 0,
+        True = 1,
+        Host = 2
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs
new file mode 100644
index 00000000..74a9aa04
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs
@@ -0,0 +1,96 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+    /// <summary>
+    /// State interface with a shadow memory control register.
+    /// Implemented by register state structures used with <see cref="DeviceStateWithShadow{TState}"/>.
+    /// </summary>
+    interface IShadowState
+    {
+        /// <summary>
+        /// MME shadow ram control mode.
+        /// <see cref="DeviceStateWithShadow{TState}"/> reads this on every register write to decide
+        /// whether the write is passed through, tracked on the shadow state, or replayed from it.
+        /// </summary>
+        SetMmeShadowRamControlMode SetMmeShadowRamControlMode { get; }
+    }
+
+    /// <summary>
+    /// Represents a device's state, with an additional shadow state.
+    /// </summary>
+    /// <typeparam name="TState">Type of the state</typeparam>
+    class DeviceStateWithShadow<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicFields)] TState> : IDeviceState where TState : unmanaged, IShadowState
+    {
+        private readonly DeviceState<TState> _state;
+        private readonly DeviceState<TState> _shadowState;
+
+        /// <summary>
+        /// Current device state (the live state, not the shadow copy).
+        /// </summary>
+        public ref TState State => ref _state.State;
+
+        /// <summary>
+        /// Creates a new instance of the device state, with shadow state.
+        /// </summary>
+        /// <param name="callbacks">Optional callbacks that will be called if a register specified by name is read or written</param>
+        /// <param name="debugLogCallback">Optional callback to be used for debug log messages</param>
+        public DeviceStateWithShadow(IReadOnlyDictionary<string, RwCallback> callbacks = null, Action<string> debugLogCallback = null)
+        {
+            // Only the live state gets the callbacks; the shadow state is plain storage.
+            _shadowState = new DeviceState<TState>();
+            _state = new DeviceState<TState>(callbacks, debugLogCallback);
+        }
+
+        /// <summary>
+        /// Reads a value from a register of the live state.
+        /// </summary>
+        /// <param name="offset">Register offset in bytes</param>
+        /// <returns>Value stored on the register</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes a value to a register.
+        /// </summary>
+        /// <param name="offset">Register offset in bytes</param>
+        /// <param name="value">Value to be written</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void Write(int offset, int value) => WriteWithRedundancyCheck(offset, value, out _);
+
+        /// <summary>
+        /// Writes a value to a register, returning a value indicating if the value written
+        /// is different from the current value on the register.
+        /// </summary>
+        /// <remarks>
+        /// Offsets below 0x200, and all offsets in passthrough mode, are written directly.
+        /// In the track modes the value is also stored on the shadow state.
+        /// In any other mode (expected to be replay), the value stored on the shadow state
+        /// is written instead of <paramref name="value"/>.
+        /// </remarks>
+        /// <param name="offset">Register offset in bytes</param>
+        /// <param name="value">Value to be written</param>
+        /// <param name="changed">True if the value was changed, false otherwise</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void WriteWithRedundancyCheck(int offset, int value, out bool changed)
+        {
+            SetMmeShadowRamControlMode mode = _state.State.SetMmeShadowRamControlMode;
+
+            if (offset >= 0x200 && mode != SetMmeShadowRamControlMode.MethodPassthrough)
+            {
+                bool isTracking = mode == SetMmeShadowRamControlMode.MethodTrack ||
+                                  mode == SetMmeShadowRamControlMode.MethodTrackWithFilter;
+
+                if (isTracking)
+                {
+                    _shadowState.Write(offset, value);
+                }
+                else
+                {
+                    Debug.Assert(mode == SetMmeShadowRamControlMode.MethodReplay);
+
+                    // Replay ignores the incoming value and uses the tracked one.
+                    value = _shadowState.Read(offset);
+                }
+            }
+
+            _state.WriteWithRedundancyCheck(offset, value, out changed);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
new file mode 100644
index 00000000..fd93cd8b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
@@ -0,0 +1,635 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ /// <summary>
+ /// Represents a DMA copy engine class.
+ /// </summary>
+ class DmaClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly ThreedClass _3dEngine;
+ private readonly DeviceState<DmaClassState> _state;
+
+        /// <summary>
+        /// Copy flags passed on DMA launch.
+        /// These are the bits of the LaunchDma argument that <see cref="DmaCopy"/> tests.
+        /// </summary>
+        [Flags]
+        private enum CopyFlags
+        {
+            SrcLinear = 0x080,
+            DstLinear = 0x100,
+            MultiLineEnable = 0x200,
+            RemapEnable = 0x400
+        }
+
+        /// <summary>
+        /// Parameters describing one side (source or destination) of a texture copy.
+        /// </summary>
+        private struct TextureParams
+        {
+            /// <summary>
+            /// X coordinate of the copy region.
+            /// </summary>
+            public readonly int RegionX;
+
+            /// <summary>
+            /// Y coordinate of the copy region.
+            /// </summary>
+            public readonly int RegionY;
+
+            /// <summary>
+            /// Offset of the data in memory, relative to the base pointer.
+            /// </summary>
+            public readonly int BaseOffset;
+
+            /// <summary>
+            /// Bytes per pixel.
+            /// </summary>
+            public readonly int Bpp;
+
+            /// <summary>
+            /// True for a linear texture, false for a block linear texture.
+            /// </summary>
+            public readonly bool Linear;
+
+            /// <summary>
+            /// Calculator that gives the pixel offset from XYZ coordinates.
+            /// </summary>
+            public readonly OffsetCalculator Calculator;
+
+            /// <summary>
+            /// Creates texture parameters.
+            /// </summary>
+            /// <param name="regionX">X coordinate of the copy region</param>
+            /// <param name="regionY">Y coordinate of the copy region</param>
+            /// <param name="baseOffset">Offset of the data in memory, relative to the base pointer</param>
+            /// <param name="bpp">Bytes per pixel</param>
+            /// <param name="linear">True for a linear texture, false for a block linear texture</param>
+            /// <param name="calculator">Calculator that gives the pixel offset from XYZ coordinates</param>
+            public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
+            {
+                (RegionX, RegionY) = (regionX, regionY);
+                (BaseOffset, Bpp) = (baseOffset, bpp);
+                (Linear, Calculator) = (linear, calculator);
+            }
+        }
+
+        /// <summary>
+        /// Three byte value, used as the element type when shuffling components that are 3 bytes wide.
+        /// </summary>
+        [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 3)]
+        private struct UInt24
+        {
+            public byte Byte0, Byte1, Byte2;
+        }
+
+        /// <summary>
+        /// Creates a new instance of the DMA copy engine class.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="threedEngine">3D engine</param>
+        public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
+        {
+            // A write to the LaunchDma register triggers the copy; no read callback is registered.
+            var callbacks = new Dictionary<string, RwCallback>();
+            callbacks.Add(nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null));
+
+            _context = context;
+            _channel = channel;
+            _3dEngine = threedEngine;
+            _state = new DeviceState<DmaClassState>(callbacks);
+        }
+
+        /// <summary>
+        /// Reads a value from the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public int Read(int offset)
+        {
+            return _state.Read(offset);
+        }
+
+        /// <summary>
+        /// Writes a value to the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public void Write(int offset, int data)
+        {
+            _state.Write(offset, data);
+        }
+
+        /// <summary>
+        /// Determines if a buffer-to-texture region covers the entirety of a texture.
+        /// </summary>
+        /// <param name="tex">Texture to compare</param>
+        /// <param name="linear">True if the texture is linear, false if block linear</param>
+        /// <param name="bpp">Texture bytes per pixel</param>
+        /// <param name="stride">Texture stride</param>
+        /// <param name="xCount">Number of pixels to be copied</param>
+        /// <param name="yCount">Number of lines to be copied</param>
+        /// <returns>True if the copy region covers the whole texture, false otherwise</returns>
+        private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
+        {
+            if (!linear)
+            {
+                int gobAlignWidth = Constants.GobAlignment / bpp;
+
+                // The region must start at the texture origin to cover it fully.
+                if (tex.RegionX != 0 || tex.RegionY != 0)
+                {
+                    return false;
+                }
+
+                return tex.Width == BitUtils.AlignUp(xCount, gobAlignWidth) && tex.Height == yCount;
+            }
+
+            // A negative stride means the texture has to be flipped, so the fast copy
+            // is not trivial and the slow path is used. A zero stride is rejected as well.
+            return stride > 0 && stride / bpp == BitUtils.AlignUp(xCount, Constants.StrideAlignment / bpp);
+        }
+
+        /// <summary>
+        /// Releases a semaphore for a given LaunchDma method call.
+        /// </summary>
+        /// <remarks>
+        /// The semaphore type is taken from bits 3 to 4 of the argument. A one word release writes only the payload.
+        /// Any other type that is not None writes a timestamp at offset 8 and then the payload, widened to 64 bits, at offset 0.
+        /// </remarks>
+        /// <param name="argument">The LaunchDma call argument</param>
+        private void ReleaseSemaphore(int argument)
+        {
+            var semaphoreType = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);
+
+            if (semaphoreType == LaunchDmaSemaphoreType.None)
+            {
+                return;
+            }
+
+            ulong semaphoreVa = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
+
+            if (semaphoreType == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
+            {
+                _channel.MemoryManager.Write(semaphoreVa, _state.State.SetSemaphorePayload);
+                return;
+            }
+
+            // Four word release: the timestamp is written before the payload.
+            _channel.MemoryManager.Write(semaphoreVa + 8, _context.GetTimestamp());
+            _channel.MemoryManager.Write(semaphoreVa, (ulong)_state.State.SetSemaphorePayload);
+        }
+
+        /// <summary>
+        /// Performs a buffer to buffer, or buffer to texture copy.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is copied when the LineLengthIn register is zero.
+        /// With the multi line flag set, the copy is 2D and goes through the texture paths;
+        /// otherwise it is a flat copy (or a clear, for the constant remap fast path).
+        /// </remarks>
+        /// <param name="argument">The LaunchDma call argument</param>
+        /// <exception cref="NotSupportedException">Thrown when a 2D copy uses a pixel or component size with no copy implementation</exception>
+        private void DmaCopy(int argument)
+        {
+            var memoryManager = _channel.MemoryManager;
+
+            CopyFlags copyFlags = (CopyFlags)argument;
+
+            bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
+            bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
+            bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
+            bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);
+
+            uint size = _state.State.LineLengthIn;
+
+            if (size == 0)
+            {
+                return;
+            }
+
+            ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
+            ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;
+
+            int xCount = (int)size;
+            int yCount = (int)_state.State.LineCount;
+
+            _3dEngine.CreatePendingSyncs();
+            _3dEngine.FlushUboDirty();
+
+            if (copy2D)
+            {
+                // Buffer to texture copy.
+                // The remap registers store sizes and counts minus one.
+                int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
+                int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
+                int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+                int srcBpp = remap ? srcComponents * componentSize : 1;
+                int dstBpp = remap ? dstComponents * componentSize : 1;
+
+                var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
+                var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);
+
+                int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;
+
+                if (!srcLinear)
+                {
+                    srcRegionX = src.RegionX;
+                    srcRegionY = src.RegionY;
+                }
+
+                if (!dstLinear)
+                {
+                    dstRegionX = dst.RegionX;
+                    dstRegionY = dst.RegionY;
+                }
+
+                int srcStride = (int)_state.State.PitchIn;
+                int dstStride = (int)_state.State.PitchOut;
+
+                var srcCalculator = new OffsetCalculator(
+                    src.Width,
+                    src.Height,
+                    srcStride,
+                    srcLinear,
+                    src.MemoryLayout.UnpackGobBlocksInY(),
+                    src.MemoryLayout.UnpackGobBlocksInZ(),
+                    srcBpp);
+
+                var dstCalculator = new OffsetCalculator(
+                    dst.Width,
+                    dst.Height,
+                    dstStride,
+                    dstLinear,
+                    dst.MemoryLayout.UnpackGobBlocksInY(),
+                    dst.MemoryLayout.UnpackGobBlocksInZ(),
+                    dstBpp);
+
+                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
+                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);
+
+                // With a negative stride, the last line is the one at the lowest address.
+                if (srcLinear && srcStride < 0)
+                {
+                    srcBaseOffset += srcStride * (yCount - 1);
+                }
+
+                if (dstLinear && dstStride < 0)
+                {
+                    dstBaseOffset += dstStride * (yCount - 1);
+                }
+
+                ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);
+
+                bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
+                bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
+
+                if (completeSource && completeDest)
+                {
+                    var target = memoryManager.Physical.TextureCache.FindTexture(
+                        memoryManager,
+                        dstGpuVa,
+                        dstBpp,
+                        dstStride,
+                        dst.Height,
+                        xCount,
+                        yCount,
+                        dstLinear,
+                        dst.MemoryLayout.UnpackGobBlocksInY(),
+                        dst.MemoryLayout.UnpackGobBlocksInZ());
+
+                    if (target != null)
+                    {
+                        byte[] data;
+                        if (srcLinear)
+                        {
+                            data = LayoutConverter.ConvertLinearStridedToLinear(
+                                target.Info.Width,
+                                target.Info.Height,
+                                1,
+                                1,
+                                xCount * srcBpp,
+                                srcStride,
+                                target.Info.FormatInfo.BytesPerPixel,
+                                srcSpan);
+                        }
+                        else
+                        {
+                            data = LayoutConverter.ConvertBlockLinearToLinear(
+                                src.Width,
+                                src.Height,
+                                src.Depth,
+                                1,
+                                1,
+                                1,
+                                1,
+                                1,
+                                srcBpp,
+                                src.MemoryLayout.UnpackGobBlocksInY(),
+                                src.MemoryLayout.UnpackGobBlocksInZ(),
+                                1,
+                                new SizeInfo((int)target.Size),
+                                srcSpan);
+                        }
+
+                        target.SynchronizeMemory();
+                        target.SetData(data);
+                        target.SignalModified();
+                        return;
+                    }
+                    else if (srcCalculator.LayoutMatches(dstCalculator))
+                    {
+                        // No layout conversion has to be performed, just copy the data entirely.
+                        memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
+                        return;
+                    }
+                }
+
+                // OPT: This allocates a (potentially) huge temporary array and then copies an existing
+                // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
+                // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
+                Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
+
+                TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
+                TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);
+
+                // If remapping is disabled, we always copy the components directly, in order.
+                // If it's enabled, but the mapping is just XYZW, we also copy them in order.
+                bool isIdentityRemap = !remap ||
+                    (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
+                    (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
+                    (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
+                    (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));
+
+                if (isIdentityRemap)
+                {
+                    // The order of the components doesn't change, so we can just copy directly
+                    // (with layout conversion if necessary).
+
+                    switch (srcBpp)
+                    {
+                        case 1: Copy<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 2: Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 4: Copy<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 8: Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 12: Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 16: Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
+                    }
+                }
+                else
+                {
+                    // The order or value of the components might change.
+
+                    switch (componentSize)
+                    {
+                        case 1: CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 2: CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 3: CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        case 4: CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
+                        default: throw new NotSupportedException($"Unable to copy {componentSize} component size.");
+                    }
+                }
+
+                memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
+            }
+            else
+            {
+                if (remap &&
+                    _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
+                    _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
+                    _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
+                {
+                    // Fast path for clears when remap is enabled.
+                    // Each element is one 4 byte component, so the byte size is the element count times 4.
+                    memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
+                }
+                else
+                {
+                    // TODO: Implement remap functionality.
+                    // Buffer to buffer copy.
+
+                    bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
+                    bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();
+
+                    if (!srcIsPitchKind && dstIsPitchKind)
+                    {
+                        CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
+                    }
+                    else if (srcIsPitchKind && !dstIsPitchKind)
+                    {
+                        CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
+                    }
+                    else
+                    {
+                        memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies data from one texture to another, while performing layout conversion if necessary.
+        /// </summary>
+        /// <remarks>
+        /// One element of type <typeparamref name="T"/> is copied per pixel of the copy region.
+        /// <typeparamref name="T"/> is either a whole pixel, or a single component when called for a component shuffle.
+        /// </remarks>
+        /// <typeparam name="T">Type of the element copied for each pixel</typeparam>
+        /// <param name="dstSpan">Destination texture memory region</param>
+        /// <param name="srcSpan">Source texture memory region</param>
+        /// <param name="dst">Destination texture parameters</param>
+        /// <param name="src">Source texture parameters</param>
+        private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+        {
+            int xCount = (int)_state.State.LineLengthIn;
+            int yCount = (int)_state.State.LineCount;
+
+            // Whole lines can only be copied at once when each element is a whole pixel.
+            // For component shuffles T is a single component, and a line copy would move
+            // every component of the line (and slice past the end of the shifted spans).
+            bool elementIsWholePixel = Unsafe.SizeOf<T>() == src.Bpp;
+
+            if (src.Linear && dst.Linear && src.Bpp == dst.Bpp && elementIsWholePixel)
+            {
+                // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
+                // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
+                for (int y = 0; y < yCount; y++)
+                {
+                    src.Calculator.SetY(src.RegionY + y);
+                    dst.Calculator.SetY(dst.RegionY + y);
+                    int srcOffset = src.Calculator.GetOffset(src.RegionX);
+                    int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
+                    srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
+                        .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
+                }
+            }
+            else
+            {
+                fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+                {
+                    byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+                    byte* srcBase = srcPtr - src.BaseOffset;
+
+                    for (int y = 0; y < yCount; y++)
+                    {
+                        src.Calculator.SetY(src.RegionY + y);
+                        dst.Calculator.SetY(dst.RegionY + y);
+
+                        for (int x = 0; x < xCount; x++)
+                        {
+                            int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
+                            int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
+
+                            *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Writes a constant value to every pixel of the copy region, while performing layout conversion if necessary.
+        /// </summary>
+        /// <typeparam name="T">Type of the element written for each pixel</typeparam>
+        /// <param name="dstSpan">Destination texture memory region</param>
+        /// <param name="dst">Destination texture parameters</param>
+        /// <param name="fillValue">Constant value to be written</param>
+        private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged
+        {
+            int width = (int)_state.State.LineLengthIn;
+            int height = (int)_state.State.LineCount;
+
+            fixed (byte* spanStart = dstSpan)
+            {
+                // Calculator offsets are relative to the texture base, while the span starts at the base offset.
+                byte* textureBase = spanStart - dst.BaseOffset;
+
+                for (int row = 0; row < height; row++)
+                {
+                    dst.Calculator.SetY(dst.RegionY + row);
+
+                    for (int column = 0; column < width; column++)
+                    {
+                        T* pixel = (T*)(textureBase + dst.Calculator.GetOffset(dst.RegionX + column));
+
+                        *pixel = fillValue;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
+        /// </summary>
+        /// <remarks>
+        /// Each destination component is handled on its own pass: it is either copied from the selected
+        /// source component, or filled with one of the remap constants. Other selectors write nothing.
+        /// </remarks>
+        /// <typeparam name="T">Type of a single component</typeparam>
+        /// <param name="dstSpan">Destination texture memory region</param>
+        /// <param name="srcSpan">Source texture memory region</param>
+        /// <param name="dst">Destination texture parameters</param>
+        /// <param name="src">Source texture parameters</param>
+        private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+        {
+            int componentSize = Unsafe.SizeOf<T>();
+            int dstComponentCount = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+
+            for (int component = 0; component < dstComponentCount; component++)
+            {
+                SetRemapComponentsDst selector;
+
+                switch (component)
+                {
+                    case 0: selector = _state.State.SetRemapComponentsDstX; break;
+                    case 1: selector = _state.State.SetRemapComponentsDstY; break;
+                    case 2: selector = _state.State.SetRemapComponentsDstZ; break;
+                    default: selector = _state.State.SetRemapComponentsDstW; break;
+                }
+
+                int dstByteOffset = componentSize * component;
+
+                switch (selector)
+                {
+                    case SetRemapComponentsDst.SrcX:
+                    case SetRemapComponentsDst.SrcY:
+                    case SetRemapComponentsDst.SrcZ:
+                    case SetRemapComponentsDst.SrcW:
+                        // The source selectors are consecutive, so the distance from SrcX is the source component index.
+                        int srcByteOffset = componentSize * (selector - SetRemapComponentsDst.SrcX);
+                        Copy<T>(dstSpan.Slice(dstByteOffset), srcSpan.Slice(srcByteOffset), dst, src);
+                        break;
+                    case SetRemapComponentsDst.ConstA:
+                        Fill<T>(dstSpan.Slice(dstByteOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
+                        break;
+                    case SetRemapComponentsDst.ConstB:
+                        Fill<T>(dstSpan.Slice(dstByteOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
+                        break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
+        /// </summary>
+        /// <remarks>
+        /// The source address of each element is converted with <see cref="ConvertGobLinearToBlockLinearAddress"/>.
+        /// The copy moves 16 bytes at a time when both addresses and the size are 16 byte aligned, otherwise one byte at a time.
+        /// </remarks>
+        /// <param name="memoryManager">GPU memory manager</param>
+        /// <param name="srcGpuVa">Source GPU virtual address</param>
+        /// <param name="dstGpuVa">Destination GPU virtual address</param>
+        /// <param name="size">Size in bytes of the copy</param>
+        private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
+        {
+            bool aligned16 = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;
+
+            if (!aligned16)
+            {
+                for (ulong position = 0; position < size; position++)
+                {
+                    ulong srcVa = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
+                    byte value = memoryManager.Read<byte>(srcVa, true);
+                    memoryManager.Write(dstGpuVa + position, value);
+                }
+
+                return;
+            }
+
+            for (ulong position = 0; position < size; position += 16)
+            {
+                ulong srcVa = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
+                Vector128<byte> value = memoryManager.Read<Vector128<byte>>(srcVa, true);
+                memoryManager.Write(dstGpuVa + position, value);
+            }
+        }
+
+        /// <summary>
+        /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
+        /// </summary>
+        /// <remarks>
+        /// The destination address of each element is converted with <see cref="ConvertGobLinearToBlockLinearAddress"/>.
+        /// The copy moves 16 bytes at a time when both addresses and the size are 16 byte aligned, otherwise one byte at a time.
+        /// </remarks>
+        /// <param name="memoryManager">GPU memory manager</param>
+        /// <param name="srcGpuVa">Source GPU virtual address</param>
+        /// <param name="dstGpuVa">Destination GPU virtual address</param>
+        /// <param name="size">Size in bytes of the copy</param>
+        private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
+        {
+            bool aligned16 = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;
+
+            if (!aligned16)
+            {
+                for (ulong position = 0; position < size; position++)
+                {
+                    byte value = memoryManager.Read<byte>(srcGpuVa + position, true);
+                    ulong dstVa = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);
+                    memoryManager.Write(dstVa, value);
+                }
+
+                return;
+            }
+
+            for (ulong position = 0; position < size; position += 16)
+            {
+                Vector128<byte> value = memoryManager.Read<Vector128<byte>>(srcGpuVa + position, true);
+                ulong dstVa = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);
+                memoryManager.Write(dstVa, value);
+            }
+        }
+
+        /// <summary>
+        /// Calculates the GOB block linear address from a linear address.
+        /// </summary>
+        /// <remarks>
+        /// Only address bits 4 to 8 are rearranged; all other bits are returned unchanged.
+        /// </remarks>
+        /// <param name="address">Linear address</param>
+        /// <returns>Block linear address</returns>
+        private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
+        {
+            // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
+            ulong untouched = address & ~0x1f0UL;
+            ulong y0 = (address & 0x40) >> 2;   // Bit 6 moves to bit 4.
+            ulong x4 = (address & 0x10) << 1;   // Bit 4 moves to bit 5.
+            ulong y21 = (address & 0x180) >> 1; // Bits 7 and 8 move to bits 6 and 7.
+            ulong x5 = (address & 0x20) << 3;   // Bit 5 moves to bit 8.
+
+            return untouched | y0 | x4 | y21 | x5;
+        }
+
+        /// <summary>
+        /// Handles a write to the LaunchDma register: performs the copy described by the
+        /// current state, then releases a semaphore if the argument requests one.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LaunchDma(int argument)
+        {
+            // The copy goes first, so the semaphore is only released once it has been issued.
+            DmaCopy(argument);
+
+            ReleaseSemaphore(argument);
+        }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
new file mode 100644
index 00000000..7de4d5f0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
@@ -0,0 +1,271 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ /// <summary>
+ /// Physical mode target.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 0-1 of the SetSrcPhysMode and SetDstPhysMode registers of DmaClassState.
+ /// </remarks>
+ enum SetPhysModeTarget
+ {
+ LocalFb = 0,
+ CoherentSysmem = 1,
+ NoncoherentSysmem = 2,
+ }
+
+ /// <summary>
+ /// DMA data transfer type.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 0-1 of the LaunchDma register of DmaClassState.
+ /// </remarks>
+ enum LaunchDmaDataTransferType
+ {
+ None = 0,
+ Pipelined = 1,
+ NonPipelined = 2,
+ }
+
+ /// <summary>
+ /// DMA semaphore type.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 3-4 of the LaunchDma register of DmaClassState.
+ /// </remarks>
+ enum LaunchDmaSemaphoreType
+ {
+ None = 0,
+ ReleaseOneWordSemaphore = 1,
+ ReleaseFourWordSemaphore = 2,
+ }
+
+ /// <summary>
+ /// DMA interrupt type.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 5-6 of the LaunchDma register of DmaClassState.
+ /// </remarks>
+ enum LaunchDmaInterruptType
+ {
+ None = 0,
+ Blocking = 1,
+ NonBlocking = 2,
+ }
+
+ /// <summary>
+ /// DMA source or destination memory layout.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from the LaunchDma register of DmaClassState: bit 7 holds the source layout and bit 8 holds the destination layout.
+ /// </remarks>
+ enum LaunchDmaMemoryLayout
+ {
+ Blocklinear = 0,
+ Pitch = 1,
+ }
+
+ /// <summary>
+ /// DMA type.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from the LaunchDma register of DmaClassState: bit 12 holds the source type and bit 13 holds the destination type.
+ /// </remarks>
+ enum LaunchDmaType
+ {
+ Virtual = 0,
+ Physical = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 14-17 of the LaunchDma register of DmaClassState.
+ /// The field is 4 bits wide, so it can also hold values that have no member here (such as 8 and 9).
+ /// </remarks>
+ enum LaunchDmaSemaphoreReduction
+ {
+ Imin = 0,
+ Imax = 1,
+ Ixor = 2,
+ Iand = 3,
+ Ior = 4,
+ Iadd = 5,
+ Inc = 6,
+ Dec = 7,
+ Fadd = 10,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction signedness.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 18 of the LaunchDma register of DmaClassState.
+ /// </remarks>
+ enum LaunchDmaSemaphoreReductionSign
+ {
+ Signed = 0,
+ Unsigned = 1,
+ }
+
+ /// <summary>
+ /// DMA L2 cache bypass.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 20 of the LaunchDma register of DmaClassState.
+ /// </remarks>
+ enum LaunchDmaBypassL2
+ {
+ UsePteSetting = 0,
+ ForceVolatile = 1,
+ }
+
+ /// <summary>
+ /// DMA component remapping selector for a destination component.
+ /// </summary>
+ /// <remarks>
+ /// One selector exists for each destination component, decoded from the SetRemapComponents register of
+ /// DmaClassState (X on bits 0-2, Y on bits 4-6, Z on bits 8-10 and W on bits 12-14).
+ /// Going by the member names, a selector picks a source component, one of the two constants, or no write at all.
+ /// </remarks>
+ enum SetRemapComponentsDst
+ {
+ SrcX = 0,
+ SrcY = 1,
+ SrcZ = 2,
+ SrcW = 3,
+ ConstA = 4,
+ ConstB = 5,
+ NoWrite = 6,
+ }
+
+ /// <summary>
+ /// DMA component remapping component size.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 16-17 of the SetRemapComponents register of DmaClassState.
+ /// The encoded value is the size named by the member minus one.
+ /// </remarks>
+ enum SetRemapComponentsComponentSize
+ {
+ One = 0,
+ Two = 1,
+ Three = 2,
+ Four = 3,
+ }
+
+ /// <summary>
+ /// DMA component remapping number of components.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from the SetRemapComponents register of DmaClassState: bits 20-21 hold the source count and
+ /// bits 24-25 hold the destination count. The encoded value is the count named by the member minus one.
+ /// </remarks>
+ enum SetRemapComponentsNumComponents
+ {
+ One = 0,
+ Two = 1,
+ Three = 2,
+ Four = 3,
+ }
+
+ /// <summary>
+ /// Width in GOBs of the source or destination texture.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 0-3 of the SetDstBlockSize and SetSrcBlockSize registers of DmaClassState.
+ /// </remarks>
+ enum SetBlockSizeWidth
+ {
+ QuarterGob = 14,
+ OneGob = 0,
+ }
+
+ /// <summary>
+ /// Height in GOBs of the source or destination texture.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 4-7 of the SetDstBlockSize and SetSrcBlockSize registers of DmaClassState.
+ /// The encoded value is the base 2 logarithm of the GOB count named by the member.
+ /// </remarks>
+ enum SetBlockSizeHeight
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Depth in GOBs of the source or destination texture.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 8-11 of the SetDstBlockSize and SetSrcBlockSize registers of DmaClassState.
+ /// The encoded value is the base 2 logarithm of the GOB count named by the member.
+ /// </remarks>
+ enum SetBlockSizeDepth
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Height of a single GOB in lines.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 12-15 of the SetDstBlockSize and SetSrcBlockSize registers of DmaClassState.
+ /// </remarks>
+ enum SetBlockSizeGobHeight
+ {
+ GobHeightTesla4 = 0,
+ GobHeightFermi8 = 1,
+ }
+
+ /// <summary>
+ /// DMA copy class state.
+ /// </summary>
+ /// <remarks>
+ /// Every field is a 32-bit word and the fields are declared in offset order: the hexadecimal suffix of each
+ /// reserved field is the byte offset where that padding starts (the 64 words of Reserved00 place Nop at
+ /// byte offset 0x100, and Reserved104 comes right after it). Fields must therefore not be reordered,
+ /// resized or removed. The expression-bodied properties decode bit fields of the word declared right above them.
+ /// </remarks>
+ unsafe struct DmaClassState
+ {
+#pragma warning disable CS0649
+ public fixed uint Reserved00[64];
+ public uint Nop;
+ public fixed uint Reserved104[15];
+ public uint PmTrigger;
+ public fixed uint Reserved144[63];
+ // Semaphore address (upper bits on A, B declared next) and payload.
+ public uint SetSemaphoreA;
+ public int SetSemaphoreAUpper => (int)((SetSemaphoreA >> 0) & 0xFF);
+ public uint SetSemaphoreB;
+ public uint SetSemaphorePayload;
+ public fixed uint Reserved24C[2];
+ public uint SetRenderEnableA;
+ public int SetRenderEnableAUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+ public uint SetRenderEnableB;
+ public uint SetRenderEnableC;
+ public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+ public uint SetSrcPhysMode;
+ public SetPhysModeTarget SetSrcPhysModeTarget => (SetPhysModeTarget)((SetSrcPhysMode >> 0) & 0x3);
+ public uint SetDstPhysMode;
+ public SetPhysModeTarget SetDstPhysModeTarget => (SetPhysModeTarget)((SetDstPhysMode >> 0) & 0x3);
+ public fixed uint Reserved268[38];
+ // Launch word, all the copy flags below are decoded from it.
+ public uint LaunchDma;
+ public LaunchDmaDataTransferType LaunchDmaDataTransferType => (LaunchDmaDataTransferType)((LaunchDma >> 0) & 0x3);
+ public bool LaunchDmaFlushEnable => (LaunchDma & 0x4) != 0;
+ public LaunchDmaSemaphoreType LaunchDmaSemaphoreType => (LaunchDmaSemaphoreType)((LaunchDma >> 3) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 5) & 0x3);
+ public LaunchDmaMemoryLayout LaunchDmaSrcMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 7) & 0x1);
+ public LaunchDmaMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 8) & 0x1);
+ public bool LaunchDmaMultiLineEnable => (LaunchDma & 0x200) != 0;
+ public bool LaunchDmaRemapEnable => (LaunchDma & 0x400) != 0;
+ public bool LaunchDmaForceRmwdisable => (LaunchDma & 0x800) != 0;
+ public LaunchDmaType LaunchDmaSrcType => (LaunchDmaType)((LaunchDma >> 12) & 0x1);
+ public LaunchDmaType LaunchDmaDstType => (LaunchDmaType)((LaunchDma >> 13) & 0x1);
+ public LaunchDmaSemaphoreReduction LaunchDmaSemaphoreReduction => (LaunchDmaSemaphoreReduction)((LaunchDma >> 14) & 0xF);
+ public LaunchDmaSemaphoreReductionSign LaunchDmaSemaphoreReductionSign => (LaunchDmaSemaphoreReductionSign)((LaunchDma >> 18) & 0x1);
+ public bool LaunchDmaSemaphoreReductionEnable => (LaunchDma & 0x80000) != 0;
+ public LaunchDmaBypassL2 LaunchDmaBypassL2 => (LaunchDmaBypassL2)((LaunchDma >> 20) & 0x1);
+ public fixed uint Reserved304[63];
+ // Source (in) and destination (out) offsets, each one split in an upper and a lower word.
+ public uint OffsetInUpper;
+ public int OffsetInUpperUpper => (int)((OffsetInUpper >> 0) & 0xFF);
+ public uint OffsetInLower;
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperUpper => (int)((OffsetOutUpper >> 0) & 0xFF);
+ public uint OffsetOutLower;
+ public uint PitchIn;
+ public uint PitchOut;
+ public uint LineLengthIn;
+ public uint LineCount;
+ public fixed uint Reserved420[184];
+ public uint SetRemapConstA;
+ public uint SetRemapConstB;
+ public uint SetRemapComponents;
+ public SetRemapComponentsDst SetRemapComponentsDstX => (SetRemapComponentsDst)((SetRemapComponents >> 0) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstY => (SetRemapComponentsDst)((SetRemapComponents >> 4) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstZ => (SetRemapComponentsDst)((SetRemapComponents >> 8) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstW => (SetRemapComponentsDst)((SetRemapComponents >> 12) & 0x7);
+ public SetRemapComponentsComponentSize SetRemapComponentsComponentSize => (SetRemapComponentsComponentSize)((SetRemapComponents >> 16) & 0x3);
+ public SetRemapComponentsNumComponents SetRemapComponentsNumSrcComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 20) & 0x3);
+ public SetRemapComponentsNumComponents SetRemapComponentsNumDstComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 24) & 0x3);
+ // Destination surface description.
+ public uint SetDstBlockSize;
+ public SetBlockSizeWidth SetDstBlockSizeWidth => (SetBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+ public SetBlockSizeHeight SetDstBlockSizeHeight => (SetBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetBlockSizeDepth SetDstBlockSizeDepth => (SetBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public SetBlockSizeGobHeight SetDstBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetDstBlockSize >> 12) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOrigin;
+ public int SetDstOriginX => (int)((SetDstOrigin >> 0) & 0xFFFF);
+ public int SetDstOriginY => (int)((SetDstOrigin >> 16) & 0xFFFF);
+ public uint Reserved724;
+ // Source surface description, same fields as the destination one.
+ public uint SetSrcBlockSize;
+ public SetBlockSizeWidth SetSrcBlockSizeWidth => (SetBlockSizeWidth)((SetSrcBlockSize >> 0) & 0xF);
+ public SetBlockSizeHeight SetSrcBlockSizeHeight => (SetBlockSizeHeight)((SetSrcBlockSize >> 4) & 0xF);
+ public SetBlockSizeDepth SetSrcBlockSizeDepth => (SetBlockSizeDepth)((SetSrcBlockSize >> 8) & 0xF);
+ public SetBlockSizeGobHeight SetSrcBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetSrcBlockSize >> 12) & 0xF);
+ public uint SetSrcWidth;
+ public uint SetSrcHeight;
+ public uint SetSrcDepth;
+ public uint SetSrcLayer;
+ public uint SetSrcOrigin;
+ public int SetSrcOriginX => (int)((SetSrcOrigin >> 0) & 0xFFFF);
+ public int SetSrcOriginY => (int)((SetSrcOrigin >> 16) & 0xFFFF);
+ public fixed uint Reserved740[629];
+ public uint PmTriggerEnd;
+ public fixed uint Reserved1118[2490];
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
new file mode 100644
index 00000000..6873ff40
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
@@ -0,0 +1,20 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ /// <summary>
+ /// Buffer to texture copy parameters.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): the field sequence (layout, width, height, depth, Z, then 16-bit X and Y) has the same shape as
+ /// the SetDst*/SetSrc* word groups of DmaClassState, so this struct is presumably overlaid on that state.
+ /// Confirm against the callers before reordering or resizing any field.
+ /// </remarks>
+ struct DmaTexture
+ {
+#pragma warning disable CS0649
+ public MemoryLayout MemoryLayout;
+ public int Width;
+ public int Height;
+ public int Depth;
+ public int RegionZ;
+ public ushort RegionX;
+ public ushort RegionY;
+#pragma warning restore CS0649
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
new file mode 100644
index 00000000..458dc8f6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
@@ -0,0 +1,41 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Values of the TertOp field (bits 16-17) of a compressed method header.
+ /// </summary>
+ /// <remarks>
+ /// Grp0IncMethod and Grp2NonIncMethod intentionally share the value 0.
+ /// </remarks>
+ enum TertOp
+ {
+ Grp0IncMethod = 0,
+ Grp0SetSubDevMask = 1,
+ Grp0StoreSubDevMask = 2,
+ Grp0UseSubDevMask = 3,
+ Grp2NonIncMethod = 0
+ }
+
+ /// <summary>
+ /// Values of the SecOp field (bits 29-31) of a compressed method header.
+ /// </summary>
+ enum SecOp
+ {
+ Grp0UseTert = 0,
+ IncMethod = 1,
+ Grp2UseTert = 2,
+ NonIncMethod = 3,
+ ImmdDataMethod = 4,
+ OneInc = 5,
+ Reserved6 = 6,
+ EndPbSegment = 7
+ }
+
+ /// <summary>
+ /// Compressed method header, a single 32-bit word decoded through bit field accessors.
+ /// </summary>
+ /// <remarks>
+ /// Several fields overlap on purpose (for example MethodCount and ImmdData both use bits 16-28),
+ /// which one is meaningful depends on the operation encoded on the word.
+ /// </remarks>
+ struct CompressedMethod
+ {
+#pragma warning disable CS0649
+ public uint Method;
+#pragma warning restore CS0649
+ public int MethodAddressOld => ExtractBits(2, 11);
+ public int MethodAddress => ExtractBits(0, 12);
+ public int SubdeviceMask => ExtractBits(4, 12);
+ public int MethodSubchannel => ExtractBits(13, 3);
+ public TertOp TertOp => (TertOp)ExtractBits(16, 2);
+ public int MethodCountOld => ExtractBits(18, 11);
+ public int MethodCount => ExtractBits(16, 13);
+ public int ImmdData => ExtractBits(16, 13);
+ public SecOp SecOp => (SecOp)ExtractBits(29, 3);
+
+ /// <summary>
+ /// Reads an unsigned bit field from the method word.
+ /// </summary>
+ /// <param name="lsb">Position of the least significant bit of the field</param>
+ /// <param name="width">Number of bits of the field</param>
+ /// <returns>Field value</returns>
+ private int ExtractBits(int lsb, int width)
+ {
+ uint mask = (1u << width) - 1u;
+
+ return (int)((Method >> lsb) & mask);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
new file mode 100644
index 00000000..b1b236e7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
@@ -0,0 +1,55 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Values of the fetch field (bit 0 of the first entry word).
+ /// </summary>
+ enum Entry0Fetch
+ {
+ Unconditional = 0,
+ Conditional = 1,
+ }
+
+ /// <summary>
+ /// Values of the privilege field (bit 8 of the second entry word).
+ /// </summary>
+ enum Entry1Priv
+ {
+ User = 0,
+ Kernel = 1,
+ }
+
+ /// <summary>
+ /// Values of the level field (bit 9 of the second entry word).
+ /// </summary>
+ enum Entry1Level
+ {
+ Main = 0,
+ Subroutine = 1,
+ }
+
+ /// <summary>
+ /// Values of the sync field (bit 31 of the second entry word).
+ /// </summary>
+ enum Entry1Sync
+ {
+ Proceed = 0,
+ Wait = 1,
+ }
+
+ /// <summary>
+ /// Values of the opcode field (bits 0-7 of the second entry word).
+ /// </summary>
+ enum Entry1Opcode
+ {
+ Nop = 0,
+ Illegal = 1,
+ Crc = 2,
+ PbCrc = 3,
+ }
+
+ /// <summary>
+ /// GPFIFO entry, made of two 32-bit words decoded through bit field accessors.
+ /// </summary>
+ /// <remarks>
+ /// Entry1GetHi and Entry1Opcode both read bits 0-7 of the second word.
+ /// </remarks>
+ struct GPEntry
+ {
+#pragma warning disable CS0649
+ public uint Entry0;
+ public uint Entry1;
+#pragma warning restore CS0649
+ public Entry0Fetch Entry0Fetch => (Entry0Fetch)ExtractBits(Entry0, 0, 1);
+ public int Entry0Get => ExtractBits(Entry0, 2, 30);
+ public int Entry0Operand => (int)Entry0;
+ public int Entry1GetHi => ExtractBits(Entry1, 0, 8);
+ public Entry1Priv Entry1Priv => (Entry1Priv)ExtractBits(Entry1, 8, 1);
+ public Entry1Level Entry1Level => (Entry1Level)ExtractBits(Entry1, 9, 1);
+ public int Entry1Length => ExtractBits(Entry1, 10, 21);
+ public Entry1Sync Entry1Sync => (Entry1Sync)ExtractBits(Entry1, 31, 1);
+ public Entry1Opcode Entry1Opcode => (Entry1Opcode)ExtractBits(Entry1, 0, 8);
+
+ /// <summary>
+ /// Reads an unsigned bit field from an entry word.
+ /// </summary>
+ /// <param name="word">Entry word to read from</param>
+ /// <param name="lsb">Position of the least significant bit of the field</param>
+ /// <param name="width">Number of bits of the field</param>
+ /// <returns>Field value</returns>
+ private static int ExtractBits(uint word, int lsb, int width)
+ {
+ uint mask = (1u << width) - 1u;
+
+ return (int)((word >> lsb) & mask);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
new file mode 100644
index 00000000..e80d98a1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
@@ -0,0 +1,248 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.MME;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO class.
+ /// </summary>
+ class GPFifoClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GPFifoProcessor _parent;
+ private readonly DeviceState<GPFifoClassState> _state;
+
+ private int _previousSubChannel;
+ private bool _createSyncPending;
+
+ private const int MacrosCount = 0x80;
+
+ // Note: The size of the macro memory is unknown, we just make
+ // a guess here and use 256kb as the size. Increase if needed.
+ private const int MacroCodeSize = 256 * 256;
+
+ private readonly Macro[] _macros;
+ private readonly int[] _macroCode;
+
+ /// <summary>
+ /// Initializes the GPU General Purpose FIFO class, registering the method callbacks and allocating the macro storage.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="parent">Parent GPU General Purpose FIFO processor</param>
+ public GPFifoClass(GpuContext context, GPFifoProcessor parent)
+ {
+ _context = context;
+ _parent = parent;
+
+ // Callbacks are keyed by the name of the state field that they are bound to.
+ Dictionary<string, RwCallback> callbacks = new Dictionary<string, RwCallback>();
+
+ callbacks.Add(nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null));
+ callbacks.Add(nameof(GPFifoClassState.Syncpointb), new RwCallback(Syncpointb, null));
+ callbacks.Add(nameof(GPFifoClassState.WaitForIdle), new RwCallback(WaitForIdle, null));
+ callbacks.Add(nameof(GPFifoClassState.SetReference), new RwCallback(SetReference, null));
+ callbacks.Add(nameof(GPFifoClassState.LoadMmeInstructionRam), new RwCallback(LoadMmeInstructionRam, null));
+ callbacks.Add(nameof(GPFifoClassState.LoadMmeStartAddressRam), new RwCallback(LoadMmeStartAddressRam, null));
+ callbacks.Add(nameof(GPFifoClassState.SetMmeShadowRamControl), new RwCallback(SetMmeShadowRamControl, null));
+
+ _state = new DeviceState<GPFifoClassState>(callbacks);
+
+ _macroCode = new int[MacroCodeSize];
+ _macros = new Macro[MacrosCount];
+ }
+
+ /// <summary>
+ /// Creates the host sync requested by a previous WaitForIdle command, if there is one pending.
+ /// </summary>
+ public void CreatePendingSyncs()
+ {
+ if (!_createSyncPending)
+ {
+ return;
+ }
+
+ // Clear the flag first, so that the sync is only created once for each request.
+ _createSyncPending = false;
+ _context.CreateHostSyncIfNeeded(false, false);
+ }
+
+ /// <summary>
+ /// Reads data from the class registers.
+ /// </summary>
+ /// <remarks>
+ /// The read is forwarded as is to the underlying device state.
+ /// </remarks>
+ /// <param name="offset">Register byte offset</param>
+ /// <returns>Data at the specified offset</returns>
+ public int Read(int offset) => _state.Read(offset);
+
+ /// <summary>
+ /// Writes data to the class registers.
+ /// </summary>
+ /// <remarks>
+ /// The write is forwarded as is to the underlying device state, which owns the callbacks registered on the constructor.
+ /// </remarks>
+ /// <param name="offset">Register byte offset</param>
+ /// <param name="data">Data to be written</param>
+ public void Write(int offset, int data) => _state.Write(offset, data);
+
+ /// <summary>
+ /// Performs a semaphore operation on guest memory, using the address, payload and operation
+ /// currently set on the semaphore registers.
+ /// </summary>
+ /// <remarks>
+ /// Only the release and reduction operations write the semaphore value, the acquire operations are not implemented.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ public void Semaphored(int argument)
+ {
+ ulong offsetLower = (ulong)_state.State.SemaphorebOffsetLower << 2;
+ ulong offsetUpper = (ulong)_state.State.SemaphoreaOffsetUpper << 32;
+ ulong address = offsetLower | offsetUpper;
+
+ int payload = _state.State.SemaphorecPayload;
+
+ SemaphoredOperation operation = _state.State.SemaphoredOperation;
+
+ // The sixteen bytes structure has a zero word and a timestamp following the semaphore value.
+ if (_state.State.SemaphoredReleaseSize == SemaphoredReleaseSize.SixteenBytes)
+ {
+ _parent.MemoryManager.Write(address + 4, 0);
+ _parent.MemoryManager.Write(address + 8, _context.GetTimestamp());
+ }
+
+ // TODO: Acquire operations (Wait), interrupts for invalid combinations.
+ switch (operation)
+ {
+ case SemaphoredOperation.Release:
+ {
+ _parent.MemoryManager.Write(address, payload);
+ break;
+ }
+ case SemaphoredOperation.Reduction:
+ {
+ int current = _parent.MemoryManager.Read<int>(address);
+
+ _parent.MemoryManager.Write(address, ReduceSemaphore(current, payload));
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Calculates the value stored by a semaphore reduction, using the reduction and format currently set on the state.
+ /// </summary>
+ /// <param name="mem">Value currently stored on the semaphore</param>
+ /// <param name="value">Semaphore payload</param>
+ /// <returns>Value to be written to the semaphore, the payload itself for reductions that are not handled</returns>
+ private int ReduceSemaphore(int mem, int value)
+ {
+ bool signed = _state.State.SemaphoredFormat == SemaphoredFormat.Signed;
+
+ switch (_state.State.SemaphoredReduction)
+ {
+ case SemaphoredReduction.Min:
+ return signed ? Math.Min(mem, value) : (int)Math.Min((uint)mem, (uint)value);
+ case SemaphoredReduction.Max:
+ return signed ? Math.Max(mem, value) : (int)Math.Max((uint)mem, (uint)value);
+ case SemaphoredReduction.Xor:
+ return value ^ mem;
+ case SemaphoredReduction.And:
+ return value & mem;
+ case SemaphoredReduction.Or:
+ return value | mem;
+ case SemaphoredReduction.Add:
+ return value + mem;
+ case SemaphoredReduction.Inc:
+ // Wraps back to zero once the stored value reaches the payload.
+ return (uint)mem < (uint)value ? mem + 1 : 0;
+ case SemaphoredReduction.Dec:
+ // Reloads the payload when the stored value is zero or above the payload.
+ return (uint)mem > 0 && (uint)mem <= (uint)value ? mem - 1 : value;
+ default:
+ return value;
+ }
+ }
+
+ /// <summary>
+ /// Waits on, or increments, the syncpoint selected by the syncpoint registers.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void Syncpointb(int argument)
+ {
+ SyncpointbOperation operation = _state.State.SyncpointbOperation;
+
+ uint syncpointId = (uint)_state.State.SyncpointbSyncptIndex;
+
+ switch (operation)
+ {
+ case SyncpointbOperation.Wait:
+ {
+ // The wait has no timeout, it only returns when the wait on the threshold completes.
+ uint threshold = (uint)_state.State.SyncpointaPayload;
+
+ _context.Synchronization.WaitOnSyncpoint(syncpointId, threshold, Timeout.InfiniteTimeSpan);
+ break;
+ }
+ case SyncpointbOperation.Incr:
+ {
+ _context.CreateHostSyncIfNeeded(true, true);
+ _context.Synchronization.IncrementSyncpoint(syncpointId);
+ break;
+ }
+ }
+
+ _context.AdvanceSequence();
+ }
+
+ /// <summary>
+ /// Waits for the GPU to be idle.
+ /// </summary>
+ /// <remarks>
+ /// The deferred draws are performed and a pipeline barrier is inserted right away. The host sync creation
+ /// is only flagged here, it is performed by the next <see cref="CreatePendingSyncs"/> call.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ public void WaitForIdle(int argument)
+ {
+ _parent.PerformDeferredDraws();
+ _context.Renderer.Pipeline.Barrier();
+
+ _createSyncPending = true;
+ }
+
+ /// <summary>
+ /// Used as an indirect data barrier on NVN. When used, access to previously written data must be coherent.
+ /// </summary>
+ /// <remarks>
+ /// Inserts a command buffer barrier on the host pipeline, then requests a host sync creation.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ public void SetReference(int argument)
+ {
+ _context.Renderer.Pipeline.CommandBufferBarrier();
+
+ _context.CreateHostSyncIfNeeded(false, true);
+ }
+
+ /// <summary>
+ /// Stores a word of macro code/data at the current instruction RAM pointer, then advances the pointer.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadMmeInstructionRam(int argument)
+ {
+ // The pointer is post-incremented, so consecutive calls fill consecutive words.
+ // NOTE(review): the pointer is not range checked, a value past the end of the macro code array throws.
+ uint position = _state.State.LoadMmeInstructionRamPointer++;
+
+ _macroCode[position] = argument;
+ }
+
+ /// <summary>
+ /// Binds the macro at the current start address RAM pointer to a position for the MME, then advances the pointer.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadMmeStartAddressRam(int argument)
+ {
+ // The pointer is post-incremented, so consecutive calls bind consecutive macro indices.
+ // NOTE(review): the pointer is not range checked, a value past the end of the macros array throws.
+ uint macroIndex = _state.State.LoadMmeStartAddressRamPointer++;
+
+ _macros[macroIndex] = new Macro(argument);
+ }
+
+ /// <summary>
+ /// Changes the shadow RAM control.
+ /// </summary>
+ /// <remarks>
+ /// The new control value is handed over to the parent GPFIFO processor, nothing is applied on this class.
+ /// </remarks>
+ /// <param name="argument">Method call argument</param>
+ public void SetMmeShadowRamControl(int argument)
+ {
+ _parent.SetShadowRamControl(argument);
+ }
+
+ /// <summary>
+ /// Pushes an argument to a macro.
+ /// </summary>
+ /// <remarks>
+ /// The index is used as is to access the macros array, so the caller must pass a value below the macros count.
+ /// </remarks>
+ /// <param name="index">Index of the macro</param>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+ /// <param name="argument">Argument to be pushed to the macro</param>
+ public void MmePushArgument(int index, ulong gpuVa, int argument)
+ {
+ // The array element is accessed in place on purpose, do not copy it to a local first.
+ _macros[index].PushArgument(gpuVa, argument);
+ }
+
+ /// <summary>
+ /// Prepares a macro for execution.
+ /// </summary>
+ /// <remarks>
+ /// The index is used as is to access the macros array, so the caller must pass a value below the macros count.
+ /// </remarks>
+ /// <param name="index">Index of the macro</param>
+ /// <param name="argument">Initial argument passed to the macro</param>
+ public void MmeStart(int index, int argument)
+ {
+ // The macro receives the whole macro code array, along with the context and parent processor.
+ _macros[index].StartExecution(_context, _parent, _macroCode, argument);
+ }
+
+ /// <summary>
+ /// Executes a macro.
+ /// </summary>
+ /// <remarks>
+ /// The index is used as is to access the macros array, so the caller must pass a value below the macros count.
+ /// </remarks>
+ /// <param name="index">Index of the macro</param>
+ /// <param name="state">Current GPU state</param>
+ public void CallMme(int index, IDeviceState state)
+ {
+ // The array element is accessed in place on purpose, do not copy it to a local first.
+ _macros[index].Execute(_macroCode, state);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
new file mode 100644
index 00000000..07d062eb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
@@ -0,0 +1,233 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Semaphore operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 0-4 of the Semaphored register of GPFifoClassState. Each member uses a different single bit.
+ /// </remarks>
+ enum SemaphoredOperation
+ {
+ Acquire = 1,
+ Release = 2,
+ AcqGeq = 4,
+ AcqAnd = 8,
+ Reduction = 16
+ }
+
+ /// <summary>
+ /// Semaphore acquire switch enable.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 12 of the Semaphored register of GPFifoClassState.
+ /// </remarks>
+ enum SemaphoredAcquireSwitch
+ {
+ Disabled = 0,
+ Enabled = 1
+ }
+
+ /// <summary>
+ /// Semaphore release interrupt wait enable.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 20 of the Semaphored register of GPFifoClassState.
+ /// Unlike the other enable fields, the enabled state is encoded as 0 here.
+ /// </remarks>
+ enum SemaphoredReleaseWfi
+ {
+ En = 0,
+ Dis = 1
+ }
+
+ /// <summary>
+ /// Semaphore release structure size.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 24 of the Semaphored register of GPFifoClassState.
+ /// </remarks>
+ enum SemaphoredReleaseSize
+ {
+ SixteenBytes = 0,
+ FourBytes = 1
+ }
+
+ /// <summary>
+ /// Semaphore reduction operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 27-30 of the Semaphored register of GPFifoClassState.
+ /// The field is 4 bits wide, so it can also hold values above 7, which have no member here.
+ /// </remarks>
+ enum SemaphoredReduction
+ {
+ Min = 0,
+ Max = 1,
+ Xor = 2,
+ And = 3,
+ Or = 4,
+ Add = 5,
+ Inc = 6,
+ Dec = 7
+ }
+
+ /// <summary>
+ /// Semaphore format.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 31 of the Semaphored register of GPFifoClassState.
+ /// </remarks>
+ enum SemaphoredFormat
+ {
+ Signed = 0,
+ Unsigned = 1
+ }
+
+ /// <summary>
+ /// Memory Translation Lookaside Buffer Page Directory Buffer invalidation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 0 of the MemOpC register of GPFifoClassState.
+ /// </remarks>
+ enum MemOpCTlbInvalidatePdb
+ {
+ One = 0,
+ All = 1
+ }
+
+ /// <summary>
+ /// Memory Translation Lookaside Buffer GPC invalidation enable.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 1 of the MemOpC register of GPFifoClassState. The enabled state is encoded as 0.
+ /// </remarks>
+ enum MemOpCTlbInvalidateGpc
+ {
+ Enable = 0,
+ Disable = 1
+ }
+
+ /// <summary>
+ /// Memory Translation Lookaside Buffer invalidation target.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 10-11 of the MemOpC register of GPFifoClassState. The value 1 has no member.
+ /// </remarks>
+ enum MemOpCTlbInvalidateTarget
+ {
+ VidMem = 0,
+ SysMemCoherent = 2,
+ SysMemNoncoherent = 3
+ }
+
+ /// <summary>
+ /// Memory operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 27-31 of the MemOpD register of GPFifoClassState.
+ /// </remarks>
+ enum MemOpDOperation
+ {
+ Membar = 5,
+ MmuTlbInvalidate = 9,
+ L2PeermemInvalidate = 13,
+ L2SysmemInvalidate = 14,
+ L2CleanComptags = 15,
+ L2FlushDirty = 16
+ }
+
+ /// <summary>
+ /// Syncpoint operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 0 of the Syncpointb register of GPFifoClassState.
+ /// </remarks>
+ enum SyncpointbOperation
+ {
+ Wait = 0,
+ Incr = 1
+ }
+
+ /// <summary>
+ /// Syncpoint wait switch enable.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 4 of the Syncpointb register of GPFifoClassState.
+ /// </remarks>
+ enum SyncpointbWaitSwitch
+ {
+ Dis = 0,
+ En = 1
+ }
+
+ /// <summary>
+ /// Wait for interrupt scope.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bit 0 of the Wfi register of GPFifoClassState.
+ /// NOTE(review): WFI may stand for "wait for idle" rather than "wait for interrupt", confirm against the NVIDIA definitions.
+ /// </remarks>
+ enum WfiScope
+ {
+ CurrentScgType = 0,
+ All = 1
+ }
+
+ /// <summary>
+ /// Yield operation.
+ /// </summary>
+ /// <remarks>
+ /// Decoded from bits 0-1 of the Yield register of GPFifoClassState.
+ /// </remarks>
+ enum YieldOp
+ {
+ Nop = 0,
+ PbdmaTimeslice = 1,
+ RunlistTimeslice = 2,
+ Tsg = 3
+ }
+
+ /// <summary>
+ /// General Purpose FIFO class state.
+ /// </summary>
+ /// <remarks>
+ /// Every uint field is a 32-bit word and the fields are declared in offset order: the hexadecimal suffix of
+ /// each reserved field is its byte offset (Reserved0C comes after three words, at byte offset 0x0C).
+ /// Fields must therefore not be reordered, resized or removed. The expression-bodied properties decode
+ /// bit fields of the word declared right above them.
+ /// </remarks>
+ struct GPFifoClassState
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectNvclass => (int)((SetObject >> 0) & 0xFFFF);
+ public int SetObjectEngine => (int)((SetObject >> 16) & 0x1F);
+ public uint Illegal;
+ public int IllegalHandle => (int)(Illegal);
+ public uint Nop;
+ public int NopHandle => (int)(Nop);
+ public uint Reserved0C;
+ // Semaphore address (upper bits on A, lower bits on B), payload (C) and operation (D).
+ public uint Semaphorea;
+ public int SemaphoreaOffsetUpper => (int)((Semaphorea >> 0) & 0xFF);
+ public uint Semaphoreb;
+ public int SemaphorebOffsetLower => (int)((Semaphoreb >> 2) & 0x3FFFFFFF);
+ public uint Semaphorec;
+ public int SemaphorecPayload => (int)(Semaphorec);
+ public uint Semaphored;
+ public SemaphoredOperation SemaphoredOperation => (SemaphoredOperation)((Semaphored >> 0) & 0x1F);
+ public SemaphoredAcquireSwitch SemaphoredAcquireSwitch => (SemaphoredAcquireSwitch)((Semaphored >> 12) & 0x1);
+ public SemaphoredReleaseWfi SemaphoredReleaseWfi => (SemaphoredReleaseWfi)((Semaphored >> 20) & 0x1);
+ public SemaphoredReleaseSize SemaphoredReleaseSize => (SemaphoredReleaseSize)((Semaphored >> 24) & 0x1);
+ public SemaphoredReduction SemaphoredReduction => (SemaphoredReduction)((Semaphored >> 27) & 0xF);
+ public SemaphoredFormat SemaphoredFormat => (SemaphoredFormat)((Semaphored >> 31) & 0x1);
+ public uint NonStallInterrupt;
+ public int NonStallInterruptHandle => (int)(NonStallInterrupt);
+ public uint FbFlush;
+ public int FbFlushHandle => (int)(FbFlush);
+ public uint Reserved28;
+ public uint Reserved2C;
+ // The generic operand and the TLB invalidation fields below overlap on the same bits of MemOpC and MemOpD.
+ public uint MemOpC;
+ public int MemOpCOperandLow => (int)((MemOpC >> 2) & 0x3FFFFFFF);
+ public MemOpCTlbInvalidatePdb MemOpCTlbInvalidatePdb => (MemOpCTlbInvalidatePdb)((MemOpC >> 0) & 0x1);
+ public MemOpCTlbInvalidateGpc MemOpCTlbInvalidateGpc => (MemOpCTlbInvalidateGpc)((MemOpC >> 1) & 0x1);
+ public MemOpCTlbInvalidateTarget MemOpCTlbInvalidateTarget => (MemOpCTlbInvalidateTarget)((MemOpC >> 10) & 0x3);
+ public int MemOpCTlbInvalidateAddrLo => (int)((MemOpC >> 12) & 0xFFFFF);
+ public uint MemOpD;
+ public int MemOpDOperandHigh => (int)((MemOpD >> 0) & 0xFF);
+ public MemOpDOperation MemOpDOperation => (MemOpDOperation)((MemOpD >> 27) & 0x1F);
+ public int MemOpDTlbInvalidateAddrHi => (int)((MemOpD >> 0) & 0xFF);
+ public uint Reserved38;
+ public uint Reserved3C;
+ public uint Reserved40;
+ public uint Reserved44;
+ public uint Reserved48;
+ public uint Reserved4C;
+ public uint SetReference;
+ public int SetReferenceCount => (int)(SetReference);
+ public uint Reserved54;
+ public uint Reserved58;
+ public uint Reserved5C;
+ public uint Reserved60;
+ public uint Reserved64;
+ public uint Reserved68;
+ public uint Reserved6C;
+ // Syncpoint payload (A), then operation and syncpoint index (B).
+ public uint Syncpointa;
+ public int SyncpointaPayload => (int)(Syncpointa);
+ public uint Syncpointb;
+ public SyncpointbOperation SyncpointbOperation => (SyncpointbOperation)((Syncpointb >> 0) & 0x1);
+ public SyncpointbWaitSwitch SyncpointbWaitSwitch => (SyncpointbWaitSwitch)((Syncpointb >> 4) & 0x1);
+ public int SyncpointbSyncptIndex => (int)((Syncpointb >> 8) & 0xFFF);
+ public uint Wfi;
+ public WfiScope WfiScope => (WfiScope)((Wfi >> 0) & 0x1);
+ public uint CrcCheck;
+ public int CrcCheckValue => (int)(CrcCheck);
+ public uint Yield;
+ public YieldOp YieldOp => (YieldOp)((Yield >> 0) & 0x3);
+ // TODO: Eventually move this to per-engine state.
+ public Array31<uint> Reserved84;
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public uint SetNotifyB;
+ public uint Notify;
+ public uint WaitForIdle;
+ // The two pointer words below are post-incremented by GPFifoClass on each write to the matching RAM word.
+ public uint LoadMmeInstructionRamPointer;
+ public uint LoadMmeInstructionRam;
+ public uint LoadMmeStartAddressRamPointer;
+ public uint LoadMmeStartAddressRam;
+ public uint SetMmeShadowRamControl;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
new file mode 100644
index 00000000..cd29a9da
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -0,0 +1,262 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Collections.Concurrent;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO device.
+ /// </summary>
+ public sealed class GPFifoDevice : IDisposable
+ {
+ /// <summary>
+ /// Indicates if the command buffer has pre-fetch enabled.
+ /// </summary>
+ /// <remarks>
+ /// Entries with the sync field set to wait become <see cref="NoPrefetch"/>, all the other ones become <see cref="Prefetch"/>.
+ /// </remarks>
+ private enum CommandBufferType
+ {
+ Prefetch,
+ NoPrefetch
+ }
+
+ /// <summary>
+ /// A command buffer queued on the GPFIFO, with the information required to fetch and process it.
+ /// </summary>
+ private struct CommandBuffer
+ {
+ /// <summary>
+ /// Processor that consumes the command buffer. Contains channel state.
+ /// </summary>
+ public GPFifoProcessor Processor;
+
+ /// <summary>
+ /// Tells if the command buffer can be fetched ahead of time or not.
+ /// </summary>
+ public CommandBufferType Type;
+
+ /// <summary>
+ /// Data that was already fetched, or null if nothing was fetched yet.
+ /// </summary>
+ public int[] Words;
+
+ /// <summary>
+ /// The GPFIFO entry address (used in <see cref="CommandBufferType.NoPrefetch"/> mode).
+ /// </summary>
+ public ulong EntryAddress;
+
+ /// <summary>
+ /// Number of 32-bit words that the GPFIFO entry points to.
+ /// </summary>
+ public uint EntryCount;
+
+ /// <summary>
+ /// Reads the command buffer words from memory.
+ /// </summary>
+ /// <param name="memoryManager">The memory manager used to fetch the data</param>
+ /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+ /// <returns>The fetched data</returns>
+ private ReadOnlySpan<int> GetWords(MemoryManager memoryManager, bool flush)
+ {
+ // Each entry is a 32-bit word.
+ int sizeInBytes = (int)EntryCount * 4;
+
+ return MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, sizeInBytes, flush));
+ }
+
+ /// <summary>
+ /// Reads the command buffer right away, and keeps a copy of the data for later use.
+ /// </summary>
+ /// <param name="memoryManager">The memory manager used to fetch the data</param>
+ public void Prefetch(MemoryManager memoryManager)
+ {
+ ReadOnlySpan<int> data = GetWords(memoryManager, true);
+
+ Words = data.ToArray();
+ }
+
+ /// <summary>
+ /// Gets the command buffer words, from the prefetched copy if there is one, or from memory otherwise.
+ /// </summary>
+ /// <param name="memoryManager">The memory manager used to fetch the data</param>
+ /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+ /// <returns>The command buffer words</returns>
+ public ReadOnlySpan<int> Fetch(MemoryManager memoryManager, bool flush)
+ {
+ if (Words != null)
+ {
+ return Words;
+ }
+
+ return GetWords(memoryManager, flush);
+ }
+ }
+
+ private readonly ConcurrentQueue<CommandBuffer> _commandBufferQueue;
+
+ private CommandBuffer _currentCommandBuffer;
+ private GPFifoProcessor _prevChannelProcessor;
+
+ private readonly bool _ibEnable;
+ private readonly GpuContext _context;
+ private readonly AutoResetEvent _event;
+
+ private bool _interrupt;
+ private int _flushSkips;
+
+ /// <summary>
+ /// Creates a new instance of the GPU General Purpose FIFO device.
+ /// </summary>
+ /// <remarks>
+ /// The device starts with an empty queue, command processing enabled and the new entries event not signaled.
+ /// </remarks>
+ /// <param name="context">GPU context that the GPFIFO belongs to</param>
+ internal GPFifoDevice(GpuContext context)
+ {
+ _commandBufferQueue = new ConcurrentQueue<CommandBuffer>();
+ _ibEnable = true;
+ _context = context;
+ _event = new AutoResetEvent(false);
+ }
+
+ /// <summary>
+ /// Signal the FIFO that there are new entries to process.
+ /// </summary>
+ /// <remarks>
+ /// This sets the event that <see cref="WaitForCommands"/> waits on. The methods that push entries do not set it.
+ /// </remarks>
+ public void SignalNewEntries()
+ {
+ _event.Set();
+ }
+
+ /// <summary>
+ /// Queues a GPFIFO entry made from commands that are already on host memory.
+ /// It is intended to be used by nvservices to handle special cases.
+ /// </summary>
+ /// <param name="processor">Processor used to process <paramref name="commandBuffer"/></param>
+ /// <param name="commandBuffer">The command buffer containing the prefetched commands</param>
+ internal void PushHostCommandBuffer(GPFifoProcessor processor, int[] commandBuffer)
+ {
+ CommandBuffer hostCommandBuffer = new CommandBuffer();
+
+ hostCommandBuffer.Processor = processor;
+ hostCommandBuffer.Type = CommandBufferType.Prefetch;
+ hostCommandBuffer.Words = commandBuffer;
+
+ // The words do not come from guest memory, so there is no valid entry address.
+ hostCommandBuffer.EntryAddress = ulong.MaxValue;
+ hostCommandBuffer.EntryCount = (uint)commandBuffer.Length;
+
+ _commandBufferQueue.Enqueue(hostCommandBuffer);
+ }
+
+ /// <summary>
+ /// Builds the command buffer described by a GPFIFO entry. No data is fetched at this point.
+ /// </summary>
+ /// <param name="processor">Processor used to process the command buffer pointed to by <paramref name="entry"/></param>
+ /// <param name="entry">The GPFIFO entry</param>
+ /// <returns>A new CommandBuffer based on the GPFIFO entry</returns>
+ private static CommandBuffer CreateCommandBuffer(GPFifoProcessor processor, GPEntry entry)
+ {
+ // Entries that must wait can't be fetched ahead of time.
+ CommandBufferType type = entry.Entry1Sync == Entry1Sync.Wait
+ ? CommandBufferType.NoPrefetch
+ : CommandBufferType.Prefetch;
+
+ ulong addressLow = (ulong)entry.Entry0Get << 2;
+ ulong addressHigh = (ulong)entry.Entry1GetHi << 32;
+
+ CommandBuffer commandBuffer = new CommandBuffer();
+
+ commandBuffer.Processor = processor;
+ commandBuffer.Type = type;
+ commandBuffer.Words = null;
+ commandBuffer.EntryAddress = addressLow | addressHigh;
+ commandBuffer.EntryCount = (uint)entry.Entry1Length;
+
+ return commandBuffer;
+ }
+
+ /// <summary>
+ /// Queues the command buffers described by GPFIFO entries.
+ /// </summary>
+ /// <remarks>
+ /// Command buffers are prefetched until the first entry that does not allow it is found.
+ /// That entry and all the following ones are fetched later, when they are processed.
+ /// </remarks>
+ /// <param name="processor">Processor used to process the command buffers pointed to by <paramref name="entries"/></param>
+ /// <param name="entries">GPFIFO entries</param>
+ internal void PushEntries(GPFifoProcessor processor, ReadOnlySpan<ulong> entries)
+ {
+ bool canPrefetch = true;
+
+ foreach (ulong value in entries)
+ {
+ // A writable copy is required to reinterpret the raw value as a GPFIFO entry.
+ ulong rawEntry = value;
+
+ CommandBuffer commandBuffer = CreateCommandBuffer(processor, Unsafe.As<ulong, GPEntry>(ref rawEntry));
+
+ if (commandBuffer.Type == CommandBufferType.NoPrefetch)
+ {
+ canPrefetch = false;
+ }
+ else if (canPrefetch && commandBuffer.Type == CommandBufferType.Prefetch)
+ {
+ commandBuffer.Prefetch(processor.MemoryManager);
+ }
+
+ _commandBufferQueue.Enqueue(commandBuffer);
+ }
+ }
+
+ /// <summary>
+ /// Waits until commands are pushed to the FIFO, for at most 8 milliseconds.
+ /// </summary>
+ /// <returns>True if commands were received, false if wait timed out</returns>
+ public bool WaitForCommands()
+ {
+ // No need to wait if something is already queued.
+ if (!_commandBufferQueue.IsEmpty)
+ {
+ return true;
+ }
+
+ bool signaled = _event.WaitOne(8);
+
+ return signaled && !_commandBufferQueue.IsEmpty;
+ }
+
+ /// <summary>
+ /// Processes the command buffers queued on the FIFO, until the queue is empty or processing is interrupted.
+ /// </summary>
+ /// <remarks>
+ /// The interrupt request is cleared before returning.
+ /// </remarks>
+ public void DispatchCalls()
+ {
+ // Use this opportunity to also dispose any pending channels that were closed.
+ _context.RunDeferredActions();
+
+ // Process command buffers.
+ while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
+ {
+ // Each pending flush skip disables the flush for a single command buffer.
+ bool flushCommandBuffer = _flushSkips == 0;
+
+ if (!flushCommandBuffer)
+ {
+ _flushSkips--;
+ }
+
+ _currentCommandBuffer = entry;
+
+ GPFifoProcessor processor = entry.Processor;
+ ReadOnlySpan<int> words = entry.Fetch(processor.MemoryManager, flushCommandBuffer);
+
+ // If we are changing the current channel,
+ // we need to force all the host state to be updated.
+ if (_prevChannelProcessor != processor)
+ {
+ _prevChannelProcessor = processor;
+ processor.ForceAllDirty();
+ }
+
+ processor.Process(entry.EntryAddress, words);
+ }
+
+ _interrupt = false;
+ }
+
+ /// <summary>
+ /// Sets the number of flushes that should be skipped for subsequent command buffers.
+ /// </summary>
+ /// <remarks>
+ /// This can improve performance when command buffer data only needs to be consumed by the GPU.
+ /// The count replaces any previous value, and is decremented once for each command buffer dispatched without a flush.
+ /// </remarks>
+ /// <param name="count">The amount of flushes that should be skipped</param>
+ internal void SetFlushSkips(int count)
+ {
+ _flushSkips = count;
+ }
+
+ /// <summary>
+ /// Interrupts command processing. This will break out of the DispatchCalls loop.
+ /// </summary>
+ /// <remarks>
+ /// The request is checked before each command buffer is dequeued, and is cleared when DispatchCalls returns.
+ /// </remarks>
+ public void Interrupt()
+ {
+ _interrupt = true;
+ }
+
+ /// <summary>
+ /// Disposes of resources used for GPFifo command processing.
+ /// </summary>
+ /// <remarks>
+ /// Only the new entries event is disposed, the queued command buffers are left untouched.
+ /// </remarks>
+ public void Dispose() => _event.Dispose();
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
new file mode 100644
index 00000000..3fb3feee
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -0,0 +1,331 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.Compute;
+using Ryujinx.Graphics.Gpu.Engine.Dma;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Twod;
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO command processor.
+ /// </summary>
+ class GPFifoProcessor
+ {
+ private const int MacrosCount = 0x80;
+ private const int MacroIndexMask = MacrosCount - 1;
+
+ private const int LoadInlineDataMethodOffset = 0x6d;
+ private const int UniformBufferUpdateDataMethodOffset = 0x8e4;
+
+ private readonly GpuChannel _channel;
+
+ /// <summary>
+ /// Channel memory manager.
+ /// </summary>
+ public MemoryManager MemoryManager => _channel.MemoryManager;
+
+ /// <summary>
+ /// 3D Engine.
+ /// </summary>
+ public ThreedClass ThreedClass => _3dClass;
+
+ /// <summary>
+ /// Decoding state of the method header that is currently being processed.
+ /// </summary>
+ private struct DmaState
+ {
+     /// <summary>Method address that receives the next argument word.</summary>
+     public int Method;
+
+     /// <summary>Sub-channel that the method is sent to.</summary>
+     public int SubChannel;
+
+     /// <summary>Number of argument words still expected for the current header.</summary>
+     public int MethodCount;
+
+     /// <summary>When set, <see cref="Method"/> is not advanced after an argument is sent.</summary>
+     public bool NonIncrementing;
+
+     /// <summary>When set, <see cref="Method"/> is advanced a single time and then stays fixed.</summary>
+     public bool IncrementOnce;
+ }
+
+ private DmaState _state;
+
+ private readonly ThreedClass _3dClass;
+ private readonly ComputeClass _computeClass;
+ private readonly InlineToMemoryClass _i2mClass;
+ private readonly TwodClass _2dClass;
+ private readonly DmaClass _dmaClass;
+
+ private readonly GPFifoClass _fifoClass;
+
+ /// <summary>
+ /// Creates the GPFIFO command processor of a channel, along with all the engine classes it dispatches to.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">Channel that owns this GPFIFO processor</param>
+ public GPFifoProcessor(GpuContext context, GpuChannel channel)
+ {
+     _channel = channel;
+
+     // The FIFO class is created first since the 3D class takes it as an argument.
+     _fifoClass = new GPFifoClass(context, this);
+     _3dClass = new ThreedClass(context, channel, _fifoClass);
+
+     // Compute and DMA both take the 3D class as an argument.
+     _computeClass = new ComputeClass(context, channel, _3dClass);
+     _i2mClass = new InlineToMemoryClass(context, channel);
+     _2dClass = new TwodClass(channel);
+     _dmaClass = new DmaClass(context, channel, _3dClass);
+ }
+
+ /// <summary>
+ /// Decodes and executes all the words of a command buffer.
+ /// </summary>
+ /// <remarks>
+ /// A word is treated as a method header while no arguments are pending, and as an argument
+ /// of the last header otherwise. The pending argument count is kept across calls.
+ /// </remarks>
+ /// <param name="baseGpuVa">GPU virtual address of the first word of the command buffer</param>
+ /// <param name="commandBuffer">Words of the command buffer</param>
+ public void Process(ulong baseGpuVa, ReadOnlySpan<int> commandBuffer)
+ {
+     for (int index = 0; index < commandBuffer.Length; index++)
+     {
+         int command = commandBuffer[index];
+
+         // Address of the current word, each word being 4 bytes long.
+         ulong gpuVa = baseGpuVa + (ulong)index * 4;
+
+         if (_state.MethodCount == 0)
+         {
+             // No arguments are pending, so this word is a method header.
+             CompressedMethod meth = Unsafe.As<int, CompressedMethod>(ref command);
+
+             if (TryFastUniformBufferUpdate(meth, commandBuffer, index))
+             {
+                 // All the argument words were consumed at once, skip over them.
+                 index += meth.MethodCount;
+                 continue;
+             }
+
+             SecOp secOp = meth.SecOp;
+
+             if (secOp == SecOp.ImmdDataMethod)
+             {
+                 // The argument is embedded on the header itself.
+                 Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
+             }
+             else if (secOp == SecOp.IncMethod || secOp == SecOp.NonIncMethod || secOp == SecOp.OneInc)
+             {
+                 _state.Method = meth.MethodAddress;
+                 _state.SubChannel = meth.MethodSubchannel;
+                 _state.MethodCount = meth.MethodCount;
+                 _state.IncrementOnce = secOp == SecOp.OneInc;
+                 _state.NonIncrementing = secOp == SecOp.NonIncMethod;
+             }
+
+             continue;
+         }
+
+         if (TryFastI2mBufferUpdate(commandBuffer, ref index))
+         {
+             continue;
+         }
+
+         Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
+
+         if (!_state.NonIncrementing)
+         {
+             _state.Method++;
+         }
+
+         // After the single increment, the method address stays fixed.
+         if (_state.IncrementOnce)
+         {
+             _state.NonIncrementing = true;
+         }
+
+         _state.MethodCount--;
+     }
+
+     _3dClass.FlushUboDirty();
+ }
+
+ /// <summary>
+ /// Attempts to push all the pending inline data words to the target engine with a single call.
+ /// This is only done for non-incrementing writes to the inline data method on sub-channels 0 to 2.
+ /// </summary>
+ /// <param name="commandBuffer">Command buffer containing the data</param>
+ /// <param name="offset">
+ /// Index of the first data word at <paramref name="commandBuffer"/>. On success, it is advanced to
+ /// the last consumed word, as the caller is expected to increment it once more
+ /// </param>
+ /// <returns>True if the data was consumed by the fast path, false otherwise</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private bool TryFastI2mBufferUpdate(ReadOnlySpan<int> commandBuffer, ref int offset)
+ {
+     bool isInlineDataUpload =
+         _state.Method == LoadInlineDataMethodOffset &&
+         _state.NonIncrementing &&
+         _state.SubChannel <= 2;
+
+     if (!isInlineDataUpload)
+     {
+         return false;
+     }
+
+     // The data might continue on the next command buffer, so only consume what is available here.
+     int consumeCount = Math.Min(_state.MethodCount, commandBuffer.Length - offset);
+
+     ReadOnlySpan<int> data = commandBuffer.Slice(offset, consumeCount);
+
+     switch (_state.SubChannel)
+     {
+         case 0:
+             _3dClass.LoadInlineData(data);
+             break;
+         case 1:
+             _computeClass.LoadInlineData(data);
+             break;
+         default:
+             _i2mClass.LoadInlineData(data);
+             break;
+     }
+
+     offset += consumeCount - 1;
+     _state.MethodCount -= consumeCount;
+
+     return true;
+ }
+
+ /// <summary>
+ /// Attempts to update constant buffer data with a single call.
+ /// This is only done for non-incrementing writes to the uniform buffer data method,
+ /// when all the arguments are present on the command buffer.
+ /// On success, the <see cref="CompressedMethod.MethodCount"/> words following the header were
+ /// passed to the 3D engine, and the caller is responsible for skipping them.
+ /// </summary>
+ /// <param name="meth">Method header to be checked</param>
+ /// <param name="commandBuffer">Command buffer containing <paramref name="meth"/></param>
+ /// <param name="offset">Index of <paramref name="meth"/> at <paramref name="commandBuffer"/></param>
+ /// <returns>True if the data was consumed by the fast path, false otherwise</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private bool TryFastUniformBufferUpdate(CompressedMethod meth, ReadOnlySpan<int> commandBuffer, int offset)
+ {
+     if (meth.MethodAddress != UniformBufferUpdateDataMethodOffset || meth.SecOp != SecOp.NonIncMethod)
+     {
+         return false;
+     }
+
+     // Number of words that come after the header on this command buffer.
+     int wordsAfterHeader = commandBuffer.Length - offset - 1;
+
+     if (meth.MethodCount > wordsAfterHeader)
+     {
+         return false;
+     }
+
+     ReadOnlySpan<int> data = commandBuffer.Slice(offset + 1, meth.MethodCount);
+
+     _3dClass.ConstantBufferUpdate(data);
+
+     return true;
+ }
+
+ /// <summary>
+ /// Sends an uncompressed method call to the class that should handle it.
+ /// </summary>
+ /// <remarks>
+ /// Method offsets below 0x60 are written to the GPFIFO class, offsets below 0xe00 are written
+ /// to the engine bound to the sub-channel, and the remaining ones are macro calls.
+ /// </remarks>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+ /// <param name="offset">Method offset, in words</param>
+ /// <param name="argument">Method call argument</param>
+ /// <param name="subChannel">Sub-channel that the method was sent to</param>
+ /// <param name="isLastCall">True if this is the last argument of the method, false otherwise</param>
+ private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall)
+ {
+     if (offset < 0x60)
+     {
+         _fifoClass.Write(offset * 4, argument);
+
+         return;
+     }
+
+     if (offset < 0xe00)
+     {
+         // The engine classes take byte offsets.
+         int byteOffset = offset * 4;
+
+         switch (subChannel)
+         {
+             case 0:
+                 _3dClass.Write(byteOffset, argument);
+                 break;
+             case 1:
+                 _computeClass.Write(byteOffset, argument);
+                 break;
+             case 2:
+                 _i2mClass.Write(byteOffset, argument);
+                 break;
+             case 3:
+                 _2dClass.Write(byteOffset, argument);
+                 break;
+             case 4:
+                 _dmaClass.Write(byteOffset, argument);
+                 break;
+         }
+
+         return;
+     }
+
+     // Macro calls are only handled for the 3D and 2D sub-channels.
+     IDeviceState state;
+
+     switch (subChannel)
+     {
+         case 0:
+             state = _3dClass;
+             break;
+         case 3:
+             state = _2dClass;
+             break;
+         default:
+             return;
+     }
+
+     int macroIndex = (offset >> 1) & MacroIndexMask;
+
+     // Even offsets start the macro, odd offsets push one more argument to it.
+     bool isArgument = (offset & 1) != 0;
+
+     if (isArgument)
+     {
+         _fifoClass.MmePushArgument(macroIndex, gpuVa, argument);
+     }
+     else
+     {
+         _fifoClass.MmeStart(macroIndex, argument);
+     }
+
+     if (!isLastCall)
+     {
+         return;
+     }
+
+     _fifoClass.CallMme(macroIndex, state);
+
+     _3dClass.PerformDeferredDraws();
+ }
+
+ /// <summary>
+ /// Writes a value directly to the state of one of the classes owned by this processor.
+ /// Nothing is written when the class ID is not one of the handled ones.
+ /// </summary>
+ /// <param name="classId">ID of the class that should receive the write</param>
+ /// <param name="offset">State offset in bytes</param>
+ /// <param name="value">Value to be written</param>
+ public void Write(ClassId classId, int offset, int value)
+ {
+     switch (classId)
+     {
+         case ClassId.Threed:
+             _3dClass.Write(offset, value);
+             return;
+
+         case ClassId.Compute:
+             _computeClass.Write(offset, value);
+             return;
+
+         case ClassId.InlineToMemory:
+             _i2mClass.Write(offset, value);
+             return;
+
+         case ClassId.Twod:
+             _2dClass.Write(offset, value);
+             return;
+
+         case ClassId.Dma:
+             _dmaClass.Write(offset, value);
+             return;
+
+         case ClassId.GPFifo:
+             _fifoClass.Write(offset, value);
+             return;
+     }
+ }
+
+ /// <summary>
+ /// Sets the shadow ram control value. The value is forwarded to the 3D engine class.
+ /// </summary>
+ /// <param name="control">New shadow ram control value</param>
+ public void SetShadowRamControl(int control) => _3dClass.SetShadowRamControl(control);
+
+ /// <summary>
+ /// Marks all the 3D engine state as modified to force a full host state update,
+ /// and requests the buffers and textures of the channel to be rebound.
+ /// </summary>
+ public void ForceAllDirty()
+ {
+     _3dClass.ForceStateDirty();
+
+     // Resources in use by the channel have to be bound again as well.
+     _channel.BufferManager.Rebind();
+     _channel.TextureManager.Rebind();
+ }
+
+ /// <summary>
+ /// Performs the draws that were deferred by the 3D engine class, if any.
+ /// </summary>
+ public void PerformDeferredDraws() => _3dClass.PerformDeferredDraws();
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
new file mode 100644
index 00000000..e1d7e940
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Represents a Inline-to-Memory engine class.
+ /// </summary>
+ class InlineToMemoryClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly DeviceState<InlineToMemoryClassState> _state;
+
+ private bool _isLinear;
+
+ private int _offset;
+ private int _size;
+
+ private ulong _dstGpuVa;
+ private int _dstX;
+ private int _dstY;
+ private int _dstWidth;
+ private int _dstHeight;
+ private int _dstStride;
+ private int _dstGobBlocksInY;
+ private int _dstGobBlocksInZ;
+ private int _lineLengthIn;
+ private int _lineCount;
+
+ private bool _finished;
+
+ private int[] _buffer;
+
+ /// <summary>
+ /// Creates a new instance of the Inline-to-Memory engine class.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="initializeState">
+ /// True to create the register state of the class, false to leave it unset
+ /// (to be used when the class is part of another engine)
+ /// </param>
+ public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
+ {
+     _context = context;
+     _channel = channel;
+
+     if (!initializeState)
+     {
+         return;
+     }
+
+     // Register writes that trigger an action, all the other registers are plain storage.
+     var callbacks = new Dictionary<string, RwCallback>
+     {
+         [nameof(InlineToMemoryClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
+         [nameof(InlineToMemoryClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null)
+     };
+
+     _state = new DeviceState<InlineToMemoryClassState>(callbacks);
+ }
+
+ /// <summary>
+ /// Creates a new instance of the Inline-to-Memory engine class, with its own register state.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ public InlineToMemoryClass(GpuContext context, GpuChannel channel)
+     : this(context, channel, initializeState: true)
+ {
+ }
+
+ /// <summary>
+ /// Reads the value of a class register.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register</param>
+ /// <returns>Value read from the register at the specified offset</returns>
+ public int Read(int offset)
+ {
+     return _state.Read(offset);
+ }
+
+ /// <summary>
+ /// Writes a value to a class register.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register</param>
+ /// <param name="data">Value to be written</param>
+ public void Write(int offset, int data)
+ {
+     _state.Write(offset, data);
+ }
+
+ /// <summary>
+ /// Starts a Inline-to-Memory DMA copy using the register state of this class.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LaunchDma(int argument) => LaunchDma(ref _state.State, argument);
+
+ /// <summary>
+ /// Starts a Inline-to-Memory DMA copy. This captures the transfer parameters and prepares
+ /// the buffer that receives the inline data, the actual copy is done once all the data was pushed.
+ /// </summary>
+ /// <param name="state">Class state holding the transfer parameters</param>
+ /// <param name="argument">Method call argument, bit 0 selects a linear destination</param>
+ public void LaunchDma(ref InlineToMemoryClassState state, int argument)
+ {
+     _finished = false;
+     _offset = 0;
+     _isLinear = (argument & 1) != 0;
+
+     // Lines are padded to a multiple of 4 bytes, as the data is pushed one word at a time.
+     _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);
+
+     int requiredWords = _size / 4;
+
+     // The buffer is kept across transfers, and only replaced when it is too small.
+     if (_buffer == null || _buffer.Length < requiredWords)
+     {
+         _buffer = new int[requiredWords];
+     }
+
+     // Destination address and origin.
+     _dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;
+     _dstX = state.SetDstOriginBytesXV;
+     _dstY = state.SetDstOriginSamplesYV;
+
+     // Destination layout.
+     _dstWidth = (int)state.SetDstWidth;
+     _dstHeight = (int)state.SetDstHeight;
+     _dstStride = (int)state.PitchOut;
+     _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
+     _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
+
+     // Size of the region being written.
+     _lineLengthIn = (int)state.LineLengthIn;
+     _lineCount = (int)state.LineCount;
+ }
+
+ /// <summary>
+ /// Pushes a block of data to the Inline-to-Memory engine.
+ /// The data is ignored if no transfer is in progress. Data exceeding the capacity of the internal buffer is discarded.
+ /// </summary>
+ /// <param name="data">Data to push</param>
+ public void LoadInlineData(ReadOnlySpan<int> data)
+ {
+     // The buffer only exists after the first DMA launch. Data pushed before that
+     // has no transfer to go to, and would otherwise cause a null dereference below.
+     if (_finished || _buffer == null)
+     {
+         return;
+     }
+
+     int copySize = Math.Min(data.Length, _buffer.Length - _offset);
+     data.Slice(0, copySize).CopyTo(new Span<int>(_buffer, _offset, copySize));
+
+     _offset += copySize;
+
+     // The offset is in words, while the size is in bytes.
+     if (_offset * 4 >= _size)
+     {
+         FinishTransfer();
+     }
+ }
+
+ /// <summary>
+ /// Pushes a word of data to the Inline-to-Memory engine.
+ /// The word is ignored if no transfer is in progress. A word that does not fit on the internal buffer is discarded.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadInlineData(int argument)
+ {
+     // The buffer only exists after the first DMA launch. Data pushed before that
+     // has no transfer to go to, and would otherwise cause a null dereference below.
+     if (_finished || _buffer == null)
+     {
+         return;
+     }
+
+     // A transfer with a size of zero might have a buffer without any space,
+     // so the store is bounds checked, just like the block variant clamps its copy size.
+     if (_offset < _buffer.Length)
+     {
+         _buffer[_offset++] = argument;
+     }
+
+     // The offset is in words, while the size is in bytes.
+     if (_offset * 4 >= _size)
+     {
+         FinishTransfer();
+     }
+ }
+
+ /// <summary>
+ /// Performs the actual copy of the inline data, after all of it was pushed,
+ /// and marks the transfer as finished.
+ /// </summary>
+ /// <remarks>
+ /// Linear transfers with a single line are written directly to memory.
+ /// Non-linear transfers are written to a matching texture from the texture cache, if one is found.
+ /// All the other transfers are written to memory using the offsets computed for the destination layout.
+ /// </remarks>
+ private void FinishTransfer()
+ {
+     var memoryManager = _channel.MemoryManager;
+
+     var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+
+     if (_isLinear && _lineCount == 1)
+     {
+         memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
+         _context.AdvanceSequence();
+     }
+     else
+     {
+         // TODO: Verify if the destination X/Y and width/height are taken into account
+         // for linear texture transfers. If not, we can use the fast path for that aswell.
+         // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
+         if (!_isLinear)
+         {
+             var target = memoryManager.Physical.TextureCache.FindTexture(
+                 memoryManager,
+                 _dstGpuVa,
+                 1,
+                 _dstStride,
+                 _dstHeight,
+                 _lineLengthIn,
+                 _lineCount,
+                 _isLinear,
+                 _dstGobBlocksInY,
+                 _dstGobBlocksInZ);
+
+             if (target != null)
+             {
+                 target.SynchronizeMemory();
+                 target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
+                 target.SignalModified();
+
+                 // The transfer is also over when the data goes to a texture.
+                 // Without this, any extra data word pushed before the next launch
+                 // would run the transfer again, or write past the end of the buffer.
+                 _finished = true;
+
+                 return;
+             }
+         }
+
+         var dstCalculator = new OffsetCalculator(
+             _dstWidth,
+             _dstHeight,
+             _dstStride,
+             _isLinear,
+             _dstGobBlocksInY,
+             1);
+
+         int srcOffset = 0;
+
+         for (int y = _dstY; y < _dstY + _lineCount; y++)
+         {
+             int x1 = _dstX;
+             int x2 = _dstX + _lineLengthIn;
+             int x1Round = BitUtils.AlignUp(_dstX, 16);
+             int x2Trunc = BitUtils.AlignDown(x2, 16);
+
+             int x = x1;
+
+             // Bytes before the first 16 bytes aligned position are copied one by one.
+             if (x1Round <= x2)
+             {
+                 for (; x < x1Round; x++, srcOffset++)
+                 {
+                     int dstOffset = dstCalculator.GetOffset(x, y);
+
+                     ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                     memoryManager.Write(dstAddress, data[srcOffset]);
+                 }
+             }
+
+             // The aligned middle part of the line is copied 16 bytes at a time.
+             for (; x < x2Trunc; x += 16, srcOffset += 16)
+             {
+                 int dstOffset = dstCalculator.GetOffset(x, y);
+
+                 ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                 memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
+             }
+
+             // Remaining bytes at the end of the line are copied one by one.
+             for (; x < x2; x++, srcOffset++)
+             {
+                 int dstOffset = dstCalculator.GetOffset(x, y);
+
+                 ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                 memoryManager.Write(dstAddress, data[srcOffset]);
+             }
+
+             // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
+             // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
+             int misalignment = _lineLengthIn & 3;
+
+             if (misalignment != 0)
+             {
+                 srcOffset += 4 - misalignment;
+             }
+         }
+
+         _context.AdvanceSequence();
+     }
+
+     _finished = true;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
new file mode 100644
index 00000000..d0c82a5e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
@@ -0,0 +1,181 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Notification type, as encoded on the Notify register.
+ /// </summary>
+ enum NotifyType
+ {
+     /// <summary>Write only.</summary>
+     WriteOnly = 0,
+
+     /// <summary>Write, then awaken.</summary>
+     WriteThenAwaken = 1,
+ }
+
+ /// <summary>
+ /// Block width of the destination texture, in GOBs.
+ /// </summary>
+ enum SetDstBlockSizeWidth
+ {
+     /// <summary>One GOB, the only value defined.</summary>
+     OneGob = 0,
+ }
+
+ /// <summary>
+ /// Block height of the destination texture, in GOBs.
+ /// Each value is the base 2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeHeight
+ {
+     /// <summary>1 GOB.</summary>
+     OneGob = 0,
+
+     /// <summary>2 GOBs.</summary>
+     TwoGobs = 1,
+
+     /// <summary>4 GOBs.</summary>
+     FourGobs = 2,
+
+     /// <summary>8 GOBs.</summary>
+     EightGobs = 3,
+
+     /// <summary>16 GOBs.</summary>
+     SixteenGobs = 4,
+
+     /// <summary>32 GOBs.</summary>
+     ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Block depth of the destination texture, in GOBs.
+ /// Each value is the base 2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeDepth
+ {
+     /// <summary>1 GOB.</summary>
+     OneGob = 0,
+
+     /// <summary>2 GOBs.</summary>
+     TwoGobs = 1,
+
+     /// <summary>4 GOBs.</summary>
+     FourGobs = 2,
+
+     /// <summary>8 GOBs.</summary>
+     EightGobs = 3,
+
+     /// <summary>16 GOBs.</summary>
+     SixteenGobs = 4,
+
+     /// <summary>32 GOBs.</summary>
+     ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Layout of the destination texture in memory.
+ /// </summary>
+ enum LaunchDmaDstMemoryLayout
+ {
+     /// <summary>Block linear layout.</summary>
+     Blocklinear = 0,
+
+     /// <summary>Pitch (linear) layout.</summary>
+     Pitch = 1,
+ }
+
+ /// <summary>
+ /// Completion type of a DMA launch.
+ /// </summary>
+ enum LaunchDmaCompletionType
+ {
+     /// <summary>Flush disabled.</summary>
+     FlushDisable = 0,
+
+     /// <summary>Flush only.</summary>
+     FlushOnly = 1,
+
+     /// <summary>Release semaphore.</summary>
+     ReleaseSemaphore = 2,
+ }
+
+ /// <summary>
+ /// Interrupt type of a DMA launch.
+ /// </summary>
+ enum LaunchDmaInterruptType
+ {
+     /// <summary>No interrupt.</summary>
+     None = 0,
+
+     /// <summary>Interrupt.</summary>
+     Interrupt = 1,
+ }
+
+ /// <summary>
+ /// Size of the semaphore structure of a DMA launch.
+ /// </summary>
+ enum LaunchDmaSemaphoreStructSize
+ {
+     /// <summary>Four words.</summary>
+     FourWords = 0,
+
+     /// <summary>One word.</summary>
+     OneWord = 1,
+ }
+
+ /// <summary>
+ /// Reduction operation of the semaphore of a DMA launch.
+ /// </summary>
+ enum LaunchDmaReductionOp
+ {
+     /// <summary>Add.</summary>
+     RedAdd = 0,
+
+     /// <summary>Minimum.</summary>
+     RedMin = 1,
+
+     /// <summary>Maximum.</summary>
+     RedMax = 2,
+
+     /// <summary>Increment.</summary>
+     RedInc = 3,
+
+     /// <summary>Decrement.</summary>
+     RedDec = 4,
+
+     /// <summary>Bitwise AND.</summary>
+     RedAnd = 5,
+
+     /// <summary>Bitwise OR.</summary>
+     RedOr = 6,
+
+     /// <summary>Bitwise exclusive OR.</summary>
+     RedXor = 7,
+ }
+
+ /// <summary>
+ /// Format of the value used on the semaphore reduction of a DMA launch.
+ /// </summary>
+ enum LaunchDmaReductionFormat
+ {
+     /// <summary>Unsigned 32-bit value.</summary>
+     Unsigned32 = 0,
+
+     /// <summary>Signed 32-bit value.</summary>
+     Signed32 = 1,
+ }
+
+ /// <summary>
+ /// Inline-to-Memory class state.
+ /// </summary>
+ /// <remarks>
+ /// Each <see cref="uint"/> field is one register of the class, and the properties that follow a field
+ /// extract the bit fields of that register. The field order must not change, as the position of a field
+ /// defines the register it maps to. The names of the reserved arrays appear to carry the byte offset
+ /// where the padding starts (TODO confirm against the NVIDIA class definitions).
+ /// </remarks>
+ unsafe struct InlineToMemoryClassState
+ {
+ // CS0649: the fields are never assigned by name from code.
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+ public fixed uint Reserved04[63];
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)(Notify);
+ public uint WaitForIdle;
+ public fixed uint Reserved114[7];
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public uint PmTriggerWfi;
+ public fixed uint Reserved148[2];
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[10];
+ // Length of each line in bytes, and number of lines of the transfer.
+ // The engine pads each line to a multiple of 4 bytes when computing the total data size.
+ public uint LineLengthIn;
+ public uint LineCount;
+ // Upper 8 bits and lower 32 bits of the destination GPU virtual address.
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+ public uint OffsetOut;
+ // Used by the engine as the stride of the destination.
+ public uint PitchOut;
+ // Block dimensions of the destination. The engine uses height and depth as shift counts (1 << value).
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ // Origin of the written region inside of the destination.
+ public uint SetDstOriginBytesX;
+ public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+ public uint SetDstOriginSamplesY;
+ public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+ // Writing to this register starts a transfer. The properties below decode its bit fields.
+ public uint LaunchDma;
+ public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+ public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+ public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+ public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+ public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+ public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+ public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+ // Writing to this register pushes one word of data to the transfer in progress.
+ public uint LoadInlineData;
+ public fixed uint Reserved1B8[9];
+ public uint SetI2mSemaphoreA;
+ public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+ public uint SetI2mSemaphoreB;
+ public uint SetI2mSemaphoreC;
+ public fixed uint Reserved1E8[2];
+ public uint SetI2mSpareNoop00;
+ public uint SetI2mSpareNoop01;
+ public uint SetI2mSpareNoop02;
+ public uint SetI2mSpareNoop03;
+ public fixed uint Reserved200[3200];
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
new file mode 100644
index 00000000..eeef9c67
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
@@ -0,0 +1,15 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Operation performed by the Arithmetic and Logic unit of the GPU Macro.
+ /// </summary>
+ enum AluOperation
+ {
+     /// <summary>ALU register operation.</summary>
+     AluReg = 0,
+
+     /// <summary>Add immediate.</summary>
+     AddImmediate = 1,
+
+     /// <summary>Bitfield replace.</summary>
+     BitfieldReplace = 2,
+
+     /// <summary>Bitfield extract, logical shift left by immediate.</summary>
+     BitfieldExtractLslImm = 3,
+
+     /// <summary>Bitfield extract, logical shift left by register.</summary>
+     BitfieldExtractLslReg = 4,
+
+     /// <summary>Read immediate.</summary>
+     ReadImmediate = 5
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
new file mode 100644
index 00000000..f3e05d38
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Binary register-to-register operation performed by the Arithmetic and Logic unit of the GPU Macro.
+ /// Values 4 to 7 are not defined.
+ /// </summary>
+ enum AluRegOperation
+ {
+     /// <summary>Add.</summary>
+     Add = 0,
+
+     /// <summary>Add with carry.</summary>
+     AddWithCarry = 1,
+
+     /// <summary>Subtract.</summary>
+     Subtract = 2,
+
+     /// <summary>Subtract with borrow.</summary>
+     SubtractWithBorrow = 3,
+
+     /// <summary>Bitwise exclusive OR.</summary>
+     BitwiseExclusiveOr = 8,
+
+     /// <summary>Bitwise OR.</summary>
+     BitwiseOr = 9,
+
+     /// <summary>Bitwise AND.</summary>
+     BitwiseAnd = 10,
+
+     /// <summary>Bitwise AND NOT.</summary>
+     BitwiseAndNot = 11,
+
+     /// <summary>Bitwise NOT AND.</summary>
+     BitwiseNotAnd = 12
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
new file mode 100644
index 00000000..dc336026
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Assignment operation of the GPU Macro.
+ /// </summary>
+ enum AssignmentOperation
+ {
+     /// <summary>Ignore and fetch.</summary>
+     IgnoreAndFetch = 0,
+
+     /// <summary>Move.</summary>
+     Move = 1,
+
+     /// <summary>Move and set method address.</summary>
+     MoveAndSetMaddr = 2,
+
+     /// <summary>Fetch and send.</summary>
+     FetchAndSend = 3,
+
+     /// <summary>Move and send.</summary>
+     MoveAndSend = 4,
+
+     /// <summary>Fetch and set method address.</summary>
+     FetchAndSetMaddr = 5,
+
+     /// <summary>Move and set method address, then fetch and send.</summary>
+     MoveAndSetMaddrThenFetchAndSend = 6,
+
+     /// <summary>Move and set method address, then send high.</summary>
+     MoveAndSetMaddrThenSendHigh = 7
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
new file mode 100644
index 00000000..117961db
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
@@ -0,0 +1,52 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Word of the arguments FIFO, paired with the location it was read from.
+ /// </summary>
+ readonly struct FifoWord
+ {
+     /// <summary>
+     /// GPU virtual address of the word in memory.
+     /// </summary>
+     public ulong GpuVa { get; }
+
+     /// <summary>
+     /// Value of the word.
+     /// </summary>
+     public int Word { get; }
+
+     /// <summary>
+     /// Creates a new FIFO word.
+     /// </summary>
+     /// <param name="gpuVa">GPU virtual address of the word in memory</param>
+     /// <param name="word">Value of the word</param>
+     public FifoWord(ulong gpuVa, int word)
+     {
+         Word = word;
+         GpuVa = gpuVa;
+     }
+ }
+
+ /// <summary>
+ /// Interface implemented by the engines that are able to execute GPU Macro code.
+ /// </summary>
+ interface IMacroEE
+ {
+     /// <summary>
+     /// Queue with the argument words that were pushed for the macro call.
+     /// </summary>
+     Queue<FifoWord> Fifo { get; }
+
+     /// <summary>
+     /// Executes the GPU Macro code that was passed.
+     /// </summary>
+     /// <param name="code">Macro code to execute</param>
+     /// <param name="state">GPU state at the time of the call</param>
+     /// <param name="arg0">First argument of the GPU Macro</param>
+     void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
new file mode 100644
index 00000000..12a3ac02
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
@@ -0,0 +1,101 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU macro program.
+ /// </summary>
+ struct Macro
+ {
+ /// <summary>
+ /// Word offset of the code on the code memory.
+ /// </summary>
+ public int Position { get; }
+
+ private IMacroEE _executionEngine;
+ private bool _executionPending;
+ private int _argument;
+ private MacroHLEFunctionName _hleFunction;
+
+ /// <summary>
+ /// Creates a new GPU macro program starting at the given position of the code memory.
+ /// No execution engine is assigned yet; one is picked on the first call.
+ /// </summary>
+ /// <param name="position">Word offset where the macro code starts</param>
+ public Macro(int position)
+ {
+     _hleFunction = MacroHLEFunctionName.None;
+     _executionEngine = null;
+
+     _executionPending = false;
+     _argument = 0;
+
+     Position = position;
+ }
+
+ /// <summary>
+ /// Starts a macro call: stores its first argument and flags the execution as pending.
+ /// The execution engine is selected the first time this is called.
+ /// </summary>
+ /// <param name="context">GPU context where the macro code is being executed</param>
+ /// <param name="processor">GPU GP FIFO command processor</param>
+ /// <param name="code">Code memory containing the macro</param>
+ /// <param name="argument">First argument of the macro call</param>
+ public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument)
+ {
+     _executionPending = true;
+     _argument = argument;
+
+     if (_executionEngine == null)
+     {
+         // High-level emulation is preferred when enabled and the code matches a known macro,
+         // followed by the JIT when enabled, and the interpreter is used otherwise.
+         bool useHle =
+             GraphicsConfig.EnableMacroHLE &&
+             MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction);
+
+         if (useHle)
+         {
+             _executionEngine = new MacroHLE(processor, _hleFunction);
+         }
+         else if (GraphicsConfig.EnableMacroJit)
+         {
+             _executionEngine = new MacroJit();
+         }
+         else
+         {
+             _executionEngine = new MacroInterpreter();
+         }
+     }
+
+     // We don't consume the parameter buffer value, so we don't need to flush it.
+     // Doing so improves performance if the value was written by a GPU shader.
+     switch (_hleFunction)
+     {
+         case MacroHLEFunctionName.DrawElementsIndirect:
+             context.GPFifo.SetFlushSkips(1);
+             break;
+         case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+             context.GPFifo.SetFlushSkips(2);
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Executes the macro program if a call is pending, and does nothing otherwise.
+ /// </summary>
+ /// <param name="code">Code memory containing the macro</param>
+ /// <param name="state">Current GPU state</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state)
+ {
+     if (!_executionPending)
+     {
+         return;
+     }
+
+     _executionPending = false;
+
+     if (_executionEngine != null)
+     {
+         _executionEngine.Execute(code.Slice(Position), state, _argument);
+     }
+ }
+
+ /// <summary>
+ /// Pushes an argument to the arguments FIFO of the execution engine.
+ /// The argument is dropped if no execution engine was selected yet.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+ /// <param name="argument">Argument to be pushed</param>
+ public void PushArgument(ulong gpuVa, int argument)
+ {
+     if (_executionEngine != null)
+     {
+         _executionEngine.Fifo.Enqueue(new FifoWord(gpuVa, argument));
+     }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
new file mode 100644
index 00000000..8630bbc4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
@@ -0,0 +1,341 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro High-level emulation.
+ /// </summary>
+ class MacroHLE : IMacroEE
+ {
+ private const int ColorLayerCountOffset = 0x818;
+ private const int ColorStructSize = 0x40;
+ private const int ZetaLayerCountOffset = 0x1230;
+
+ private const int IndirectDataEntrySize = 0x10;
+ private const int IndirectIndexedDataEntrySize = 0x14;
+
+ private readonly GPFifoProcessor _processor;
+ private readonly MacroHLEFunctionName _functionName;
+
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue<FifoWord> Fifo { get; }
+
+ /// <summary>
+ /// Initializes the HLE macro handler with an empty arguments FIFO.
+ /// </summary>
+ /// <param name="processor">GPU GP FIFO command processor used to issue the clears and draws</param>
+ /// <param name="functionName">HLE macro function that will be run on each execution</param>
+ public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
+ {
+     Fifo = new Queue<FifoWord>();
+
+     _functionName = functionName;
+     _processor = processor;
+ }
+
+ /// <summary>
+ /// Runs the high-level implementation selected at construction, then empties the arguments FIFO.
+ /// </summary>
+ /// <param name="code">Code of the program to execute (not read by this implementation)</param>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+ {
+ switch (_functionName) // Fixed for the lifetime of this instance (readonly field).
+ {
+ case MacroHLEFunctionName.ClearColor:
+ ClearColor(state, arg0);
+ break;
+ case MacroHLEFunctionName.ClearDepthStencil:
+ ClearDepthStencil(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawArraysInstanced:
+ DrawArraysInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsInstanced:
+ DrawElementsInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsIndirect:
+ DrawElementsIndirect(state, arg0);
+ break;
+ case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+ MultiDrawElementsIndirectCount(state, arg0);
+ break;
+ default:
+ throw new NotImplementedException(_functionName.ToString()); // Reached for any value without a handler above, including None.
+ }
+
+ // It should be empty at this point, but clear it just to be safe, so leftovers never leak into the next call.
+ Fifo.Clear();
+ }
+
+ /// <summary>
+ /// Clears a single bound color target, using the layer count read from that target's state.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, forwarded unchanged to the clear</param>
+ private void ClearColor(IDeviceState state, int arg0)
+ {
+     // Bits 9:6 of the argument are used as the color target index.
+     int targetIndex = (arg0 >> 6) & 0xf;
+     int layerCountOffset = ColorLayerCountOffset + targetIndex * ColorStructSize;
+
+     _processor.ThreedClass.Clear(arg0, state.Read(layerCountOffset));
+ }
+
+ /// <summary>
+ /// Clears the current depth-stencil target, using the layer count read from the zeta state.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, forwarded unchanged to the clear</param>
+ private void ClearDepthStencil(IDeviceState state, int arg0)
+ {
+     int zetaLayerCount = state.Read(ZetaLayerCountOffset);
+
+     _processor.ThreedClass.Clear(arg0, zetaLayerCount);
+ }
+
+ /// <summary>
+ /// Performs a non-indexed, instanced draw with parameters taken from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
+ private void DrawArraysInstanced(IDeviceState state, int arg0)
+ {
+     // All four parameters are always dequeued, even if the draw ends up being skipped.
+     int vertexCount = FetchParam().Word;
+     int instances = FetchParam().Word;
+     int baseVertex = FetchParam().Word;
+     int baseInstance = FetchParam().Word;
+
+     if (ShouldSkipDraw(state, instances))
+     {
+         return;
+     }
+
+     _processor.ThreedClass.Draw(
+         (PrimitiveTopology)arg0,
+         vertexCount,
+         instances,
+         0,
+         baseVertex,
+         baseInstance,
+         indexed: false);
+ }
+
+ /// <summary>
+ /// Performs an indexed, instanced draw with parameters taken from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
+ private void DrawElementsInstanced(IDeviceState state, int arg0)
+ {
+     // All five parameters are always dequeued, even if the draw ends up being skipped.
+     int indexCount = FetchParam().Word;
+     int instances = FetchParam().Word;
+     int baseIndex = FetchParam().Word;
+     int baseVertex = FetchParam().Word;
+     int baseInstance = FetchParam().Word;
+
+     if (ShouldSkipDraw(state, instances))
+     {
+         return;
+     }
+
+     _processor.ThreedClass.Draw(
+         (PrimitiveTopology)arg0,
+         indexCount,
+         instances,
+         baseIndex,
+         baseVertex,
+         baseInstance,
+         indexed: true);
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed draw. The draw parameters are sourced from the GPU buffer they
+ /// were fetched from when the buffer cache reports it as modified, and from the FIFO values otherwise.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
+ private void DrawElementsIndirect(IDeviceState state, int arg0)
+ {
+     PrimitiveTopology topology = (PrimitiveTopology)arg0;
+
+     FifoWord count = FetchParam();
+     FifoWord instanceCount = FetchParam();
+     FifoWord firstIndex = FetchParam();
+     FifoWord firstVertex = FetchParam();
+     FifoWord firstInstance = FetchParam();
+
+     // The address the first parameter was fetched from is the start of the indirect data.
+     ulong indirectGpuVa = count.GpuVa;
+
+     bool modified = _processor.MemoryManager.Physical.BufferCache.CheckModified(
+         _processor.MemoryManager,
+         indirectGpuVa,
+         IndirectIndexedDataEntrySize,
+         out ulong indirectAddress);
+
+     if (!modified)
+     {
+         // The values read from the FIFO are used directly for a regular draw.
+         if (!ShouldSkipDraw(state, instanceCount.Word))
+         {
+             _processor.ThreedClass.Draw(
+                 topology,
+                 count.Word,
+                 instanceCount.Word,
+                 firstIndex.Word,
+                 firstVertex.Word,
+                 firstInstance.Word,
+                 indexed: true);
+         }
+
+         return;
+     }
+
+     _processor.ThreedClass.DrawIndirect(
+         topology,
+         indirectAddress,
+         0,
+         1,
+         IndirectIndexedDataEntrySize,
+         firstIndex.Word + count.Word,
+         Threed.IndirectDrawType.DrawIndexedIndirect);
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed multi-draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <remarks>
+ /// FIFO layout consumed here: end draw, topology, padding word count, one word whose address
+ /// locates the draw count (parameter) buffer, then one 5-word draw record per draw, each followed
+ /// by the padding words (except after the last record).
+ /// </remarks>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the index of the first draw</param>
+ private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
+ {
+     int arg1 = FetchParam().Word;
+     int arg2 = FetchParam().Word;
+     int arg3 = FetchParam().Word;
+
+     int startDraw = arg0;
+     int endDraw = arg1;
+     var topology = (PrimitiveTopology)arg2;
+     int paddingWords = arg3;
+
+     // Each entry is one indexed indirect draw record followed by the padding words.
+     // Uses the shared constant instead of repeating its value (0x14) here.
+     int stride = paddingWords * sizeof(int) + IndirectIndexedDataEntrySize;
+
+     // Only the address of this word is used: it locates the draw count in GPU memory.
+     ulong parameterBufferGpuVa = FetchParam().GpuVa;
+
+     int maxDrawCount = endDraw - startDraw;
+
+     if (startDraw != 0)
+     {
+         int drawCount = _processor.MemoryManager.Read<int>(parameterBufferGpuVa, tracked: true);
+
+         // Calculate maximum draw count based on the previous draw count and current draw count.
+         if ((uint)drawCount <= (uint)startDraw)
+         {
+             // The start draw is past our total draw count, so all draws were already performed.
+             maxDrawCount = 0;
+         }
+         else
+         {
+             // Perform just the missing number of draws.
+             maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw));
+         }
+     }
+
+     if (maxDrawCount == 0)
+     {
+         // Nothing to draw; discard the draw records still queued for this call.
+         Fifo.Clear();
+         return;
+     }
+
+     ulong indirectBufferGpuVa = 0;
+     int indexCount = 0;
+
+     for (int i = 0; i < maxDrawCount; i++)
+     {
+         var count = FetchParam();
+         var instanceCount = FetchParam();
+         var firstIndex = FetchParam();
+         var firstVertex = FetchParam();
+         var firstInstance = FetchParam();
+
+         if (i == 0)
+         {
+             // The first record's address is the start of the indirect buffer.
+             indirectBufferGpuVa = count.GpuVa;
+         }
+
+         // Track the highest index range touched by any of the draws.
+         indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);
+
+         // No padding is consumed after the last record.
+         if (i != maxDrawCount - 1)
+         {
+             for (int j = 0; j < paddingWords; j++)
+             {
+                 FetchParam();
+             }
+         }
+     }
+
+     var bufferCache = _processor.MemoryManager.Physical.BufferCache;
+
+     ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;
+
+     ulong indirectBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize);
+     ulong parameterBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, parameterBufferGpuVa, 4);
+
+     _processor.ThreedClass.DrawIndirect(
+         topology,
+         indirectBufferAddress,
+         parameterBufferAddress,
+         maxDrawCount,
+         stride,
+         indexCount,
+         Threed.IndirectDrawType.DrawIndexedIndirectCount);
+ }
+
+ /// <summary>
+ /// Tells whether a draw must be skipped because no bit of the instance count survives
+ /// the mask stored in GPU register 0xd1b.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="instanceCount">Draw instance count</param>
+ /// <returns>True if the draw should be skipped, false otherwise</returns>
+ private static bool ShouldSkipDraw(IDeviceState state, int instanceCount)
+ {
+     int instanceMask = Read(state, 0xd1b);
+     int maskedCount = instanceMask & instanceCount;
+
+     return maskedCount == 0;
+ }
+
+ /// <summary>
+ /// Fetches an argument from the arguments FIFO.
+ /// </summary>
+ /// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
+ private FifoWord FetchParam()
+ {
+     if (Fifo.TryDequeue(out FifoWord word))
+     {
+         return word;
+     }
+
+     // The caller asked for more arguments than were pushed; warn and fall back to a zero word.
+     Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+     return new FifoWord(0UL, 0);
+ }
+
+ /// <summary>
+ /// Reads a GPU register, converting the register index into a byte offset.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register index (in 32-bit words) to read</param>
+ /// <returns>GPU register value</returns>
+ private static int Read(IDeviceState state, int reg)
+ {
+     int byteOffset = reg * 4;
+
+     return state.Read(byteOffset);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
new file mode 100644
index 00000000..751867fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Identifies which high-level implementation (if any) replaces a Macro function.
+ /// </summary>
+ enum MacroHLEFunctionName
+ {
+ None, // No high-level implementation; also the default value of the enum.
+ ClearColor, // Clear of one color target.
+ ClearDepthStencil, // Clear of the depth-stencil target.
+ DrawArraysInstanced, // Non-indexed instanced draw.
+ DrawElementsInstanced, // Indexed instanced draw.
+ DrawElementsIndirect, // Indexed draw with parameters from a GPU buffer.
+ MultiDrawElementsIndirectCount // Indexed multi-draw with parameters and draw count from GPU buffers.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
new file mode 100644
index 00000000..719e170f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
@@ -0,0 +1,113 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Table with information about High-level implementations of GPU Macro code.
+ /// </summary>
+ static class MacroHLETable
+ {
+     /// <summary>
+     /// Macro High-level implementation table entry.
+     /// </summary>
+     readonly struct TableEntry
+     {
+         /// <summary>
+         /// Name of the Macro function.
+         /// </summary>
+         public MacroHLEFunctionName Name { get; }
+
+         /// <summary>
+         /// Hash of the original binary Macro function code.
+         /// </summary>
+         public Hash128 Hash { get; }
+
+         /// <summary>
+         /// Size (in bytes) of the original binary Macro function code.
+         /// </summary>
+         public int Length { get; }
+
+         /// <summary>
+         /// Creates a new table entry.
+         /// </summary>
+         /// <param name="name">Name of the Macro function</param>
+         /// <param name="hash">Hash of the original binary Macro function code</param>
+         /// <param name="length">Size (in bytes) of the original binary Macro function code</param>
+         public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length)
+         {
+             Name = name;
+             Hash = hash;
+             Length = length;
+         }
+     }
+
+     // Known Macro binaries, identified by the hash of their first Length bytes.
+     private static readonly TableEntry[] _table = new TableEntry[]
+     {
+         new TableEntry(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28),
+         new TableEntry(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24),
+         new TableEntry(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48),
+         new TableEntry(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5c),
+         new TableEntry(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7c),
+         new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C)
+     };
+
+     /// <summary>
+     /// Checks if the host supports all features required by the HLE macro.
+     /// </summary>
+     /// <param name="caps">Host capabilities</param>
+     /// <param name="name">Name of the HLE macro to be checked</param>
+     /// <returns>True if the host supports the HLE macro, false otherwise</returns>
+     private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
+     {
+         switch (name)
+         {
+             case MacroHLEFunctionName.ClearColor:
+             case MacroHLEFunctionName.ClearDepthStencil:
+             case MacroHLEFunctionName.DrawArraysInstanced:
+             case MacroHLEFunctionName.DrawElementsInstanced:
+             case MacroHLEFunctionName.DrawElementsIndirect:
+                 // These have no special host requirement.
+                 return true;
+             case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+                 return caps.SupportsIndirectParameters;
+             default:
+                 return false;
+         }
+     }
+
+     /// <summary>
+     /// Checks if there's a fast, High-level implementation of the specified Macro code available.
+     /// </summary>
+     /// <param name="code">Macro code to be checked</param>
+     /// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
+     /// <param name="name">Name of the function if an implementation is available and supported, otherwise <see cref="MacroHLEFunctionName.None"/></param>
+     /// <returns>True if there is an implementation available and supported, false otherwise</returns>
+     public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name)
+     {
+         var mc = MemoryMarshal.Cast<int, byte>(code);
+
+         for (int i = 0; i < _table.Length; i++)
+         {
+             ref var entry = ref _table[i];
+
+             // Code shorter than the reference binary can't match it, and slicing
+             // past the end of the span would throw instead of reporting "no match".
+             if (entry.Length > mc.Length)
+             {
+                 continue;
+             }
+
+             var hash = XXHash128.ComputeHash(mc.Slice(0, entry.Length));
+             if (hash == entry.Hash)
+             {
+                 if (IsMacroHLESupported(caps, entry.Name))
+                 {
+                     name = entry.Name;
+                     return true;
+                 }
+
+                 // The macro was recognized but the host can't run its HLE version; stop searching.
+                 break;
+             }
+         }
+
+         name = MacroHLEFunctionName.None;
+         return false;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
new file mode 100644
index 00000000..df6ee040
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
@@ -0,0 +1,400 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro code interpreter.
+ /// </summary>
+ class MacroInterpreter : IMacroEE
+ {
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue<FifoWord> Fifo { get; }
+
+ private int[] _gprs;
+
+ private int _methAddr;
+ private int _methIncr;
+
+ private bool _carry;
+
+ private int _opCode;
+ private int _pipeOp;
+
+ private bool _ignoreExitFlag;
+
+ private int _pc;
+
+ /// <summary>
+ /// Initializes the interpreter with eight general purpose registers and an empty arguments FIFO.
+ /// </summary>
+ public MacroInterpreter()
+ {
+     _gprs = new int[8];
+
+     Fifo = new Queue<FifoWord>();
+ }
+
+ /// <summary>
+ /// Interprets a macro program from its first instruction until it exits.
+ /// </summary>
+ /// <param name="code">Code of the program to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+ {
+     Reset();
+
+     // The first argument is handed to the program in register 1.
+     _gprs[1] = arg0;
+     _pc = 0;
+
+     // Prime the pipeline with the first instruction.
+     FetchOpCode(code);
+
+     bool running;
+
+     do
+     {
+         running = Step(code, state);
+     }
+     while (running);
+
+     // The instruction following the exit sits in a delay slot,
+     // so it still has to run before the program is really done.
+     Step(code, state);
+ }
+
+ /// <summary>
+ /// Resets the internal interpreter state.
+ /// Call each time you run a new program.
+ /// </summary>
+ private void Reset()
+ {
+     Array.Clear(_gprs, 0, _gprs.Length);
+
+     _methAddr = 0;
+     _methIncr = 0;
+
+     _carry = false;
+
+     // The previous run may have ended on a taken branch with a delay slot, which leaves this
+     // flag set. Clear it so the first instruction of the new program can still exit.
+     _ignoreExitFlag = false;
+ }
+
+ /// <summary>
+ /// Executes the instruction currently waiting in the pipeline, and fetches the one after it.
+ /// </summary>
+ /// <param name="code">Program code to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>True to continue execution, false if the program exited</returns>
+ private bool Step(ReadOnlySpan<int> code, IDeviceState state)
+ {
+ int baseAddr = _pc - 1; // The pipelined word was fetched from _pc - 1; branch offsets are applied to this address.
+
+ FetchOpCode(code); // Moves the pipelined word into _opCode and prefetches the next word.
+
+ if ((_opCode & 7) < 7) // The low 3 bits select the operation; 7 is a branch, anything else produces a value.
+ {
+ // Operation produces a value.
+ AssignmentOperation asgOp = (AssignmentOperation)((_opCode >> 4) & 7);
+
+ int result = GetAluResult(state);
+
+ switch (asgOp)
+ {
+ // Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ SetDstGpr(FetchParam());
+ break;
+ // Move result.
+ case AssignmentOperation.Move:
+ SetDstGpr(result);
+ break;
+ // Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ break;
+ // Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ SetDstGpr(FetchParam());
+ Send(state, result);
+ break;
+ // Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ SetDstGpr(result);
+ Send(state, result);
+ break;
+ // Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ SetDstGpr(FetchParam());
+ SetMethAddr(result);
+ break;
+ // Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, FetchParam());
+ break;
+ // Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, (result >> 12) & 0x3f);
+ break;
+ }
+ }
+ else
+ {
+ // Branch.
+ bool onNotZero = ((_opCode >> 4) & 1) != 0; // Bit 4 selects "branch if not zero" over "branch if zero".
+
+ bool taken = onNotZero
+ ? GetGprA() != 0
+ : GetGprA() == 0;
+
+ if (taken)
+ {
+ _pc = baseAddr + GetImm(); // The target is relative to the address of the branch itself.
+
+ bool noDelays = (_opCode & 0x20) != 0; // Bit 5 set means the branch has no delay slot.
+
+ if (noDelays)
+ {
+ FetchOpCode(code); // Replaces the already prefetched word with the one at the branch target, so it never runs.
+ }
+ else
+ {
+ // The delay slot instruction exit flag should be ignored.
+ _ignoreExitFlag = true;
+ }
+
+ return true; // A taken branch never ends the program by itself.
+ }
+ }
+
+ bool exit = (_opCode & 0x80) != 0 && !_ignoreExitFlag; // Bit 7 is the exit flag.
+
+ _ignoreExitFlag = false; // The flag only applies to the single instruction following a taken delayed branch.
+
+ return !exit;
+ }
+
+ /// <summary>
+ /// Advances the two-stage pipeline: the previously fetched word becomes the current
+ /// operation code, and the word at the program counter is fetched to replace it.
+ /// </summary>
+ /// <param name="code">Program code</param>
+ private void FetchOpCode(ReadOnlySpan<int> code)
+ {
+     _opCode = _pipeOp;
+
+     int fetchIndex = _pc++;
+
+     _pipeOp = code[fetchIndex];
+ }
+
+ /// <summary>
+ /// Computes the value produced by the Arithmetic and Logic unit for the current operation code.
+ /// </summary>
+ /// <param name="state">Current GPU state, used by the register read operation</param>
+ /// <returns>Operation result</returns>
+ /// <exception cref="InvalidOperationException">Thrown when the operation is not one of the handled ones</exception>
+ private int GetAluResult(IDeviceState state)
+ {
+     AluOperation op = (AluOperation)(_opCode & 7);
+
+     // Bitfield parameters, only meaningful for the three bitfield operations.
+     int srcBit = (_opCode >> 17) & 0x1f;
+     int size = (_opCode >> 22) & 0x1f;
+     int dstBit = (_opCode >> 27) & 0x1f;
+     int mask = (1 << size) - 1;
+
+     switch (op)
+     {
+         case AluOperation.AluReg:
+         {
+             AluRegOperation regOp = (AluRegOperation)((_opCode >> 17) & 0x1f);
+
+             return GetAluResult(regOp, GetGprA(), GetGprB());
+         }
+
+         case AluOperation.AddImmediate:
+             return GetGprA() + GetImm();
+
+         case AluOperation.ReadImmediate:
+             return Read(state, GetGprA() + GetImm());
+
+         case AluOperation.BitfieldReplace:
+         {
+             // Extract a field from B and insert it into A at the destination bit.
+             int field = (int)((uint)GetGprB() >> srcBit) & mask;
+             int kept = GetGprA() & ~(mask << dstBit);
+
+             return kept | (field << dstBit);
+         }
+
+         case AluOperation.BitfieldExtractLslImm:
+         {
+             // The source bit comes from register A, the left shift from the instruction.
+             int field = (int)((uint)GetGprB() >> GetGprA()) & mask;
+
+             return field << dstBit;
+         }
+
+         case AluOperation.BitfieldExtractLslReg:
+         {
+             // The source bit comes from the instruction, the left shift from register A.
+             int field = (int)((uint)GetGprB() >> srcBit) & mask;
+
+             return field << GetGprA();
+         }
+     }
+
+     throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Gets the result of an Arithmetic and Logic operation using registers.
+ /// The additions and subtractions also update the carry flag.
+ /// </summary>
+ /// <param name="aluOp">Arithmetic and Logic unit operation with registers</param>
+ /// <param name="a">First operand value</param>
+ /// <param name="b">Second operand value</param>
+ /// <returns>Operation result</returns>
+ /// <exception cref="InvalidOperationException">Thrown when the operation is not one of the handled ones</exception>
+ private int GetAluResult(AluRegOperation aluOp, int a, int b)
+ {
+     // The operands must be zero-extended to 64 bits. Casting an int directly to ulong
+     // sign-extends it, which fills the upper 32 bits for any operand with bit 31 set
+     // and makes the carry/borrow checks below report the wrong value.
+     ulong ua = (uint)a;
+     ulong ub = (uint)b;
+
+     ulong result;
+
+     switch (aluOp)
+     {
+         case AluRegOperation.Add:
+             result = ua + ub;
+
+             // Carry out of bit 31.
+             _carry = result > 0xffffffff;
+
+             return (int)result;
+
+         case AluRegOperation.AddWithCarry:
+             result = ua + ub + (_carry ? 1UL : 0UL);
+
+             _carry = result > 0xffffffff;
+
+             return (int)result;
+
+         case AluRegOperation.Subtract:
+             result = ua - ub;
+
+             // The 64-bit result wraps above 32 bits when a borrow happened; carry is "no borrow".
+             _carry = result < 0x100000000;
+
+             return (int)result;
+
+         case AluRegOperation.SubtractWithBorrow:
+             result = ua - ub - (_carry ? 0UL : 1UL);
+
+             _carry = result < 0x100000000;
+
+             return (int)result;
+
+         case AluRegOperation.BitwiseExclusiveOr: return a ^ b;
+         case AluRegOperation.BitwiseOr: return a | b;
+         case AluRegOperation.BitwiseAnd: return a & b;
+         case AluRegOperation.BitwiseAndNot: return a & ~b;
+         case AluRegOperation.BitwiseNotAnd: return ~(a & b);
+     }
+
+     throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Gets the signed immediate stored in bits 31:14 of the current operation code.
+ /// </summary>
+ /// <returns>The sign-extended immediate value</returns>
+ private int GetImm()
+ {
+     const int ImmShift = 14;
+
+     // Arithmetic shift of a signed int: the sign bit is replicated, which is intended here.
+     return _opCode >> ImmShift;
+ }
+
+ /// <summary>
+ /// Unpacks the method address (bits 11:0) and the address increment (bits 17:12)
+ /// used by subsequent method calls.
+ /// </summary>
+ /// <param name="value">Packed address and increment value</param>
+ private void SetMethAddr(int value)
+ {
+     _methIncr = (value >> 12) & 0x3f;
+     _methAddr = value & 0xfff;
+ }
+
+ /// <summary>
+ /// Writes a value to the register selected by bits 10:8 of the current operation code.
+ /// </summary>
+ /// <param name="value">Value to set (usually the operation result)</param>
+ private void SetDstGpr(int value)
+ {
+     int dstIndex = (_opCode >> 8) & 7;
+
+     _gprs[dstIndex] = value;
+ }
+
+ /// <summary>
+ /// Gets the first operand, from the register selected by bits 13:11 of the current operation code.
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprA()
+ {
+     int regIndex = (_opCode >> 11) & 7;
+
+     return GetGprValue(regIndex);
+ }
+
+ /// <summary>
+ /// Gets the second operand, from the register selected by bits 16:14 of the current operation code.
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprB()
+ {
+     int regIndex = (_opCode >> 14) & 7;
+
+     return GetGprValue(regIndex);
+ }
+
+ /// <summary>
+ /// Reads a register. Register 0 always reads as zero, whatever was stored in it.
+ /// </summary>
+ /// <param name="index">Index of the register</param>
+ /// <returns>Register value</returns>
+ private int GetGprValue(int index)
+ {
+     if (index == 0)
+     {
+         return 0;
+     }
+
+     return _gprs[index];
+ }
+
+ /// <summary>
+ /// Takes the next call argument out of the call argument FIFO.
+ /// </summary>
+ /// <returns>The call argument, or 0 if the FIFO is empty</returns>
+ private int FetchParam()
+ {
+     if (Fifo.TryDequeue(out FifoWord word))
+     {
+         return word.Word;
+     }
+
+     // The program asked for more arguments than were pushed; warn and fall back to zero.
+     Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+     return 0;
+ }
+
+ /// <summary>
+ /// Reads a GPU register, converting the register index into a byte offset.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register index (in 32-bit words) to read</param>
+ /// <returns>GPU register value</returns>
+ private int Read(IDeviceState state, int reg)
+ {
+     int byteOffset = reg * 4;
+
+     return state.Read(byteOffset);
+ }
+
+ /// <summary>
+ /// Writes a value to the current method address, then advances that address by the current increment.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="value">Call argument</param>
+ private void Send(IDeviceState state, int value)
+ {
+     // Method addresses count 32-bit words, while the state is addressed in bytes.
+     int byteOffset = _methAddr * 4;
+
+     state.Write(byteOffset, value);
+
+     _methAddr += _methIncr;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
new file mode 100644
index 00000000..4077f74e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
@@ -0,0 +1,39 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro execution engine that compiles the macro code with a Just-in-Time compiler
+ /// on the first execution and reuses the compiled code on later ones.
+ /// </summary>
+ class MacroJit : IMacroEE
+ {
+     private readonly MacroJitContext _context = new MacroJitContext();
+
+     private MacroJitCompiler.MacroExecute _execute;
+
+     /// <summary>
+     /// Arguments FIFO, owned by the JIT context.
+     /// </summary>
+     public Queue<FifoWord> Fifo => _context.Fifo;
+
+     /// <summary>
+     /// Runs the macro program until it exits, compiling it first if that was not done yet.
+     /// </summary>
+     /// <param name="code">Code of the program to execute</param>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+     public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+     {
+         // Only the code passed on the first call is ever compiled by this instance.
+         _execute ??= new MacroJitCompiler().Compile(code);
+
+         _execute(_context, state, arg0);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
new file mode 100644
index 00000000..f8d839fa
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
@@ -0,0 +1,517 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Reflection.Emit;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents a Macro Just-in-Time compiler.
+ /// </summary>
+ class MacroJitCompiler
+ {
+ private readonly DynamicMethod _meth;
+ private readonly ILGenerator _ilGen;
+ private readonly LocalBuilder[] _gprs;
+ private readonly LocalBuilder _carry;
+ private readonly LocalBuilder _methAddr;
+ private readonly LocalBuilder _methIncr;
+
+ /// <summary>
+ /// Creates the dynamic method that will hold the compiled macro, declares the locals backing
+ /// the macro registers and flags, and emits the prologue that stores the first argument.
+ /// </summary>
+ public MacroJitCompiler()
+ {
+     Type[] parameterTypes = { typeof(MacroJitContext), typeof(IDeviceState), typeof(int) };
+
+     _meth = new DynamicMethod("Macro", typeof(void), parameterTypes);
+     _ilGen = _meth.GetILGenerator();
+
+     // No local is declared for register 0; its array slot is left null.
+     _gprs = new LocalBuilder[8];
+
+     for (int reg = 1; reg < _gprs.Length; reg++)
+     {
+         _gprs[reg] = _ilGen.DeclareLocal(typeof(int));
+     }
+
+     _carry = _ilGen.DeclareLocal(typeof(int));
+     _methAddr = _ilGen.DeclareLocal(typeof(int));
+     _methIncr = _ilGen.DeclareLocal(typeof(int));
+
+     // Prologue: the third method argument (arg0) is copied into register 1.
+     _ilGen.Emit(OpCodes.Ldarg_2);
+     _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
+ }
+
+ public delegate void MacroExecute(MacroJitContext context, IDeviceState state, int arg0); // Signature of the compiled macro; matches the DynamicMethod parameter list built in the constructor.
+
+ /// <summary>
+ /// Translates a new piece of GPU Macro code into host executable code.
+ /// Works in two passes: branch targets are collected first, then IL is emitted for each instruction.
+ /// </summary>
+ /// <param name="code">Code to be translated</param>
+ /// <returns>Delegate of the host compiled code</returns>
+ public MacroExecute Compile(ReadOnlySpan<int> code)
+ {
+ Dictionary<int, Label> labels = new Dictionary<int, Label>(); // Maps an instruction index to the IL label marking it.
+
+ int lastTarget = 0; // Highest branch target seen so far.
+ int i;
+
+ // Collect all branch targets.
+ for (i = 0; i < code.Length; i++)
+ {
+ int opCode = code[i];
+
+ if ((opCode & 7) == 7) // Operation 7 is a branch.
+ {
+ int target = i + (opCode >> 14); // Signed offset in bits 31:14, relative to the branch itself.
+
+ if (!labels.ContainsKey(target))
+ {
+ labels.Add(target, _ilGen.DefineLabel());
+ }
+
+ if (lastTarget < target)
+ {
+ lastTarget = target;
+ }
+ }
+
+ bool exit = (opCode & 0x80) != 0; // Bit 7 is the exit flag.
+
+ if (exit && i >= lastTarget) // An exit only ends the scan when no known branch jumps past it.
+ {
+ break;
+ }
+ }
+
+ // Code generation.
+ for (i = 0; i < code.Length; i++)
+ {
+ if (labels.TryGetValue(i, out Label label))
+ {
+ _ilGen.MarkLabel(label);
+ }
+
+ Emit(code, i, labels);
+
+ int opCode = code[i];
+
+ bool exit = (opCode & 0x80) != 0;
+
+ if (exit)
+ {
+ Emit(code, i + 1, labels); // The instruction after an exit is emitted too (delay slot) before returning.
+ _ilGen.Emit(OpCodes.Ret);
+
+ if (i >= lastTarget) // Same stop condition as the first pass.
+ {
+ break;
+ }
+ }
+ }
+
+ if (i == code.Length) // The loop ran off the end of the code without hitting a final exit.
+ {
+ _ilGen.Emit(OpCodes.Ret);
+ }
+
+ return _meth.CreateDelegate<MacroExecute>();
+ }
+
+ /// <summary>
+ /// Emits IL equivalent to the Macro instruction at a given offset.
+ /// For a branch with a delay slot, the following instruction is emitted as well, on the taken path.
+ /// </summary>
+ /// <param name="code">GPU Macro code</param>
+ /// <param name="offset">Offset, in words, where the instruction is located</param>
+ /// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
+ private void Emit(ReadOnlySpan<int> code, int offset, Dictionary<int, Label> labels)
+ {
+ int opCode = code[offset];
+
+ if ((opCode & 7) < 7) // The low 3 bits select the operation; 7 is a branch.
+ {
+ // Operation produces a value.
+ AssignmentOperation asgOp = (AssignmentOperation)((opCode >> 4) & 7);
+
+ EmitAluOp(opCode); // Leaves the ALU result on the evaluation stack for the cases below.
+
+ switch (asgOp)
+ {
+ // Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ _ilGen.Emit(OpCodes.Pop); // Discards the unused ALU result.
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode);
+ break;
+ // Move result.
+ case AssignmentOperation.Move:
+ EmitStoreDstGpr(opCode);
+ break;
+ // Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ _ilGen.Emit(OpCodes.Dup); // One copy of the result for each of the two stores.
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ break;
+ // Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode); // Stores the fetched parameter; the ALU result stays below it on the stack.
+ EmitSend();
+ break;
+ // Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitSend();
+ break;
+ // Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ break;
+ // Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ EmitFetchParam();
+ EmitSend();
+ break;
+ // Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ _ilGen.Emit(OpCodes.Dup); // Three copies are needed: register store, method address, value to send.
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ _ilGen.Emit(OpCodes.Ldc_I4, 12);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
+ _ilGen.Emit(OpCodes.And);
+ EmitSend();
+ break;
+ }
+ }
+ else
+ {
+ // Branch.
+ bool onNotZero = ((opCode >> 4) & 1) != 0; // Bit 4 selects "branch if not zero" over "branch if zero".
+
+ EmitLoadGprA(opCode);
+
+ Label lblSkip = _ilGen.DefineLabel(); // Reached when the branch is not taken.
+
+ if (onNotZero)
+ {
+ _ilGen.Emit(OpCodes.Brfalse, lblSkip); // Not taken when the register is zero.
+ }
+ else
+ {
+ _ilGen.Emit(OpCodes.Brtrue, lblSkip); // Not taken when the register is non-zero.
+ }
+
+ bool noDelays = (opCode & 0x20) != 0; // Bit 5 set means the branch has no delay slot.
+
+ if (!noDelays)
+ {
+ Emit(code, offset + 1, labels); // The delay slot instruction runs before the jump on the taken path.
+ }
+
+ int target = offset + (opCode >> 14); // Signed offset in bits 31:14, relative to the branch itself.
+
+ _ilGen.Emit(OpCodes.Br, labels[target]);
+
+ _ilGen.MarkLabel(lblSkip);
+ }
+ }
+
+ /// <summary>
+ /// Emits IL for an Arithmetic and Logic Unit instruction.
+ /// The emitted code leaves the operation result on the evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction to be translated</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(int opCode)
+ {
+ AluOperation op = (AluOperation)(opCode & 7);
+
+ switch (op)
+ {
+ case AluOperation.AluReg:
+ EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode); // Bits 21:17 select the register operation.
+ break;
+
+ case AluOperation.AddImmediate:
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add);
+ break;
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ int bfSrcBit = (opCode >> 17) & 0x1f;
+ int bfSize = (opCode >> 22) & 0x1f;
+ int bfDstBit = (opCode >> 27) & 0x1f;
+
+ int bfMask = (1 << bfSize) - 1; // The field parameters are known at compile time, so they are emitted as constants.
+
+ switch (op)
+ {
+ case AluOperation.BitfieldReplace: // ((B >> srcBit) & mask) << dstBit | (A & ~(mask << dstBit))
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, ~(bfMask << bfDstBit));
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Or);
+ break;
+
+ case AluOperation.BitfieldExtractLslImm: // ((B >> A) & mask) << dstBit
+ EmitLoadGprB(opCode);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl);
+ break;
+
+ case AluOperation.BitfieldExtractLslReg: // ((B >> srcBit) & mask) << A
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shl);
+ break;
+ }
+ break;
+
+ case AluOperation.ReadImmediate:
+ _ilGen.Emit(OpCodes.Ldarg_1); // Second method argument: the device state.
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read))); // Called with (state, A + immediate); presumably a static register read helper, confirm in MacroJitContext.
+ break;
+
+ default:
+ throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}.");
+ }
+ }
+
+ /// <summary>
+ /// Emits IL for a binary Arithmetic and Logic Unit instruction.
+ /// The result of the operation is left on top of the IL evaluation stack.
+ /// </summary>
+ /// <remarks>
+ /// The arithmetic operations are evaluated with 64-bit operands so that the carry can be derived
+ /// from the upper half of the result. Additions set the carry when the result exceeds 0xffffffff.
+ /// Subtractions set the carry when the result did not wrap around (that is, when no borrow occurred),
+ /// so the borrow consumed by <see cref="AluRegOperation.SubtractWithBorrow"/> is (1 - carry).
+ /// </remarks>
+ /// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
+ /// <param name="opCode">Raw instruction</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(AluRegOperation aluOp, int opCode)
+ {
+     switch (aluOp)
+     {
+         case AluRegOperation.Add:
+             // A + B
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Add);
+             // carry = result > 0xffffffff
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+             _ilGen.Emit(OpCodes.Cgt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.AddWithCarry:
+             // A + (B + carry)
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Add);
+             _ilGen.Emit(OpCodes.Add);
+             // carry = result > 0xffffffff
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+             _ilGen.Emit(OpCodes.Cgt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.Subtract:
+             // A - B
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Sub);
+             // carry = result < 0x100000000 (no wrap-around, so no borrow)
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+             _ilGen.Emit(OpCodes.Clt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.SubtractWithBorrow:
+             // A - (B + borrow), with borrow = 1 - carry.
+             EmitLoadGprA(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             _ilGen.Emit(OpCodes.Ldc_I4_1);
+             _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+             _ilGen.Emit(OpCodes.Sub);
+             _ilGen.Emit(OpCodes.Conv_U8);
+             // The borrow must be accumulated into B with an addition before the final subtraction.
+             // Subtracting twice would compute A - (B - borrow), which adds the borrow back instead
+             // of removing it from the result.
+             _ilGen.Emit(OpCodes.Add);
+             _ilGen.Emit(OpCodes.Sub);
+             // carry = result < 0x100000000 (no wrap-around, so no borrow)
+             _ilGen.Emit(OpCodes.Dup);
+             _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+             _ilGen.Emit(OpCodes.Clt_Un);
+             _ilGen.Emit(OpCodes.Stloc, _carry);
+             _ilGen.Emit(OpCodes.Conv_U4);
+             break;
+         case AluRegOperation.BitwiseExclusiveOr:
+             // A ^ B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Xor);
+             break;
+         case AluRegOperation.BitwiseOr:
+             // A | B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Or);
+             break;
+         case AluRegOperation.BitwiseAnd:
+             // A & B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.And);
+             break;
+         case AluRegOperation.BitwiseAndNot:
+             // A & ~B
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.Not);
+             _ilGen.Emit(OpCodes.And);
+             break;
+         case AluRegOperation.BitwiseNotAnd:
+             // ~(A & B)
+             EmitLoadGprA(opCode);
+             EmitLoadGprB(opCode);
+             _ilGen.Emit(OpCodes.And);
+             _ilGen.Emit(OpCodes.Not);
+             break;
+         default:
+             throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
+     }
+ }
+
+ /// <summary>
+ /// Pushes the immediate value encoded on an instruction into the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the immediate should be extracted</param>
+ private void EmitLoadImm(int opCode)
+ {
+     // The immediate is a signed value, so the arithmetic (sign-extending) shift is intentional.
+     int immediate = opCode >> 14;
+
+     _ilGen.Emit(OpCodes.Ldc_I4, immediate);
+ }
+
+ /// <summary>
+ /// Pushes the value of the General Purpose register used as first operand into the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprA(int opCode)
+ {
+     // The register number is stored on bits 11-13 of the instruction.
+     int registerA = (opCode >> 11) & 7;
+
+     EmitLoadGpr(registerA);
+ }
+
+ /// <summary>
+ /// Pushes the value of the General Purpose register used as second operand into the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprB(int opCode)
+ {
+     // The register number is stored on bits 14-16 of the instruction.
+     int registerB = (opCode >> 14) & 7;
+
+     EmitLoadGpr(registerB);
+ }
+
+ /// <summary>
+ /// Pushes the value of a General Purpose register into the IL evaluation stack.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 always reads as 0, so a constant is loaded instead of a local.
+ /// </remarks>
+ /// <param name="index">Register number</param>
+ private void EmitLoadGpr(int index)
+ {
+     if (index != 0)
+     {
+         _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]);
+
+         return;
+     }
+
+     _ilGen.Emit(OpCodes.Ldc_I4_0);
+ }
+
+ /// <summary>
+ /// Emits a call to <see cref="MacroJitContext.FetchParam"/>, which takes the next argument
+ /// from the arguments FIFO. The fetched argument is left on top of the IL evaluation stack.
+ /// </summary>
+ private void EmitFetchParam()
+ {
+     var fetchParam = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam));
+
+     // Argument 0 is used as the instance that the method is invoked on.
+     _ilGen.Emit(OpCodes.Ldarg_0);
+     _ilGen.Emit(OpCodes.Call, fetchParam);
+ }
+
+ /// <summary>
+ /// Pops the value on the top of the IL evaluation stack and stores it into the destination
+ /// General Purpose register of the instruction.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded.
+ /// The value is still popped in that case, to keep the evaluation stack balanced.
+ /// </remarks>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitStoreDstGpr(int opCode)
+ {
+     // The destination register number is stored on bits 8-10 of the instruction.
+     int dstRegister = (opCode >> 8) & 7;
+
+     if (dstRegister != 0)
+     {
+         _ilGen.Emit(OpCodes.Stloc_S, _gprs[dstRegister]);
+
+         return;
+     }
+
+     _ilGen.Emit(OpCodes.Pop);
+ }
+
+ /// <summary>
+ /// Pops the value on the top of the IL evaluation stack and splits it into the method address
+ /// (12 least significant bits) and the method address increment (6 bits starting at bit 12).
+ /// The address is the destination of subsequent send calls, and the increment is
+ /// added to the address after each sent value.
+ /// </summary>
+ private void EmitStoreMethAddr()
+ {
+     const int MethAddrMask = 0xfff;
+     const int MethIncrShift = 12;
+     const int MethIncrMask = 0x3f;
+
+     // The value is needed twice, once for the address and once for the increment.
+     _ilGen.Emit(OpCodes.Dup);
+
+     // methAddr = value & 0xfff
+     _ilGen.Emit(OpCodes.Ldc_I4, MethAddrMask);
+     _ilGen.Emit(OpCodes.And);
+     _ilGen.Emit(OpCodes.Stloc_S, _methAddr);
+
+     // methIncr = (value >> 12) & 0x3f
+     _ilGen.Emit(OpCodes.Ldc_I4, MethIncrShift);
+     _ilGen.Emit(OpCodes.Shr_Un);
+     _ilGen.Emit(OpCodes.Ldc_I4, MethIncrMask);
+     _ilGen.Emit(OpCodes.And);
+     _ilGen.Emit(OpCodes.Stloc_S, _methIncr);
+ }
+
+ /// <summary>
+ /// Pops the value on the top of the IL evaluation stack and sends it to the GPU,
+ /// using the current method address. The method address is then advanced by the
+ /// current method address increment.
+ /// </summary>
+ private void EmitSend()
+ {
+     var send = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send));
+
+     // The value is already on the stack, the remaining call arguments are pushed after it:
+     // MacroJitContext.Send(value, arg1, methAddr)
+     _ilGen.Emit(OpCodes.Ldarg_1);
+     _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
+     _ilGen.Emit(OpCodes.Call, send);
+
+     // methAddr += methIncr
+     _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
+     _ilGen.Emit(OpCodes.Ldloc_S, _methIncr);
+     _ilGen.Emit(OpCodes.Add);
+     _ilGen.Emit(OpCodes.Stloc_S, _methAddr);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
new file mode 100644
index 00000000..52c2a11b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
@@ -0,0 +1,55 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Execution context used by code generated by the Macro Just-in-Time compiler.
+ /// </summary>
+ class MacroJitContext
+ {
+     /// <summary>
+     /// Queue holding the arguments of the macro call.
+     /// </summary>
+     public Queue<FifoWord> Fifo { get; } = new Queue<FifoWord>();
+
+     /// <summary>
+     /// Removes the next argument from the arguments FIFO and returns it.
+     /// A warning is logged when there are no arguments left.
+     /// </summary>
+     /// <returns>The call argument, or 0 if the FIFO is empty</returns>
+     public int FetchParam()
+     {
+         if (Fifo.TryDequeue(out FifoWord argument))
+         {
+             return argument.Word;
+         }
+
+         Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+         return 0;
+     }
+
+     /// <summary>
+     /// Reads the value of a GPU register.
+     /// </summary>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="reg">Register offset to read</param>
+     /// <returns>GPU register value</returns>
+     public static int Read(IDeviceState state, int reg)
+     {
+         // The state is accessed with the register offset scaled by 4.
+         int stateOffset = reg * 4;
+
+         return state.Read(stateOffset);
+     }
+
+     /// <summary>
+     /// Calls a GPU method, by writing the argument into the GPU state.
+     /// </summary>
+     /// <param name="value">Call argument</param>
+     /// <param name="state">Current GPU state</param>
+     /// <param name="methAddr">Address, in words, of the method</param>
+     public static void Send(int value, IDeviceState state, int methAddr)
+     {
+         // The state is accessed with the word address scaled by 4.
+         int stateOffset = methAddr * 4;
+
+         state.Write(stateOffset, value);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
new file mode 100644
index 00000000..44cd8213
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
@@ -0,0 +1,18 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Temporary storage used by macros, exposed as 256 32-bit words (1024 bytes in total).
+ /// </summary>
+ [StructLayout(LayoutKind.Sequential, Size = 1024)]
+ struct MmeShadowScratch
+ {
+ #pragma warning disable CS0169
+     // First word of the storage, the remaining words are reserved by the struct size.
+     private uint _e0;
+ #pragma warning restore CS0169
+
+     /// <summary>
+     /// Gets a reference to the word at the specified index.
+     /// </summary>
+     /// <param name="index">Index of the word</param>
+     public ref uint this[int index]
+     {
+         get
+         {
+             return ref AsSpan()[index];
+         }
+     }
+
+     /// <summary>
+     /// Gets a span covering the 256 words of the storage.
+     /// </summary>
+     /// <returns>Span of the storage words</returns>
+     public Span<uint> AsSpan()
+     {
+         return MemoryMarshal.CreateSpan(ref _e0, 256);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs
new file mode 100644
index 00000000..060d35ca
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// MME shadow RAM control mode.
+ /// </summary>
+ /// <remarks>
+ /// Every member has an explicitly assigned numeric value.
+ /// NOTE(review): the values presumably match the raw mode value written by the guest into the
+ /// GPU state, in which case they must not be renumbered; confirm against the code using this enum.
+ /// </remarks>
+ enum SetMmeShadowRamControlMode
+ {
+ MethodTrack = 0,
+ MethodTrackWithFilter = 1,
+ MethodPassthrough = 2,
+ MethodReplay = 3,
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs
new file mode 100644
index 00000000..e1e3085b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs
@@ -0,0 +1,111 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Helpers that convert shader texture properties into their graphics abstraction layer equivalents.
+ /// </summary>
+ static class ShaderTexture
+ {
+     /// <summary>
+     /// Converts a sampler type into a texture target.
+     /// The <see cref="SamplerType.Indexed"/> and <see cref="SamplerType.Shadow"/> flags are ignored.
+     /// </summary>
+     /// <param name="type">Sampler type</param>
+     /// <returns>Texture target value, or <see cref="Target.Texture2D"/> (with a warning logged) if the type is not recognized</returns>
+     public static Target GetTarget(SamplerType type)
+     {
+         // Those flags do not change the target, remove them before doing the lookup.
+         type &= ~(SamplerType.Indexed | SamplerType.Shadow);
+
+         switch (type)
+         {
+             case SamplerType.Texture1D:
+                 return Target.Texture1D;
+             case SamplerType.TextureBuffer:
+                 return Target.TextureBuffer;
+             case SamplerType.Texture1D | SamplerType.Array:
+                 return Target.Texture1DArray;
+             case SamplerType.Texture2D:
+                 return Target.Texture2D;
+             case SamplerType.Texture2D | SamplerType.Array:
+                 return Target.Texture2DArray;
+             case SamplerType.Texture2D | SamplerType.Multisample:
+                 return Target.Texture2DMultisample;
+             case SamplerType.Texture2D | SamplerType.Multisample | SamplerType.Array:
+                 return Target.Texture2DMultisampleArray;
+             case SamplerType.Texture3D:
+                 return Target.Texture3D;
+             case SamplerType.TextureCube:
+                 return Target.Cubemap;
+             case SamplerType.TextureCube | SamplerType.Array:
+                 return Target.CubemapArray;
+             default:
+                 // Note that the logged type no longer has the flags removed above.
+                 Logger.Warning?.Print(LogClass.Gpu, $"Invalid sampler type \"{type}\".");
+
+                 return Target.Texture2D;
+         }
+     }
+
+     /// <summary>
+     /// Converts a shader image format into a texture format.
+     /// </summary>
+     /// <param name="format">Shader image format</param>
+     /// <returns>Texture format, or the format with value 0 if the shader image format has no equivalent on the table</returns>
+     public static Format GetFormat(TextureFormat format) => format switch
+     {
+         // 8 bits, single component.
+         TextureFormat.R8Unorm => Format.R8Unorm,
+         TextureFormat.R8Snorm => Format.R8Snorm,
+         TextureFormat.R8Uint => Format.R8Uint,
+         TextureFormat.R8Sint => Format.R8Sint,
+
+         // 16 bits, single component.
+         TextureFormat.R16Float => Format.R16Float,
+         TextureFormat.R16Unorm => Format.R16Unorm,
+         TextureFormat.R16Snorm => Format.R16Snorm,
+         TextureFormat.R16Uint => Format.R16Uint,
+         TextureFormat.R16Sint => Format.R16Sint,
+
+         // 32 bits, single component.
+         TextureFormat.R32Float => Format.R32Float,
+         TextureFormat.R32Uint => Format.R32Uint,
+         TextureFormat.R32Sint => Format.R32Sint,
+
+         // Two components.
+         TextureFormat.R8G8Unorm => Format.R8G8Unorm,
+         TextureFormat.R8G8Snorm => Format.R8G8Snorm,
+         TextureFormat.R8G8Uint => Format.R8G8Uint,
+         TextureFormat.R8G8Sint => Format.R8G8Sint,
+         TextureFormat.R16G16Float => Format.R16G16Float,
+         TextureFormat.R16G16Unorm => Format.R16G16Unorm,
+         TextureFormat.R16G16Snorm => Format.R16G16Snorm,
+         TextureFormat.R16G16Uint => Format.R16G16Uint,
+         TextureFormat.R16G16Sint => Format.R16G16Sint,
+         TextureFormat.R32G32Float => Format.R32G32Float,
+         TextureFormat.R32G32Uint => Format.R32G32Uint,
+         TextureFormat.R32G32Sint => Format.R32G32Sint,
+
+         // Four components.
+         TextureFormat.R8G8B8A8Unorm => Format.R8G8B8A8Unorm,
+         TextureFormat.R8G8B8A8Snorm => Format.R8G8B8A8Snorm,
+         TextureFormat.R8G8B8A8Uint => Format.R8G8B8A8Uint,
+         TextureFormat.R8G8B8A8Sint => Format.R8G8B8A8Sint,
+         TextureFormat.R16G16B16A16Float => Format.R16G16B16A16Float,
+         TextureFormat.R16G16B16A16Unorm => Format.R16G16B16A16Unorm,
+         TextureFormat.R16G16B16A16Snorm => Format.R16G16B16A16Snorm,
+         TextureFormat.R16G16B16A16Uint => Format.R16G16B16A16Uint,
+         TextureFormat.R16G16B16A16Sint => Format.R16G16B16A16Sint,
+         TextureFormat.R32G32B32A32Float => Format.R32G32B32A32Float,
+         TextureFormat.R32G32B32A32Uint => Format.R32G32B32A32Uint,
+         TextureFormat.R32G32B32A32Sint => Format.R32G32B32A32Sint,
+
+         // Packed formats.
+         TextureFormat.R10G10B10A2Unorm => Format.R10G10B10A2Unorm,
+         TextureFormat.R10G10B10A2Uint => Format.R10G10B10A2Uint,
+         TextureFormat.R11G11B10Float => Format.R11G11B10Float,
+
+         _ => 0
+     };
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs
new file mode 100644
index 00000000..a40b9cc4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs
@@ -0,0 +1,4226 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System.Globalization;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ static class AdvancedBlendFunctions
+ {
+ /// <summary>
+ /// List of the microcode generators, one per supported blend configuration.
+ /// Every entry is built from a blend operation, an overlap mode, a boolean flag and the function that emits the microcode.
+ /// NOTE(review): the boolean is presumably the "source is pre-multiplied" flag (generators with the Premul suffix pass true) - confirm against AdvancedBlendUcode.
+ /// <see cref="GenTable"/> walks this array in order, so the entry order is also the order of the generated text.
+ /// </summary>
+ public static readonly AdvancedBlendUcode[] Table = new AdvancedBlendUcode[]
+ {
+ // Uncorrelated overlap, flag = true.
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedAlphaPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusDarkerPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusClampedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedContrastPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertOvgPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedRedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedGreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedBluePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslLuminosityPremul),
+ // Disjoint overlap, flag = true.
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, GenDisjointXorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, GenDisjointPlusPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, GenDisjointMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, GenDisjointScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, GenDisjointOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, GenDisjointDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, GenDisjointLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, GenDisjointDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, true, GenDisjointExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslLuminosityPremul),
+ // Conjoint overlap, flag = true.
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, GenConjointDstPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, GenConjointDstInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointDstAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, GenConjointXorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, GenConjointMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, GenConjointScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, GenConjointOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, GenConjointDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, GenConjointLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, GenConjointHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, GenConjointSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, GenConjointDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, GenConjointExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, GenConjointVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, GenConjointPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, GenConjointHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, GenConjointHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, GenConjointHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, GenConjointHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, GenConjointHslLuminosityPremul),
+ // Uncorrelated overlap, flag = false.
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusClamped),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusClampedAlpha),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusDarker),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinus),
+ new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinusClamped),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedContrast),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedRed),
+ new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedGreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedBlue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslLuminosity),
+ // Disjoint overlap, flag = false.
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrc),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, GenDisjointXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, GenDisjointPlus),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, GenDisjointMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, GenDisjointScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, GenDisjointOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, GenDisjointDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, GenDisjointLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, GenDisjointDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, GenDisjointExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, GenDisjointInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslLuminosity),
+ // Conjoint overlap, flag = false.
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, GenConjointSrc),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, GenConjointDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, false, GenConjointXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, false, GenConjointMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, false, GenConjointScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, false, GenConjointOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, false, GenConjointDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, false, GenConjointLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, false, GenConjointHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, false, GenConjointSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, false, GenConjointDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, false, GenConjointExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, false, GenConjointInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, false, GenConjointVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, false, GenConjointPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, false, GenConjointHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, false, GenConjointHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, false, GenConjointHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, false, GenConjointHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, false, GenConjointHslLuminosity)
+ };
+
+ /// <summary>
+ /// Builds the C# source text of a dictionary that maps the hash of each microcode in <see cref="Table"/> to its blend entry.
+ /// The text is meant to be pasted into AdvancedBlendPreGenTable.
+ /// </summary>
+ /// <returns>Source text declaring the <c>_entries</c> dictionary, one initializer line per table entry</returns>
+ public static string GenTable()
+ {
+     // Format of a single constant; the numbers are always written with the invariant culture.
+     const string ConstantFormat = "new " + nameof(RgbFloat) + "({0}f, {1}f, {2}f)";
+
+     // Values that stringify to "0" are written as the literal 0 rather than as Type.Member.
+     static string QualifyEnum(string typeName, object member)
+     {
+         return member.ToString() == "0" ? "0" : $"{typeName}.{member}";
+     }
+
+     StringBuilder output = new StringBuilder();
+
+     output.AppendLine("private static Dictionary<Hash128, AdvancedBlendEntry> _entries = new()");
+     output.AppendLine("{");
+
+     foreach (var ucode in Table)
+     {
+         Hash128 codeHash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(ucode.Code));
+
+         // An entry without constants has a null array; it is emitted as an empty array.
+         int constantCount = ucode.Constants != null ? ucode.Constants.Length : 0;
+         string[] constantTexts = new string[constantCount];
+
+         for (int index = 0; index < constantCount; index++)
+         {
+             RgbFloat color = ucode.Constants[index];
+
+             constantTexts[index] = string.Format(CultureInfo.InvariantCulture, ConstantFormat, color.R, color.G, color.B);
+         }
+
+         string constantList;
+
+         if (constantCount > 0)
+         {
+             constantList = $"new[] {{ {string.Join(", ", constantTexts)} }}";
+         }
+         else
+         {
+             constantList = $"Array.Empty<{nameof(RgbFloat)}>()";
+         }
+
+         var alphaState = ucode.Alpha;
+
+         string[] alphaArguments =
+         {
+             QualifyEnum(nameof(BlendUcodeEnable), alphaState.Enable),
+             QualifyEnum(nameof(BlendOp), alphaState.AlphaOp),
+             QualifyEnum(nameof(BlendFactor), alphaState.AlphaSrcFactor),
+             QualifyEnum(nameof(BlendFactor), alphaState.AlphaDstFactor)
+         };
+
+         string alpha = $"new {nameof(FixedFunctionAlpha)}({string.Join(", ", alphaArguments)})";
+
+         string key = $"new Hash128(0x{codeHash.Low:X16}, 0x{codeHash.High:X16})";
+         string preMultiplied = ucode.SrcPreMultiplied ? "true" : "false";
+         string value = $"new AdvancedBlendEntry({nameof(AdvancedBlendOp)}.{ucode.Op}, {nameof(AdvancedBlendOverlap)}.{ucode.Overlap}, {preMultiplied}, {constantList}, {alpha})";
+
+         output.AppendLine($" {{ {key}, {value} }},");
+     }
+
+     output.AppendLine("};");
+
+     return output.ToString();
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.PlusClamped"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns><see cref="FixedFunctionAlpha.Disabled"/>; the alpha value is produced by the microcode itself</returns>
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = min(dst.rgb + src.rgb, 1) - confirm against the UcodeAssembler operand semantics.
+ asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ // NOTE(review): reads as min(src.a + dst.a, 1), presumably routed to the alpha output through Temp1.RToA - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.PlusClampedAlpha"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns><see cref="FixedFunctionAlpha.Disabled"/>; the alpha value is produced by the microcode itself</returns>
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlphaPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = min(dst.rgb + src.rgb, min(src.a + dst.a, 1)) - confirm against the UcodeAssembler operand semantics.
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR);
+ // The clamped alpha sum is computed a second time for the Temp1.RToA destination.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.PlusDarker"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns><see cref="FixedFunctionAlpha.Disabled"/>; the alpha value is produced by the microcode itself</returns>
+ private static FixedFunctionAlpha GenUncorrelatedPlusDarkerPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = max(min(src.a + dst.a, 1) + src.rgb + dst.rgb - src.a - dst.a, 0),
+ // accumulated one term at a time through PBR - confirm against the UcodeAssembler operand semantics.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero);
+ // The clamped alpha sum is computed again for the Temp1.RToA destination.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.Multiply"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedMultiplyPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = src.rgb * dst.rgb + src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) - confirm against the UcodeAssembler operand semantics.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstRGB);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.Screen"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedScreenPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = (src.rgb * dst.a + dst.rgb * src.a) - src.rgb * dst.rgb - confirm against the UcodeAssembler operand semantics.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB);
+ // NOTE(review): then adds src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) to Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.Overlay"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedOverlayPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp2 = src.rgb / src.a and Temp1 = dst.rgb / dst.a (reciprocal followed by multiply) - confirm.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f);
+ // NOTE(review): the .CC destination presumably records the sign of (PBR - 0.5) for the CC.LE / CC.GT instructions below;
+ // PBR here presumably still holds the result of the preceding Mul - confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ // NOTE(review): reads as Temp0 = Temp0 * (src.a * dst.a) + src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) - confirm.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.Darken"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedDarkenPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = min(src.rgb * dst.a, dst.rgb * src.a) - confirm against the UcodeAssembler operand semantics.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR);
+ // NOTE(review): then adds src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) to Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.Lighten"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedLightenPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp0 = max(src.rgb * dst.a, dst.rgb * src.a) - confirm against the UcodeAssembler operand semantics.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR);
+ // NOTE(review): then adds src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) to Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.ColorDodge"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedColorDodgePremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): the .CC destination presumably records the sign of (src.a - src.rgb); the CC.GT instructions then run
+ // only where that difference is positive and the CC.LE one where it is not - confirm against the UcodeAssembler semantics.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.SrcRGB);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB);
+ asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA);
+ // NOTE(review): second test on dst.rgb; where it is not positive Temp0 is overwritten with src.a * 0 - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero);
+ // NOTE(review): then adds src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) to Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.ColorBurn"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedColorBurnPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): the general case is computed unconditionally first; it reads as
+ // Temp0 = src.a * dst.a - src.a * min(dst.a, (dst.a * src.a - src.a * dst.rgb) / src.rgb) - confirm.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcRGB);
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR);
+ // NOTE(review): two special cases then overwrite Temp0: src.a * 0 where src.rgb is not positive,
+ // and src.a * dst.a where (dst.a - dst.rgb) is not positive - confirm the condition code semantics.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA);
+ // NOTE(review): then adds src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) to Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ /// <summary>
+ /// Emits the microcode registered in <see cref="Table"/> for <see cref="AdvancedBlendOp.HardLight"/>
+ /// with <see cref="AdvancedBlendOverlap.Uncorrelated"/> overlap and the table flag set to true.
+ /// The sequence matches <see cref="GenUncorrelatedOverlayPremul"/> except for the operand tested against 0.5 (Temp2 here, PBR there).
+ /// </summary>
+ /// <param name="asm">Assembler that receives the instructions</param>
+ /// <returns>Fixed function alpha state built from AddGl, OneGl and OneMinusSrcAlphaGl</returns>
+ private static FixedFunctionAlpha GenUncorrelatedHardLightPremul(ref UcodeAssembler asm)
+ {
+ // NOTE(review): reads as Temp2 = src.rgb / src.a and Temp1 = dst.rgb / dst.a (reciprocal followed by multiply) - confirm.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f);
+ // NOTE(review): the .CC destination presumably records the sign of (Temp2 - 0.5) for the CC.LE / CC.GT instructions below - confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ // NOTE(review): reads as Temp0 = Temp0 * (src.a * dst.a) + src.rgb * (1 - dst.a) + dst.rgb * (1 - src.a) - confirm.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl);
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSoftLightPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant below fills a distinct slot (4, 0, 1, 2, 3, 5, 6, 7); presumably the next ConstantRGB operand reads it
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // PBR is read right after the Temp1 write; presumably it mirrors that result, so this tests Temp1 against 0.25
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT branch: a chain of multiply-adds in Temp1 with the constants 0.2605, -0.7817, 0.3022, 0.2192
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); // GT branch result goes to Temp0
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE branch: a chain in Temp1 with the constants 16, 12, 3
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE branch result goes to Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // presumably PBR = 2*Temp2
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably sets condition codes from 2*Temp2 - 1
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // LE: presumably Temp0 = Temp1 - Temp1*Temp1, replacing the value chosen above
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // recomputed unconditionally; the previous PBR write carried .CC
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(2*Temp2 - 1) + Temp1
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDifferencePremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // PBR is read right after the Temp1 write; presumably it mirrors that result, giving Temp0 = Temp1 - Temp2
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // LT: operands swapped, presumably so Temp0 ends up as the absolute difference
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusPremul(ref UcodeAssembler asm)
+ { // Issues to asm a single Sub op for the blend mode named by this method.
+ asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); // presumably Temp0 = dst.rgb - src.rgb (operand order as written) - confirm against UcodeAssembler
+ return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: ReverseSubtractGl, OneGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusClampedPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // presumably Temp0 = max(dst.rgb - src.rgb, 0)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // RToA: presumably routes max(dst.a - src.a, 0) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedExclusionPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // presumably PBR = src.rgb*dst.a + dst.rgb*src.a
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); // presumably subtracts src.rgb*dst.rgb once...
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); // ...and a second time, storing the result in Temp0
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // final value is accumulated in Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedContrastPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.SetConstant(0, 2f, 2f, 2f); // constant slot 0 = (2, 2, 2); presumably read by the ConstantRGB operands of the next two ops
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); // presumably Temp0 = 2*dst.rgb - dst.a
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); // presumably PBR = 2*src.rgb - src.a
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.SetConstant(1, 0.5f, 0.5f, 0.5f); // constant slot 1 = (0.5, 0.5, 0.5); presumably read by the next ConstantRGB operand
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB); // final value is written to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.DstRGB); // presumably PBR = src.a*dst.a - src.a*dst.rgb
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // presumably Temp0 = dst.rgb*(1-src.a) + PBR
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertRGBPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.DstRGB); // presumably PBR = src.rgb*dst.a - src.rgb*dst.rgb
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // presumably Temp0 = dst.rgb*(1-src.a) + PBR
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertOvgPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Sub(CC.T, Dest.PBR, OpBD.ConstantOne, OpBD.DstRGB); // presumably PBR = 1 - dst.rgb
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.PBR, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably Temp0 = src.a*PBR + dst.rgb*(1-src.a)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearDodgePremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // presumably Temp0 = src.rgb*dst.a + dst.rgb*src.a
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // presumably clamps Temp0 to src.a*dst.a
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // final value is accumulated in Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearBurnPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // presumably PBR = src.rgb*dst.a + dst.rgb*src.a
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); // presumably PBR = PBR - src.a*dst.a
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // presumably clamps the result at zero
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // final value is accumulated in Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedVividLightPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = (0.5, 0.5, 0.5); presumably read by the next ConstantRGB operand
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // .CC: presumably sets condition codes from Temp2 - 0.5
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // GE branch starts: presumably builds Temp1 / (2*(1 - Temp2))
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GE branch ends: result clamped to 1 and stored in Temp0
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // LT branch starts: presumably builds 1 - (1 - Temp1) / (2*Temp2)
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // LT branch ends: result stored in Temp0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // presumably sets condition codes from Temp2 - 0
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE: presumably forces Temp0 to 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // presumably sets condition codes from Temp2 - 1
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE: Temp0 = ConstantOne
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearLightPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // presumably PBR = un-premultiplied destination colour (kept in PBR only)
+ asm.SetConstant(0, 2f, 2f, 2f); // constant slot 0 = (2, 2, 2); presumably read by the next ConstantRGB operand
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // presumably PBR = 2*Temp2 + PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max then Min: presumably clamps to the range 0..1
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPinLightPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // presumably PBR = 2*Temp2
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // presumably Temp0 = 2*Temp2 - 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // PBR is read right after the Temp0 write; presumably it mirrors that result, so this tests (2*Temp2 - 1) against Temp1
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // GT: presumably Temp0 = max(2*Temp2 - 1, 0)
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // LE: presumably Temp0 = min(2*Temp2, Temp1)
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardMixPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // presumably PBR = un-premultiplied destination colour (kept in PBR only)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // presumably PBR = source + destination colour
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably sets condition codes from the sum minus 1
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT: presumably forces Temp0 to 0
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE: Temp0 = ConstantOne
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedRedPremul(ref UcodeAssembler asm)
+ { // Issues to asm two Mov ops for the blend mode named by this method.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as the destination colour operand
+ asm.Mov(CC.T, Dest.Temp0.R, OpBD.SrcRGB); // .R: presumably a write mask that overwrites only the red channel with the source - confirm against UcodeAssembler
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedGreenPremul(ref UcodeAssembler asm)
+ { // Issues to asm two Mov ops for the blend mode named by this method.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as the destination colour operand
+ asm.Mov(CC.T, Dest.Temp0.G, OpBD.SrcRGB); // .G: presumably a write mask that overwrites only the green channel with the source - confirm against UcodeAssembler
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedBluePremul(ref UcodeAssembler asm)
+ { // Issues to asm two Mov ops for the blend mode named by this method.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as the destination colour operand
+ asm.Mov(CC.T, Dest.Temp0.B, OpBD.SrcRGB); // .B: presumably a write mask that overwrites only the blue channel with the source - confirm against UcodeAssembler
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslHuePremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Min through the .GBR swizzle: presumably the minimum over Temp2's three channels
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2); // channel minimum of Temp2 stored in Temp0
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // same pattern with Max: presumably the maximum over Temp2's three channels
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // presumably Temp0 = max - min of Temp2, with condition codes set
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT branch (range > 0) runs through L6333; presumably rescales Temp2 to Temp1's channel range
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); // Temp2 is overwritten here with the channel minimum of Temp1
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE branch: presumably forces Temp0 to 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from DstRGB / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably luminance weights read via ConstantRGB below
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // RRR/GGG/BBB chain: presumably a weighted channel sum of Temp1, broadcast to all channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 now holds that sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted sum for Temp0, kept in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // PBR is read right after the Temp2 write; presumably it mirrors that result
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // channel maximum of Temp2...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ...tested against 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT branch: presumably pulls Temp0 back toward Temp1 so the maximum does not exceed 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // channel minimum of Temp2; the last Min also sets condition codes
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT branch (minimum < 0): presumably the matching correction for the lower bound
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslSaturationPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // PBR is read right after the Temp1 write; presumably it mirrors that result, so this starts the channel minimum of Temp1
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); // channel minimum of Temp1 stored in Temp0
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // same pattern with Max: presumably the maximum over Temp1's three channels
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // presumably Temp0 = max - min of Temp1, with condition codes set
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT branch (range > 0) runs through L6394; presumably rescales Temp1 to Temp2's channel range
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2); // Temp1 is overwritten here with the channel minimum of Temp2
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE branch: presumably forces Temp0 to 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from DstRGB / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably luminance weights read via ConstantRGB below
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // RRR/GGG/BBB chain: presumably a weighted channel sum of Temp1, broadcast to all channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 now holds that sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted sum for Temp0, kept in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // PBR is read right after the Temp2 write; presumably it mirrors that result
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // channel maximum of Temp2...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ...tested against 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT branch: presumably pulls Temp0 back toward Temp1 so the maximum does not exceed 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // channel minimum of Temp2; the last Min also sets condition codes
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT branch (minimum < 0): presumably the matching correction for the lower bound
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslColorPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably luminance weights read via ConstantRGB below
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // PBR is read right after the Temp1 write; presumably it mirrors that result. RRR/GGG/BBB chain: presumably a weighted channel sum of Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 now holds that sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // same weighted sum for Temp2, kept in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); // Temp2 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // PBR is read right after the Temp2 write; presumably Temp0 = new Temp2
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Max through the .GBR swizzle: presumably the maximum over Temp2's three channels...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ...tested against 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT branch: presumably pulls Temp0 back toward Temp1 so the maximum does not exceed 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // channel minimum of Temp2; the last Min also sets condition codes
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT branch (minimum < 0): presumably the matching correction for the lower bound
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslLuminosityPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably luminance weights read via ConstantRGB below
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // RRR/GGG/BBB chain: presumably a weighted channel sum of Temp2, broadcast to all channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // Temp2 now holds that sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same weighted sum for Temp1, kept in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // difference of the two sums
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // PBR is read right after the Temp1 write; presumably Temp0 = new Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max through the .GBR swizzle: presumably the maximum over Temp1's three channels...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ...tested against 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT branch: presumably pulls Temp0 back toward Temp2 so the maximum does not exceed 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // channel minimum of Temp1; the last Min also sets condition codes
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // LT branch (minimum < 0): presumably the matching correction for the lower bound
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // presumably src.rgb*(1-dst.a) + dst.rgb*(1-src.a)
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // presumably Temp0 = Temp0*(src.a*dst.a) + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl, OneGl, OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // presumably Temp0 = Temp2*dst.a - Temp2*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // presumably Temp0 = Temp2*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha descriptor: AddGl, OneGl, ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // presumably Temp0 = Temp1*dst.a - Temp1*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min again; PBR was last written by the preceding Mmsub
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // presumably Temp0 = Temp1*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOverPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp2); // presumably PBR = PBR*Temp1 - PBR*Temp2
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.PBR); // presumably Temp0 = Temp2*dst.a + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); // presumably Temp0 = PBR*Temp2 + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // RToA: presumably routes min(src.a + dst.a, 1) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOverPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); // NOTE(review): both product pairs are identical; if Mmsub is a*b - c*d this yields 0 - confirm it is intended
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR); // presumably Temp0 = Temp1*dst.a + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); // presumably Temp0 = PBR*Temp2 + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // RToA: presumably routes min(src.a + dst.a, 1) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcInPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // presumably Temp0 = Temp2*dst.a - Temp2*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min again; PBR was last written by the preceding Mmsub
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // RToA: presumably routes dst.a - PBR to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstInPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // presumably Temp0 = Temp1*dst.a - Temp1*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min again; PBR was last written by the preceding Mmsub
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // RToA: presumably routes dst.a - PBR to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOutPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // presumably Temp0 = Temp2*PBR
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // RToA: presumably routes min(src.a, 1 - dst.a) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOutPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // presumably Temp0 = Temp1*PBR
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // RToA: presumably routes min(dst.a, 1 - src.a) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcAtopPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // presumably Temp0 = Temp2*dst.a - Temp2*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min again; PBR was last written by the preceding Mmsub
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // presumably Temp0 = Temp1*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl, ZeroGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstAtopPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // presumably Temp0 = Temp1*dst.a - Temp1*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // presumably Temp0 = Temp2*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha descriptor: AddGl, OneGl, ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointXorPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // presumably Temp0 = Temp2*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // presumably Temp0 = Temp1*PBR + Temp0
+ asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // Temp1 is reused for the alpha term min(dst.a, 1 - src.a)
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); // RToA: presumably routes the sum of the two Min terms to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointPlusPremul(ref UcodeAssembler asm)
+ { // Issues to asm a single Add op for the blend mode named by this method.
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); // presumably Temp0 = dst.rgb + src.rgb - confirm against UcodeAssembler
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: AddGl, OneGl, OneGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointMultiplyPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // PBR is read right after the Temp1 write; presumably it mirrors that result, giving Temp0 = Temp2*Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // presumably PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // presumably Temp0 = Temp0*dst.a + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); // presumably Temp0 = PBR*Temp2 + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // RToA: presumably routes min(src.a + dst.a, 1) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointScreenPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR is read right after the Temp1 write; presumably it mirrors that result, giving PBR = Temp2 + Temp1
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // presumably Temp0 = (Temp2 + Temp1) - Temp2*Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // presumably PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // presumably Temp0 = Temp0*dst.a + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); // presumably Temp0 = PBR*Temp2 + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // RToA: presumably routes min(src.a + dst.a, 1) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointOverlayPremul(ref UcodeAssembler asm)
+ { // Issues to asm the op sequence for the blend mode named by this method. Notes marked "presumably" are inferred from op/operand names - confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // presumably PBR = 1 / src.a
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // presumably Temp2 = un-premultiplied source colour
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably PBR = 1 / dst.a
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // presumably Temp1 = un-premultiplied destination colour
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = (0.5, 0.5, 0.5); presumably read by the next ConstantRGB operand
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // PBR is read right after the Temp1 write; presumably it mirrors that result, so this tests Temp1 against 0.5
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // LE branch: presumably Temp0 = 2*Temp2*Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT branch starts: presumably Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT branch ends: presumably Temp0 = 1 - 2*(1-Temp1)*(1-Temp2)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // presumably PBR = min(dst.a, 1 - src.a)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // presumably PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // presumably Temp0 = Temp0*dst.a + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // presumably PBR = min(src.a, 1 - dst.a)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); // presumably Temp0 = PBR*Temp2 + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // RToA: presumably routes min(src.a + dst.a, 1) to the alpha output
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // NOTE(review): self-move; its purpose is not visible here - confirm
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; presumably alpha comes from the RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDarkenPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint darken, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // mode-specific term: Temp0 <- Min(Temp2, PBR)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointLightenPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint lighten, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // mode-specific term: Temp0 <- Max(Temp2, PBR)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorDodgePremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint color dodge, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); // Temp0 <- ConstantOne - Temp2 with the .CC modifier; presumably sets the condition codes tested below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); // CC.GT path: Rcp of Temp0, times Temp1, then Min against ConstantOne
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE path: Temp0 <- ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // second .CC write, from Temp1 - ConstantZero
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // CC.LE after the second test: Temp0 <- ConstantZero
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorBurnPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint color burn, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); // Temp0 <- Temp2 - ConstantZero with the .CC modifier; presumably sets the condition codes tested below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2); // CC.GT path (this and the next two instructions)
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // unconditional: Temp0 <- Max(Temp0, ConstantZero)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // second .CC write, from ConstantOne - Temp1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE after the second test: Temp0 <- ConstantOne
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint hard light, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = (0.5, 0.5, 0.5), used as the threshold operand below
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); // tests Temp2 against the constant (the Overlay variant in this file tests PBR here instead); presumably sets the condition codes
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // CC.LE path: Temp0 built from Temp2 * Temp1 twice
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path (this and the next three instructions)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointSoftLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint soft light, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant below fills a distinct slot (0..7) right before an instruction that reads ConstantRGB; NOTE(review): how ConstantRGB selects the slot is not visible here
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC write from PBR minus the 0.25 constant; presumably sets the condition codes for the CC.GT / CC.LE paths below
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // CC.GT path: chained multiply-adds on Temp1 with constants 0.2605, -0.7817, 0.3022, 0.2192 (looks like a polynomial in Temp1 — TODO confirm)
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // CC.LE path: chained operations on Temp1 with constants 16, 12, 3
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR <- Temp2 + Temp2
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // second .CC write, from (Temp2 + Temp2) - ConstantOne
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // CC.LE after the second test: Temp0 is replaced by a term built only from Temp1
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // (Temp2 + Temp2) - ConstantOne is recomputed unconditionally, this time without .CC
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // final mode-specific term written to Temp0
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointDifferencePremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint difference, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // Temp0 <- PBR - Temp2 with the .CC modifier; presumably sets the condition codes tested below
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // CC.LT: operands swapped (Temp2 - Temp1); together these look like an absolute difference — TODO confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointExclusionPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint exclusion, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // the Temp2 * Temp1 product is subtracted twice in a row (this instruction and the next); the Screen variant in this file does it once
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // mode-specific term goes to Temp0
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint invert, premultiplied" blend variant (mode per the method name) to asm; only destination operands are read for color.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // Temp0 <- ConstantOne - PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min as two instructions earlier, issued again before the final Madd
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is described by the returned state: AddGl with factors ZeroGl and OneGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertRGBPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint invert RGB, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); // mode-specific term: presumably Temp2 * 1 - Temp2 * PBR — TODO confirm Mmsub operand order
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min as two instructions earlier, issued again before the final Madd
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is described by the returned state: AddGl with factors ZeroGl and OneGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearDodgePremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint linear dodge, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // mode-specific term: the sum above limited by Min against ConstantOne
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearBurnPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint linear burn, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // sum minus ConstantOne
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // mode-specific term: Max against ConstantZero keeps the value from going below zero
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointVividLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint vivid light, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = (0.5, 0.5, 0.5), used as the threshold operand below
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // .CC write from Temp2 minus the constant; presumably selects between the CC.GE and CC.LT paths
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // CC.GE path: (1 - Temp2) doubled, Rcp, times Temp1, Min against ConstantOne -> Temp0
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // CC.LT path: Temp2 doubled, Rcp, Mmsub with Temp1, Min against ConstantOne, then 1 - that -> Temp0
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // second .CC write: Temp2 against ConstantZero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: Temp0 <- SrcAAA * ConstantZero
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // third .CC write: Temp2 against ConstantOne
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 <- ConstantOne
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint linear light, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 2f, 2f, 2f); // constant slot 0 = (2, 2, 2), the multiplier applied to Temp2 below
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // presumably Temp2 * 2 + PBR — TODO confirm Madd operand order
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero then Min against ConstantOne bounds the term on both sides
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointPinLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint pin light, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR <- Temp2 + Temp2
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 <- (Temp2 + Temp2) - ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // .CC write against Temp1; presumably selects between the CC.GT and CC.LE paths
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // CC.GT path: Temp0 <- Max(Temp0, ConstantZero)
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // CC.LE path: Temp0 <- Min(Temp2 + Temp2, Temp1)
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardMixPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint hard mix, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write from the sum minus ConstantOne; presumably selects between the CC.LT and CC.GE paths
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LT: Temp0 <- SrcAAA * ConstantZero
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 <- ConstantOne
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslHuePremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint HSL hue, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Min with the .GBR modifier over Temp2; presumably a rotate-and-reduce that yields the minimum channel — TODO confirm .GBR semantics
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // same pattern with Max over Temp2
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 <- (Max result) - (Min result) with .CC; the CC.GT block below runs only when this is positive (presumably)
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // same Min / Max reduction pattern, now over Temp1; the Min result overwrites Temp2
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path: Temp0 <- SrcAAA * ConstantZero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is recomputed from DstRGB and DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights (0.3, 0.59, 0.11); presumably luminance coefficients
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with .RRR/.GGG/.BBB accumulate the weighted channels; presumably a dot product with the weights — TODO confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted accumulation over Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two weighted sums, then added onto Temp0 to form Temp2
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Max reduction over Temp2, then compared with ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rescales Temp0 relative to Temp1 (presumably handles a maximum above one)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min reduction over Temp2; the last Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT block: rescales relative to Temp1 (presumably handles a minimum below zero)
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the tail reads them
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslSaturationPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint HSL saturation, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov + two Min with the .GBR modifier; the Mov reads PBR where the later groups read Temp1; presumably yields the minimum channel — TODO confirm .GBR semantics
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // same pattern with Max over Temp1
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 <- (Max result) - (Min result) with .CC; the CC.GT block below runs only when this is positive (presumably)
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // same Min / Max reduction pattern, now over Temp2; the Min result overwrites Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path: Temp0 <- SrcAAA * ConstantZero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is recomputed from DstRGB and DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights (0.3, 0.59, 0.11); presumably luminance coefficients
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with .RRR/.GGG/.BBB accumulate the weighted channels; presumably a dot product with the weights — TODO confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted accumulation over Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two weighted sums, then added onto Temp0 to form Temp2
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Max reduction over Temp2, then compared with ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rescales Temp0 relative to Temp1 (presumably handles a maximum above one)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min reduction over Temp2; the last Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT block: rescales relative to Temp1 (presumably handles a minimum below zero)
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the tail reads them
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslColorPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint HSL color, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights (0.3, 0.59, 0.11); presumably luminance coefficients
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with .RRR/.GGG/.BBB accumulate the weighted channels into Temp1; presumably a dot product with the weights — TODO confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // same weighted accumulation over Temp2
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two weighted sums, then added onto Temp2
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Max with the .GBR modifier over Temp2; presumably yields the maximum channel — TODO confirm .GBR semantics
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write from the Max result minus ConstantOne
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rescales Temp0 relative to Temp1 (presumably handles a maximum above one)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min reduction over Temp2; the last Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT block: rescales relative to Temp1 (presumably handles a minimum below zero)
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the tail reads them
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslLuminosityPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "disjoint HSL luminosity, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights (0.3, 0.59, 0.11); presumably luminance coefficients
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // Mul/Madd/Madd with .RRR/.GGG/.BBB accumulate the weighted channels of Temp2 back into Temp2; presumably a dot product — TODO confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same weighted accumulation over Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // difference of the two weighted sums, then added onto Temp1 (Temp1/Temp2 roles are swapped relative to the HslColor variant in this file)
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max with the .GBR modifier over Temp1; presumably yields the maximum channel — TODO confirm .GBR semantics
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write from the Max result minus ConstantOne
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rescales Temp0 relative to Temp2 (presumably handles a maximum above one)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Min reduction over Temp1; the last Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // CC.LT block: rescales relative to Temp2 (presumably handles a minimum below zero)
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the tail reads them
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // start of the tail that combines Temp0, Temp1 and Temp2 with alpha-derived weights
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min of (SrcAAA + DstAAA) and ConstantOne, written via the RToA modifier; NOTE(review): presumably the output alpha — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // self-move of Temp0; NOTE(review): purpose is not evident from this code alone
+ return FixedFunctionAlpha.Disabled; // this variant returns the Disabled fixed-function alpha value
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "conjoint src, premultiplied" blend variant (mode per the method name) to asm; only source color is read.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 <- Temp2 * Min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max of (SrcAAA - DstAAA) and ConstantZero
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // accumulates the second Temp2 term into Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha is described by the returned state: AddGl with factors OneGl and ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDstPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "conjoint dst, premultiplied" blend variant (mode per the method name) to asm; only destination color is read.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 <- Temp1 * Min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max of (DstAAA - SrcAAA) and ConstantZero
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // accumulates the second Temp1 term into Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is described by the returned state: AddGl with factors ZeroGl and OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOverPremul(ref UcodeAssembler asm) // Appends the instruction sequence for the "conjoint src-over, premultiplied" blend variant (mode per the method name) to asm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB * PBR (PBR written by Rcp of SrcAAA); presumably the un-premultiplied source color
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB * PBR (PBR written by Rcp of DstAAA); presumably the un-premultiplied destination color
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp2); // Temp0 starts as a copy of Temp2
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC write from SrcAAA - DstAAA; presumably selects between the CC.GE and CC.LT paths
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // CC.GE path: both products use Temp2
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: uses DstAAA - SrcAAA as the weight on Temp1
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR);
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is described by the returned state: MaximumGl with factors OneGl and OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOverPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "DstOver" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR, which under the previous-result reading is the Temp1 (destination) value just computed
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp1 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcInPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "SrcIn" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Temp2 * min(DstAAA, SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MinimumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDstInPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "DstIn" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (destination color divided by its alpha under that reading)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * min(DstAAA, SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MinimumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOutPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "SrcOut" mode on premultiplied inputs (mode taken from the method name); alpha is written by the sequence itself and the Disabled state is returned.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(SrcAAA - DstAAA, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Temp2 * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // same difference recomputed, this time for the alpha write below
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // max(SrcAAA - DstAAA, 0) into Temp1.RToA; RToA presumably routes the value to the alpha output — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not visible here (possibly finalizes the color result) — confirm
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOutPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "DstOut" mode on premultiplied inputs (mode taken from the method name); alpha is written by the sequence itself and the Disabled state is returned.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (destination color divided by its alpha under that reading)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(DstAAA - SrcAAA, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // same difference recomputed, this time for the alpha write below
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // max(DstAAA - SrcAAA, 0) into Temp1.RToA; RToA presumably routes the value to the alpha output — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not visible here (possibly finalizes the color result) — confirm
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcAtopPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "SrcAtop" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Temp2 * min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0 (Madd read as a*b + c)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // fixed-function alpha: AddGl, factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDstAtopPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "DstAtop" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(SrcAAA - DstAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp2 * PBR + Temp0 (Madd read as a*b + c)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // fixed-function alpha: AddGl, factors OneGl / ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenConjointXorPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Xor" mode on premultiplied inputs (mode taken from the method name); alpha is written by the sequence itself and the Disabled state is returned.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(SrcAAA - DstAAA, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Temp2 * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0 (Madd read as a*b + c)
+ asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); // Temp1 = DstAAA - SrcAAA; .CC presumably latches condition codes for the LT line below — confirm
+ asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); // LT path: flip the subtraction, so Temp1 ends up as the absolute alpha difference
+ asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); // RToA presumably routes Temp1 to the alpha output — confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not visible here (possibly finalizes the color result) — confirm
+ return FixedFunctionAlpha.Disabled;
+ }
+
+ private static FixedFunctionAlpha GenConjointMultiplyPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Multiply" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // blend term: Temp0 = Temp2 * PBR, with PBR presumably still the Temp1 value
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointScreenPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Screen" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 + PBR, with PBR presumably still the Temp1 value
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // blend term: Temp0 = PBR * 1 - Temp2 * Temp1 (Mmsub read as a*b - c*d)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointOverlayPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Overlay" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // loads 0.5 per channel; first argument presumably a constant slot index — confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // compares PBR (presumably the Temp1 value) against 0.5; LE/GT lines below branch on the result
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // LE path: Temp0 = 2 * Temp2 * Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // GT path: PBR = 1 - Temp2
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT path: PBR = 2 * Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDarkenPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Darken" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // blend term: Temp0 = min(Temp2, PBR), with PBR presumably still the Temp1 value
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointLightenPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Lighten" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // blend term: Temp0 = max(Temp2, PBR), with PBR presumably still the Temp1 value
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointColorDodgePremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "ColorDodge" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); // Temp0 = 1 - Temp2; GT/LE lines below branch on its sign
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); // GT path: PBR = Temp1 / (1 - Temp2) under the reciprocal reading
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GT path: clamp the quotient to at most 1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // LE path (1 - Temp2 not positive): Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // re-tests on Temp1 - 0
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // Temp1 not positive: Temp0 = 0 overrides the result above
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointColorBurnPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "ColorBurn" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); // Temp0 = Temp2 - 0; GT lines below run only when Temp2 is positive
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2);
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT path: PBR = PBR * 1 - PBR * Temp1, i.e. (1 - Temp1) / Temp2 under the reciprocal reading
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // clamp Temp0 to at least 0 on every path
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // re-tests on 1 - Temp1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // 1 - Temp1 not positive: Temp0 = 1 overrides the result above
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHardLightPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "HardLight" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // loads 0.5 per channel; first argument presumably a constant slot index — confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); // compares Temp2 against 0.5; LE/GT lines below branch on the result
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // LE path: Temp0 = 2 * Temp2 * Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // GT path: PBR = 1 - Temp2
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT path: PBR = 2 * Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointSoftLightPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "SoftLight" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // NOTE(review): first argument presumably a constant slot; ConstantRGB on the following instruction presumably reads the value just set — confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // compares PBR (presumably the Temp1 value) against 0.25; GT/LE lines below branch on the result
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT path: start of a cubic in Temp1 with coefficients 0.2605, -0.7817, 0.3022, 0.2192
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT path: PBR = Temp1 * PBR + Temp1 * constant
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); // GT path: Temp0 = PBR + constant
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE path: start of a cubic in Temp1 with coefficients 16, 12 (subtracted), 3
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE path: PBR = Temp1 * PBR - Temp1 * constant
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE path: Temp0 = Temp1 * PBR + Temp1 * constant
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR = 2 * Temp2
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // tests 2 * Temp2 - 1
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // LE path: Temp0 = Temp1 - Temp1 * Temp1, replacing the polynomial result
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = 2 * Temp2 - 1, recomputed without touching the condition codes
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // blend term: Temp0 = Temp0 * PBR + Temp1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointDifferencePremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Difference" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // Temp0 = PBR - Temp2, with PBR presumably still the Temp1 value; LT line below branches on the sign
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // LT path: flip the subtraction, so Temp0 ends up as the absolute difference
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointExclusionPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Exclusion" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 + PBR, with PBR presumably still the Temp1 value
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // PBR = PBR - Temp2 * Temp1 (Mmsub read as a*b - c*d)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // blend term: the product is subtracted a second time, giving Temp2 + Temp1 - 2 * Temp2 * Temp1 under that reading
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "Invert" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (destination color divided by its alpha under that reading)
+ asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // blend term: Temp0 = 1 - PBR, with PBR presumably still the Temp1 value; source color is not read in this method
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 = Temp0 * min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0 (Madd read as a*b + c)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // fixed-function alpha: AddGl, factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertRGBPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "InvertRGB" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); // blend term: Temp0 = Temp2 - Temp2 * PBR, with PBR presumably still the Temp1 value (Mmsub read as a*b - c*d)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 = Temp0 * min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0 (Madd read as a*b + c)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // fixed-function alpha: AddGl, factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearDodgePremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "LinearDodge" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 + PBR, with PBR presumably still the Temp1 value
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // blend term: Temp0 = min(PBR, 1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearBurnPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "LinearBurn" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 + PBR, with PBR presumably still the Temp1 value
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // blend term: Temp0 = max(PBR, 0)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointVividLightPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "VividLight" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // loads 0.5 per channel; first argument presumably a constant slot index — confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // compares Temp2 against 0.5; GE/LT lines below branch on the result
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // GE path: PBR = 1 - Temp2
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); // GE path: PBR doubled
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); // GE path: PBR = Temp1 / (2 * (1 - Temp2)) under the reciprocal reading
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GE path: Temp0 = min(PBR, 1)
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // LT path: PBR = 2 * Temp2
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // LT path: PBR = PBR - PBR * Temp1, i.e. (1 - Temp1) / (2 * Temp2) under the reciprocal reading
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // LT path: Temp0 = 1 - min(PBR, 1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // re-tests on Temp2 - 0
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Temp2 not positive: Temp0 = SrcAAA * 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // re-tests on Temp2 - 1
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // Temp2 at least 1: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearLightPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "LinearLight" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(0, 2f, 2f, 2f); // loads 2 per channel; first argument presumably a constant slot index — confirm
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // PBR = Temp2 * 2 + PBR, with PBR presumably still the Temp1 value (Madd read as a*b + c)
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // blend term: Temp0 = PBR clamped between 0 and 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointPinLightPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "PinLight" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR = 2 * Temp2
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 = 2 * Temp2 - 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // tests PBR - Temp1, where PBR is presumably the Temp0 value just written — confirm
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // GT path: Temp0 = max(Temp0, 0)
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // LE path: PBR = 2 * Temp2
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // LE path: Temp0 = min(2 * Temp2, Temp1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHardMixPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "HardMix" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // NOTE(review): Rcp read as reciprocal and PBR as the previous result; operand semantics are inferred from mnemonics — confirm in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 + PBR, with PBR presumably still the Temp1 value
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // tests the sum against 1; LT/GE lines below branch on the result
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT path: Temp0 = SrcAAA * 0
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE path: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHslHuePremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "HslHue" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it. NOTE(review): the phase comments below are a reading of the mnemonics and swizzles, not verified against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp read as reciprocal and PBR as the previous result — confirm
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov plus two Min through the .GBR swizzle: presumably the minimum of Temp2's three channels, ending in Temp0 — confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // same pattern with Max: presumably the maximum of Temp2's channels, left in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 (max minus min under that reading); GT/LE lines below branch on it
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // GT path: channel minimum of Temp2 again, this time left in PBR
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR); // GT path: Temp0 = Temp0 * Temp2 - Temp0 * PBR (Mmsub read as a*b - c*d)
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT path: same min pattern over Temp1; note the result overwrites Temp2
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT path: max pattern over Temp1, left in PBR
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // GT path: Temp0 = Temp0 * PBR - Temp0 * Temp2
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 recomputed from DstRGB and DstAAA
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights 0.3 / 0.59 / 0.11; first argument presumably a constant slot index — confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul then two Madd through .RRR/.GGG/.BBB: presumably a weighted channel sum of Temp1, stored back into Temp1 — confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted-sum pattern over Temp0, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR (presumably the Temp2 value just written)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // max pattern over Temp2, left in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // tests that maximum against 1; GT lines below handle the case where it exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT path: undo the subtraction of 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT path: Temp0 = PBR - PBR * Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path: Temp0 = Temp0 * (Temp2 - Temp1) + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // min pattern over Temp2; the last Min also targets .CC, so LT lines below presumably run when that minimum is negative — confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT path: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path: Temp0 = PBR * Temp0 + Temp1
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHslSaturationPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "HslSaturation" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it. NOTE(review): the phase comments below are a reading of the mnemonics and swizzles, not verified against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp read as reciprocal and PBR as the previous result — confirm
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov plus two Min through the .GBR swizzle: presumably the minimum of Temp1's three channels (PBR still holding the Temp1 value), ending in Temp0 — confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // same pattern with Max: presumably the maximum of Temp1's channels, left in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 (max minus min under that reading); GT/LE lines below branch on it
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT path: channel minimum of Temp1 again, this time left in PBR
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // GT path: Temp0 = Temp0 * Temp1 - Temp0 * PBR (Mmsub read as a*b - c*d)
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // GT path: same min pattern over Temp2; note the result overwrites Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // GT path: max pattern over Temp2, left in PBR
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // GT path: Temp0 = Temp0 * PBR - Temp0 * Temp1
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 was overwritten on the GT path, so it is recomputed from DstRGB and DstAAA
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights 0.3 / 0.59 / 0.11; first argument presumably a constant slot index — confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul then two Madd through .RRR/.GGG/.BBB: presumably a weighted channel sum of Temp1, stored back into Temp1 — confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted-sum pattern over Temp0, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR (presumably the Temp2 value just written)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // max pattern over Temp2, left in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // tests that maximum against 1; GT lines below handle the case where it exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT path: undo the subtraction of 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT path: Temp0 = PBR - PBR * Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path: Temp0 = Temp0 * (Temp2 - Temp1) + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // min pattern over Temp2; the last Min also targets .CC, so LT lines below presumably run when that minimum is negative — confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT path: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path: Temp0 = PBR * Temp0 + Temp1
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHslColorPremul(ref UcodeAssembler asm)
+ { // Issues the ucode for the conjoint "HslColor" mode on premultiplied inputs (mode taken from the method name) and returns the alpha state to pair with it. NOTE(review): the phase comments below are a reading of the mnemonics and swizzles, not verified against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp read as reciprocal and PBR as the previous result — confirm
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB * PBR (source color divided by its alpha under that reading)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR (same for the destination)
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights 0.3 / 0.59 / 0.11; first argument presumably a constant slot index — confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul then two Madd through .RRR/.GGG/.BBB: presumably a weighted channel sum of Temp1, stored back into Temp1 — confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // same weighted-sum pattern over Temp2, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); // Temp2 = Temp2 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR (presumably the Temp2 value just written)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov plus two Max through the .GBR swizzle: presumably the maximum of Temp2's three channels, left in PBR — confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // tests that maximum against 1; GT lines below handle the case where it exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT path: undo the subtraction of 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT path: Temp0 = PBR - PBR * Temp1 (Mmsub read as a*b - c*d)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path: Temp0 = Temp0 * (Temp2 - Temp1) + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // min pattern over Temp2; the last Min also targets .CC, so LT lines below presumably run when that minimum is negative — confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT path: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path: Temp0 = PBR * Temp0 + Temp1
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 were overwritten above, so both are recomputed before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA - DstAAA; the .CC destination presumably latches the condition codes used by the GE/LT lines below — confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 = Temp0 * DstAAA + Temp2 * PBR (Mmadd read as a*b + c*d)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT path: PBR = DstAAA - SrcAAA
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: Temp0 = Temp0 * SrcAAA + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // fixed-function alpha: MaximumGl, factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenConjointHslLuminosityPremul(ref UcodeAssembler asm) // Issues the asm calls for the conjoint HslLuminosity (premultiplied) blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA, then Mul with SrcRGB into Temp2 - presumably un-premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // same Rcp/Mul pair for the destination, result in Temp1
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11) - presumably luminance weights; confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // Mul/Madd/Madd over Temp2 with RRR/GGG/BBB modifiers, ending in Temp2 - presumably a weighted channel sum; confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same three-call pattern over Temp1, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max with the GBR modifier - presumably a max across channels; confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC destination; the CC.GT-predicated calls below presumably key off this result - confirm
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min with the GBR modifier - presumably a min across channels; confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // .CC destination; followed by CC.LT-predicated calls
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2/Temp1 are rebuilt with the same Rcp/Mul pairs used at the top of the method
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC on SrcAAA - DstAAA; one Mmadd is predicated on CC.GE, the other on CC.LT
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl with factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDstOver(ref UcodeAssembler asm) // Issues the four asm calls for the uncorrelated DstOver blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA - presumably the sum of both products; confirm
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcIn(ref UcodeAssembler asm) // Issues the three asm calls (Mul, Mul, Mov) for the uncorrelated SrcIn blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA); // PBR is fed back in together with DstAAA
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl); // alpha descriptor: AddGl with factors DstAlphaGl / ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcOut(ref UcodeAssembler asm) // Issues the two asm calls for the uncorrelated SrcOut blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.OneMinusDstAAA); // PBR with OneMinusDstAAA; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl); // alpha descriptor: AddGl with factors OneMinusDstAlphaGl / ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcAtop(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated SrcAtop blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB with OneMinusSrcAAA plus the PBR term (presumably a*b+c; confirm); last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDstAtop(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated DstAtop blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.PBR); // Temp2 with OneMinusDstAAA plus the PBR term; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha descriptor: AddGl with factors OneGl / ZeroGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedXor(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated Xor blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB with OneMinusSrcAAA plus the PBR term; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneMinusDstAlphaGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClamped(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated PlusClamped blend routine; returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Min against ConstantOne into Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // alpha sum capped at ConstantOne, written to Temp1 with RToA - presumably routes the value to the alpha channel; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; the reason is not evident from this part of the file
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; the routine wrote Temp1.RToA itself
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlpha(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated PlusClampedAlpha blend routine; returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // alpha sum capped at ConstantOne...
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // ...and Temp0 is then capped by that value
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // the alpha sum is computed a second time for the RToA write below
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // written to Temp1 with RToA - presumably routes the value to the alpha channel; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; the reason is not evident from this part of the file
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; the routine wrote Temp1.RToA itself
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusDarker(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated PlusDarker blend routine; returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // alpha sum capped at ConstantOne
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.Temp2); // then Temp2 and DstRGB are added and SrcAAA and DstAAA subtracted, one call each
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero into Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // capped alpha sum written to Temp1 with RToA - presumably routes the value to the alpha channel; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; the reason is not evident from this part of the file
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; the routine wrote Temp1.RToA itself
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMultiply(ref UcodeAssembler asm) // Issues the four asm calls for the uncorrelated Multiply blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstRGB); // PBR here presumably still holds the value just written to Temp2; confirm
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedScreen(ref UcodeAssembler asm) // Issues the five asm calls for the uncorrelated Screen blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); // the Temp2/DstRGB pair is taken off once (Mmsub presumably a*b-c*d; confirm)
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedOverlay(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Overlay blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = 0.5 on all three components
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC destination comparing PBR with the 0.5 constant; CC.LE / CC.GT calls follow
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: the same SrcRGB/Temp1 pair is passed twice
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path: built from ConstantOne minus Temp1 and ConstantOne minus SrcRGB
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDarken(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Darken blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Min of the two products (the Lighten routine uses Max at this position)
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLighten(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Lighten blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Max of the two products (the Darken routine uses Min at this position)
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorDodge(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated ColorDodge blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.PBR); // .CC destination on SrcAAA minus PBR; CC.GT / CC.LE calls follow
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // CC.GT path: reciprocal of that difference, scaled by SrcAAA and DstRGB, capped by DstAAA
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB);
+ asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // CC.LE path: Temp0 from SrcAAA and DstAAA
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero); // second .CC test, DstRGB against ConstantZero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorBurn(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated ColorBurn blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.Temp2); // reciprocal of Temp2, applied to Temp0 on the next line
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR); // capped by DstAAA
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // .CC test of Temp2 against ConstantZero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB); // .CC test of DstAAA against DstRGB
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // CC.LE: Temp0 from SrcAAA and DstAAA
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardLight(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HardLight blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = 0.5 on all three components
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC test of SrcRGB against the 0.5 constant (the Overlay routine tests PBR at this position)
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: the same SrcRGB/Temp1 pair is passed twice
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path: built from ConstantOne minus Temp1 and ConstantOne minus SrcRGB
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSoftLight(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated SoftLight blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant uses a distinct slot index; how ConstantRGB picks a slot is defined by UcodeAssembler, not visible here
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC test of PBR against the 0.25 constant; CC.GT / CC.LE calls follow
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); // CC.GT path: chain over Temp1 using constants 0.2605, -0.7817, 0.3022, 0.2192 - presumably a polynomial approximation; confirm
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f); // CC.LE path: chain over Temp1 using constants 16, 12, 3
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB added to itself, then .CC test against ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // CC.LE: Temp0 replaced by a value built from Temp1 only
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB doubled again, this time without .CC, for the Madd below
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDifference(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Difference blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // PBR minus SrcRGB into Temp0 with .CC
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // CC.LT: operands swapped - presumably keeps the difference non-negative; confirm
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinus(ref UcodeAssembler asm) // Issues the two asm calls for the uncorrelated Minus blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); // DstRGB minus PBR; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: ReverseSubtractGl with factors OneGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusClamped(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated MinusClamped blend routine; returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero into Temp0
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // alpha difference floored at ConstantZero, written to Temp1 with RToA - presumably routes the value to the alpha channel; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; the reason is not evident from this part of the file
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha descriptor; the routine wrote Temp1.RToA itself
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedExclusion(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Exclusion blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); // the Temp2/DstRGB pair is taken off twice in a row (the Screen routine does it once)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedContrast(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated Contrast blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.SetConstant(0, 2f, 2f, 2f); // constant slot 0 = 2 on all three components
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); // DstRGB with the constant, less DstAAA
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); // same shape for Temp2 and SrcAAA
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.SetConstant(1, 0.5f, 0.5f, 0.5f); // constant slot 1 = 0.5 on all three components
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertRGB(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated InvertRGB blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.PBR, OpBD.DstRGB); // PBR paired with DstAAA and with DstRGB
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB with OneMinusSrcAAA plus the PBR term; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearDodge(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated LinearDodge blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 capped by the SrcAAA/DstAAA product
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearBurn(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated LinearBurn blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); // the SrcAAA/DstAAA pair is taken off
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero into Temp0
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedVividLight(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated VividLight blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant slot 0 = 0.5 on all three components
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC test of SrcRGB against the 0.5 constant; CC.GE / CC.LT calls follow
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // CC.GE path: ConstantOne minus SrcRGB, doubled, reciprocal, applied to Temp1, capped at ConstantOne
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LT path: SrcRGB doubled, reciprocal, combined with Temp1, capped, then taken from ConstantOne
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // .CC test of SrcRGB against ConstantZero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // .CC test of SrcRGB against ConstantOne
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 set from ConstantOne
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearLight(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated LinearLight blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 2f, 2f, 2f); // constant slot 0 = 2 on all three components
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // SrcRGB with the constant plus the PBR term
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero then Min against ConstantOne, result in Temp0
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPinLight(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated PinLight blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB added to itself, then ConstantOne subtracted into Temp0
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // .CC test of PBR against Temp1; CC.GT / CC.LE calls follow
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // CC.GT path: Temp0 floored at ConstantZero
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LE path: Min of doubled SrcRGB and Temp1
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardMix(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HardMix blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC test of the sum against ConstantOne; CC.LT / CC.GE calls follow
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LT: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 set from ConstantOne
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs Temp2 with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedRed(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated Red blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as DstRGB
+ asm.Mov(CC.T, Dest.Temp0.R, OpBD.Temp2); // then Temp2 is moved in with the .R modifier - presumably overwrites only the red channel; confirm
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedGreen(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated Green blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as DstRGB
+ asm.Mov(CC.T, Dest.Temp0.G, OpBD.Temp2); // then Temp2 is moved in with the .G modifier - presumably overwrites only the green channel; confirm
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedBlue(ref UcodeAssembler asm) // Issues the three asm calls for the uncorrelated Blue blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA into Temp2 - presumably premultiplies the source; confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 starts as DstRGB
+ asm.Mov(CC.T, Dest.Temp0.B, OpBD.Temp2); // then Temp2 is moved in with the .B modifier - presumably overwrites only the blue channel; confirm
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha descriptor: AddGl with factors ZeroGl / OneGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslHue(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HslHue blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov/Min/Min over SrcRGB with the GBR modifier into Temp0 - presumably a min across channels; confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov/Max/Max over SrcRGB - presumably a max across channels; confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // PBR minus Temp0 with .CC; the CC.GT block below runs through L-equivalent Mmsub, CC.LE is the single Mul after it
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // same Min and Max patterns, now over Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt with the same Rcp/Mul pair used at the top of the method
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11) - presumably luminance weights; confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR/GGG/BBB modifiers, ending in Temp1 - presumably a weighted channel sum; confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same three-call pattern over Temp0, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2, then .CC test against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rewrites Temp0 from Temp1/Temp2
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min carries .CC for the CC.LT block
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs PBR with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslSaturation(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HslSaturation blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov/Min/Min with the GBR modifier into Temp0 (first operand is PBR, the rest Temp1) - presumably a min across channels; confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max over Temp1 - presumably a max across channels; confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // PBR minus Temp0 with .CC; a CC.GT block follows, then a single CC.LE Mul
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // same Min and Max patterns, now over SrcRGB
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: SrcAAA with ConstantZero into Temp0 - presumably zeroes it; confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt with the same Rcp/Mul pair used at the top of the method
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11) - presumably luminance weights; confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR/GGG/BBB modifiers, ending in Temp1 - presumably a weighted channel sum; confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same three-call pattern over Temp0, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2, then .CC test against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rewrites Temp0 from Temp1/Temp2
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min carries .CC for the CC.LT block
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs PBR with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslColor(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HslColor blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11) - presumably luminance weights; confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR/GGG/BBB modifiers, ending in Temp1 - presumably a weighted channel sum; confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // same three-call pattern over SrcRGB, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2 with the GBR modifier, then .CC test against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rewrites Temp0 from Temp1/Temp2
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min carries .CC for the CC.LT block
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs PBR with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslLuminosity(ref UcodeAssembler asm) // Issues the asm calls for the uncorrelated HslLuminosity blend routine and returns a FixedFunctionAlpha.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA, then Mul with DstRGB into Temp1 - presumably un-premultiplies the destination; confirm
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11) - presumably luminance weights; confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd over SrcRGB with RRR/GGG/BBB modifiers, ending in Temp2 - presumably a weighted channel sum; confirm
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same three-call pattern over Temp1, left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max over Temp1 with the GBR modifier, then .CC test against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT block: rewrites Temp0 from Temp1/Temp2
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min over Temp1; the last Min carries .CC for the CC.LT block
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB with SrcAAA - presumably premultiplies the source; confirm
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // pairs PBR with OneMinusDstAAA and DstRGB with OneMinusSrcAAA, kept in Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 with the SrcAAA/DstAAA product plus Temp1; last write goes to Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // alpha descriptor: AddGl with factors OneGl / OneMinusSrcAlphaGl
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrc(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Src"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB*DstA - SrcRGB*PBR, assuming Mmsub is a*b - c*d
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = SrcRGB*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha is described by this fixed-function setup; arguments are presumably (op, source factor, destination factor)
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOver(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "SrcOver"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.SrcRGB); // PBR = PBR*Temp1 - PBR*SrcRGB, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.PBR); // Temp0 = SrcRGB*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOver(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "DstOver"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); // both products are PBR*Temp1, so this evaluates to zero if Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp1*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcIn(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "SrcIn"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB*DstA - SrcRGB*PBR, assuming Mmsub is a*b - c*d
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min issued again so PBR holds it for the next op
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // DstA - PBR stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOut(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "SrcOut"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // the same min stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcAtop(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "SrcAtop"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB*DstA - SrcRGB*PBR, assuming Mmsub is a*b - c*d
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min issued again so PBR holds it for the next op
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is described by this fixed-function setup; arguments are presumably (op, source factor, destination factor)
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstAtop(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "DstAtop"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1*DstA - Temp1*PBR, assuming Mmsub is a*b - c*d
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = SrcRGB*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha is described by this fixed-function setup; arguments are presumably (op, source factor, destination factor)
+ }
+
+ private static FixedFunctionAlpha GenDisjointXor(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Xor"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 += Temp1 * PBR
+ asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // Temp1 is reused here to hold min(DstA, 1 - SrcA)
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); // sum of the two mins stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointPlus(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Plus"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcA
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); // Temp0 = DstRGB + PBR
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is described by this fixed-function setup; arguments are presumably (op, source factor, destination factor)
+ }
+
+ private static FixedFunctionAlpha GenDisjointMultiply(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Multiply"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR; PBR is read right after the Temp1 write, so it presumably carries the previous result - confirm against UcodeAssembler
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointScreen(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Screen"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // Temp0 = PBR*1 - SrcRGB*Temp1, assuming Mmsub is a*b - c*d
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointOverlay(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Overlay"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // threshold used by the comparison on the next line
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC presumably records the sign of (PBR - 0.5) to select the LE / GT paths below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE path: Temp0 = 2 * SrcRGB * Temp1, assuming Mmadd is a*b + c*d
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GT path: PBR = 1 - SrcRGB
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT path: PBR = 2 * Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDarken(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Darken"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = min(SrcRGB, PBR); PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointLighten(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Lighten"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = max(SrcRGB, PBR); PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorDodge(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "ColorDodge"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); // Temp0 = 1 - SrcRGB; .CC presumably records its sign for the GT / LE paths below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GT path: Temp0 = min(Temp1 / (1 - SrcRGB), 1)
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // LE path: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // presumably re-tests Temp1 against zero
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // Temp0 = 0 where that test is LE
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorBurn(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "ColorBurn"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); // Temp0 = SrcRGB - 0; .CC presumably records its sign for the GT path below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT path: PBR = (1 - Temp1) / SrcRGB, assuming Mmsub is a*b - c*d
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // clamp Temp0 to be non-negative
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // presumably tests whether Temp1 has reached 1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // Temp0 = 1 where that test is LE
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardLight(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "HardLight"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // threshold used by the comparison on the next line
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC presumably records the sign of (SrcRGB - 0.5) to select the LE / GT paths below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE path: Temp0 = 2 * SrcRGB * Temp1, assuming Mmadd is a*b + c*d
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GT path: PBR = 1 - SrcRGB
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT path: PBR = 2 * Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT path: Temp0 = 1 - PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointSoftLight(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "SoftLight"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant here uses a distinct first argument (0..7); presumably a constant slot index, and ConstantRGB reads the most recently set one - confirm against UcodeAssembler
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC presumably records the sign of (PBR - 0.25) to select the GT / LE paths below
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); // GT path: a polynomial in Temp1 built up step by step; presumably an approximation of a non-polynomial term (TODO confirm which)
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE path: Temp0 = ((16*Temp1 - 12)*Temp1 + 3)*Temp1 if the ops compose as their names suggest
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC presumably records the sign of (2*SrcRGB - 1)
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // LE path: Temp0 = Temp1 - Temp1*Temp1
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = 2*SrcRGB - 1, recomputed without touching the condition codes
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0*PBR + Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointDifference(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Difference"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // Temp0 = PBR - SrcRGB, with .CC presumably recording the sign
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // where negative, Temp0 = SrcRGB - Temp1 instead; together presumably an absolute difference
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointExclusion(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "Exclusion"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // PBR = PBR - SrcRGB*Temp1, assuming Mmsub is a*b - c*d
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // the product is subtracted a second time, so the repeated line is deliberate, not a duplicate
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertRGB(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "InvertRGB"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB - SrcRGB*PBR, assuming Mmsub is a*b - c*d; PBR presumably still carries the Temp1 result
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); // Temp0 = Temp0*DstA - Temp0*PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // same Min issued again so PBR holds it for the next op
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1*PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is described by this fixed-function setup; arguments are presumably (op, source factor, destination factor)
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearDodge(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "LinearDodge"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = min(PBR, 1)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearBurn(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "LinearBurn"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 = max(PBR, 0)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointVividLight(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "VividLight"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // threshold used by the comparison on the next line
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC presumably records the sign of (SrcRGB - 0.5) to select the GE / LT paths below
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GE path: Temp0 = min(Temp1 / (2*(1 - SrcRGB)), 1)
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // LT path: Temp0 = 1 - min((1 - Temp1) / (2*SrcRGB), 1), assuming Mmsub is a*b - c*d
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // presumably re-tests SrcRGB against zero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Temp0 = SrcA * 0 (i.e. zero) where that test is LE
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // presumably re-tests SrcRGB against one
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // Temp0 = 1 where that test is GE
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearLight(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "LinearLight"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 2f, 2f, 2f);
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // PBR = 2*SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = the value clamped to [0, 1]
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointPinLight(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "PinLight"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // PBR = 2*SrcRGB
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 = PBR - 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // .CC presumably records the sign of (PBR - Temp1) to select the GT / LE paths below
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // GT path: Temp0 = max(Temp0, 0)
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // LE path: Temp0 = min(2*SrcRGB, Temp1)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardMix(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint "HardMix"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR; PBR presumably still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC presumably records the sign of (PBR - 1) to select the LT / GE paths below
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT path: Temp0 = SrcA * 0, i.e. zero
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE path: Temp0 = 1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslHue(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint HSL "Hue"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); // rotate-and-Min chain; presumably Temp0 = smallest SrcRGB channel
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); // rotate-and-Max chain; presumably PBR = largest SrcRGB channel
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = max - min, with .CC presumably selecting the GT / LE paths below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); // GT path: Temp0 = (SrcRGB - min) / (max - min), assuming Mmsub is a*b - c*d
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); // presumably Temp2 = smallest Temp1 channel
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); // presumably PBR = largest Temp1 channel
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // GT path: Temp0 scaled by (max - min) of Temp1
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: Temp0 = SrcA * 0, i.e. zero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 recomputed from DstRGB and DstA
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights consumed by the RRR/GGG/BBB steps below; presumably luminance weights
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 overwritten with the three-step weighted sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); // same weighted sum applied to Temp0
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // presumably tests whether the largest Temp2 channel exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path only; presumably rescales the color towards Temp1 when a channel exceeds 1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // presumably flags a negative smallest Temp2 channel
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path only; presumably the matching rescale for channels below 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 restored to DstRGB * rcp(DstA) after being reused above
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslSaturation(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint HSL "Saturation"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // source is PBR here rather than Temp1; presumably equivalent because PBR still carries the Temp1 result - confirm against UcodeAssembler
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); // rotate-and-Min chain; presumably Temp0 = smallest Temp1 channel
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); // rotate-and-Max chain; presumably PBR = largest Temp1 channel
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = max - min, with .CC presumably selecting the GT / LE paths below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // GT path: Temp0 = (Temp1 - min) / (max - min), assuming Mmsub is a*b - c*d
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB); // Temp1 reused; presumably the smallest SrcRGB channel
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); // presumably PBR = largest SrcRGB channel
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // GT path: Temp0 scaled by (max - min) of SrcRGB
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: Temp0 = SrcA * 0, i.e. zero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 recomputed from DstRGB and DstA
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights consumed by the RRR/GGG/BBB steps below; presumably luminance weights
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 overwritten with the three-step weighted sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); // same weighted sum applied to Temp0
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // presumably tests whether the largest Temp2 channel exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path only; presumably rescales the color towards Temp1 when a channel exceeds 1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // presumably flags a negative smallest Temp2 channel
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path only; presumably the matching rescale for channels below 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 restored to DstRGB * rcp(DstA) after being reused above
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslColor(ref UcodeAssembler asm) // Emits the ucode for the blend this method is named after (disjoint HSL "Color"). Arithmetic notes below assume each op does what its name says.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * rcp(DstA); presumably the un-premultiplied destination color
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // per-channel weights consumed by the RRR/GGG/BBB steps below; presumably luminance weights
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Temp1 overwritten with the three-step weighted sum
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // same weighted sum applied to SrcRGB
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); // rotate-and-Max chain; presumably the largest Temp2 channel
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // presumably tests whether that maximum exceeds 1
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT path only; presumably rescales the color towards Temp1 when a channel exceeds 1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // rotate-and-Min chain with .CC; presumably flags a negative smallest channel
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT path only; presumably the matching rescale for channels below 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 restored to DstRGB * rcp(DstA) after being reused above
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstA, 1 - SrcA); presumably the destination-only weight
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR*Temp1 - PBR*Temp0, assuming Mmsub is a*b - c*d
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0*DstA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcA, 1 - DstA); presumably the source-only weight
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 += PBR*SrcRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(SrcA + DstA, 1) stored via the RToA modifier; presumably the result alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; purpose not evident from this chunk
+ return FixedFunctionAlpha.Disabled; // no fixed-function alpha setup is returned; alpha presumably comes from the Temp1.RToA write above
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslLuminosity(ref UcodeAssembler asm) // Emits the "disjoint HSL luminosity" instruction sequence through asm; order matters because later ops read PBR/Temp values written by earlier ones.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // presumably per-channel luminance weights (R, G, B) - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd over RRR, GGG, BBB: looks like a weighted sum of the SrcRGB channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // kept in Temp2
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same weighted sum over Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max with the GBR swizzle: presumably reduces Temp1 to its largest channel - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably records the sign of (value - 1) for the CC.GT ops below
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min with the GBR swizzle: presumably reduces Temp1 to its smallest channel - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // .CC: presumably feeds the CC.LT ops below
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so DstRGB * Rcp(DstAAA) is computed again
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min(SrcAAA + DstAAA, 1) written with .RToA; presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // returns the Disabled fixed-function alpha value; alpha appears to be produced by the sequence itself
+ }
+
+ private static FixedFunctionAlpha GenConjointSrc(ref UcodeAssembler asm) // Emits the "conjoint src" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * that minimum
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(SrcAAA - DstAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // adds SrcRGB * PBR onto Temp0; Temp0 is presumably the output colour - confirm
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha is left to fixed-function state: AddGl with OneGl / ZeroGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOver(ref UcodeAssembler asm) // Emits the "conjoint src-over" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.SrcRGB); // the blend term for this mode is SrcRGB itself
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: SrcRGB * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOver(ref UcodeAssembler asm) // Emits the "conjoint dst-over" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // reads PBR right after the write to Temp1; presumably PBR also carries the previous result, making Temp0 a copy of Temp1 - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp1 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcIn(ref UcodeAssembler asm) // Emits the "conjoint src-in" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * that minimum; Temp0 is presumably the output colour - confirm
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MinimumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOut(ref UcodeAssembler asm) // Emits the "conjoint src-out" instruction sequence through asm; this mode returns the Disabled fixed-function alpha value.
+ {
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(SrcAAA - DstAAA, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * that value
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // the same difference is computed again for the .RToA write below
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // Max(SrcAAA - DstAAA, 0) written with .RToA; presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // returns the Disabled fixed-function alpha value; alpha appears to be produced by the sequence itself
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcAtop(ref UcodeAssembler asm) // Emits the "conjoint src-atop" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * Min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // adds Temp1 * PBR onto Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is left to fixed-function state: AddGl with ZeroGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointDstAtop(ref UcodeAssembler asm) // Emits the "conjoint dst-atop" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * Min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(SrcAAA - DstAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // adds SrcRGB * PBR onto Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // alpha is left to fixed-function state: AddGl with OneGl / ZeroGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointXor(ref UcodeAssembler asm) // Emits the "conjoint xor" instruction sequence through asm; this mode returns the Disabled fixed-function alpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(SrcAAA - DstAAA, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // source contribution
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // adds the Temp1 contribution onto Temp0
+ asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); // Temp1 = DstAAA - SrcAAA; .CC presumably records its sign for the CC.LT op below
+ asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); // under CC.LT the operands are swapped; together this looks like an absolute difference
+ asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); // written with .RToA; presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // returns the Disabled fixed-function alpha value; alpha appears to be produced by the sequence itself
+ }
+
+ private static FixedFunctionAlpha GenConjointMultiply(ref UcodeAssembler asm) // Emits the "conjoint multiply" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // blend term: SrcRGB * PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointScreen(ref UcodeAssembler asm) // Emits the "conjoint screen" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // SrcRGB + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // blend term: PBR * 1 - SrcRGB * Temp1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointOverlay(ref UcodeAssembler asm) // Emits the "conjoint overlay" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // 0.5 threshold read back through ConstantRGB on the next line
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // compares PBR with 0.5; .CC presumably selects between the CC.LE and CC.GT ops below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: SrcRGB * Temp1 added to itself
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path: 1 - Temp1 ...
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // ... and 1 - SrcRGB ...
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // ... their product added to itself ...
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // ... subtracted from 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointDarken(ref UcodeAssembler asm) // Emits the "conjoint darken" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // blend term: Min(SrcRGB, PBR) (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointLighten(ref UcodeAssembler asm) // Emits the "conjoint lighten" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // blend term: Max(SrcRGB, PBR) (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointColorDodge(ref UcodeAssembler asm) // Emits the "conjoint color dodge" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); // Temp0 = 1 - SrcRGB; .CC presumably selects between the CC.GT and CC.LE ops below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); // CC.GT path: Min(Temp1 * Rcp(1 - SrcRGB), 1)
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE path: result forced to 1, the Rcp above is skipped
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // second test: Temp1 against 0
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // CC.LE here overrides the result with 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointColorBurn(ref UcodeAssembler asm) // Emits the "conjoint color burn" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); // Temp0 = SrcRGB - 0; .CC presumably gates the CC.GT ops below on SrcRGB > 0
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB); // CC.GT path: 1 - (1 - Temp1) * Rcp(SrcRGB)
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // unconditional: Temp0 is not allowed to go below 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // second test: 1 - Temp1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE here overrides the result with 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHardLight(ref UcodeAssembler asm) // Emits the "conjoint hard light" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // 0.5 threshold read back through ConstantRGB on the next line
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // compares SrcRGB with 0.5 (the overlay sequence tests PBR here instead); .CC presumably selects CC.LE vs CC.GT below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: SrcRGB * Temp1 added to itself
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path: 1 - Temp1 ...
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // ... and 1 - SrcRGB ...
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // ... their product added to itself ...
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // ... subtracted from 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointSoftLight(ref UcodeAssembler asm) // Emits the "conjoint soft light" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant here uses a different index; presumably it also selects what ConstantRGB reads on the following op - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // compares PBR with 0.25; .CC presumably selects between the CC.GT and CC.LE ops below
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); // CC.GT path: a chain of Mul / Mmadd / Add steps in Temp1 using these four constants
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f); // CC.LE path: a chain in Temp1 using the constants 16, 12 and 3
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB + SrcRGB
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compares that sum with 1
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // CC.LE: Temp0 replaced by Temp1 - Temp1 * Temp1
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB + SrcRGB - 1 is computed again, this time without .CC
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // blend term: Temp0 * PBR + Temp1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointDifference(ref UcodeAssembler asm) // Emits the "conjoint difference" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // Temp0 = PBR - SrcRGB; .CC presumably records its sign for the CC.LT op below
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // under CC.LT the subtraction is flipped; together this looks like an absolute difference
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointExclusion(ref UcodeAssembler asm) // Emits the "conjoint exclusion" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // SrcRGB + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // subtracts SrcRGB * Temp1 once ...
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // ... and a second time; the repeated line is intentional (sum minus twice the product)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertRGB(ref UcodeAssembler asm) // Emits the "conjoint invert RGB" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); // blend term: SrcRGB * 1 - SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // scaled by Min(DstAAA, SrcAAA)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max(DstAAA - SrcAAA, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // adds Temp1 * PBR onto Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // alpha is left to fixed-function state: AddGl with ZeroGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearDodge(ref UcodeAssembler asm) // Emits the "conjoint linear dodge" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // SrcRGB + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // blend term: that sum limited to at most 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearBurn(ref UcodeAssembler asm) // Emits the "conjoint linear burn" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // SrcRGB + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // minus 1
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // blend term: limited to at least 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointVividLight(ref UcodeAssembler asm) // Emits the "conjoint vivid light" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // 0.5 threshold read back through ConstantRGB on the next line
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // compares SrcRGB with 0.5; .CC presumably selects between the CC.GE and CC.LT ops below
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // CC.GE path: Min(Temp1 * Rcp(2 * (1 - SrcRGB)), 1)
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LT path: 1 - Min((1 - Temp1) * Rcp(2 * SrcRGB), 1)
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // extra test: SrcRGB against 0
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: Temp0 = SrcAAA * 0, written as a multiply rather than a Mov of ConstantZero
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // extra test: SrcRGB against 1
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: result forced to 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearLight(ref UcodeAssembler asm) // Emits the "conjoint linear light" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 2f, 2f, 2f); // factor 2 read back through ConstantRGB on the next line
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // SrcRGB * 2 + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // minus 1
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // limited to at least 0 ...
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // ... and at most 1; this is the blend term
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointPinLight(ref UcodeAssembler asm) // Emits the "conjoint pin light" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB + SrcRGB
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 = that sum minus 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // compares PBR with Temp1; .CC presumably selects between the CC.GT and CC.LE ops below
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // CC.GT path: Max(Temp0, 0)
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LE path: Min(SrcRGB + SrcRGB, Temp1)
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHardMix(ref UcodeAssembler asm) // Emits the "conjoint hard mix" instruction sequence through asm and returns the fixed-function alpha setup that goes with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // SrcRGB + PBR (PBR presumably still carries the value just written to Temp1 - confirm)
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // compares that sum with 1; .CC presumably selects between the CC.LT and CC.GE ops below
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LT: Temp0 = SrcAAA * 0, written as a multiply rather than a Mov of ConstantZero
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslHue(ref UcodeAssembler asm) // Emits the "conjoint HSL hue" instruction sequence through asm and returns the fixed-function alpha setup that goes with it; order matters because later ops read PBR/Temp values written by earlier ones.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov/Min/Min with the GBR swizzle: presumably reduces SrcRGB to its smallest channel - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); // kept in Temp0
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov/Max/Max with the GBR swizzle: presumably reduces SrcRGB to its largest channel - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 (presumably max - min); .CC gates the CC.GT / CC.LE ops below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path: the reciprocal is only taken when the range is above 0
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); // Temp0 * SrcRGB - Temp0 * PBR
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // the same Min and Max reductions, now over Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); // kept in Temp2
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // Temp0 * PBR - Temp0 * Temp2
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // DstRGB * Rcp(DstAAA) is computed again into Temp1
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // presumably per-channel luminance weights (R, G, B) - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd over RRR, GGG, BBB: looks like a weighted sum of the three channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // result replaces Temp1
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted sum over Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // largest channel of Temp2 (presumably) ...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ... compared with 1; the CC.GT ops below run only on that condition
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // smallest channel of Temp2 (presumably) ...
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // ... with .CC feeding the CC.LT ops below
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so DstRGB * Rcp(DstAAA) is computed once more
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslSaturation(ref UcodeAssembler asm) // Emits the "conjoint HSL saturation" instruction sequence through asm and returns the fixed-function alpha setup that goes with it; order matters because later ops read PBR/Temp values written by earlier ones.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov/Min/Min with the GBR swizzle: presumably reduces Temp1 to its smallest channel (this first Mov reads PBR rather than Temp1) - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); // kept in Temp0
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max with the GBR swizzle: presumably reduces Temp1 to its largest channel - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 (presumably max - min); .CC gates the CC.GT / CC.LE ops below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path: the reciprocal is only taken when the range is above 0
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // Temp0 * Temp1 - Temp0 * PBR
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // the same Min and Max reductions, now over SrcRGB
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB); // kept in Temp1, replacing its previous contents
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // Temp0 * PBR - Temp0 * Temp1
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // DstRGB * Rcp(DstAAA) is computed again into Temp1
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // presumably per-channel luminance weights (R, G, B) - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd over RRR, GGG, BBB: looks like a weighted sum of the three channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // result replaces Temp1
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted sum over Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // largest channel of Temp2 (presumably) ...
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // ... compared with 1; the CC.GT ops below run only on that condition
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // smallest channel of Temp2 (presumably) ...
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // ... with .CC feeding the CC.LT ops below
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so DstRGB * Rcp(DstAAA) is computed once more
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslColor(ref UcodeAssembler asm) // Emits the "conjoint HSL color" instruction sequence through asm and returns the fixed-function alpha setup that goes with it; order matters because later ops read PBR/Temp values written by earlier ones.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // presumably per-channel luminance weights (R, G, B) - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd over RRR, GGG, BBB: looks like a weighted sum of the three channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // result replaces Temp1
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // same weighted sum, this time over SrcRGB
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max with the GBR swizzle: presumably reduces Temp2 to its largest channel - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably records the sign of (value - 1) for the CC.GT ops below
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min with the GBR swizzle: presumably reduces Temp2 to its smallest channel - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC: presumably feeds the CC.LT ops below
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so DstRGB * Rcp(DstAAA) is computed again
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslLuminosity(ref UcodeAssembler asm) // Emits the "conjoint HSL luminosity" instruction sequence through asm and returns the fixed-function alpha setup that goes with it; order matters because later ops read PBR/Temp values written by earlier ones.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // presumably 1 / destination alpha - confirm Rcp semantics in UcodeAssembler
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR; presumably the destination colour divided by its alpha
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // presumably per-channel luminance weights (R, G, B) - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd over RRR, GGG, BBB: looks like a weighted sum of the SrcRGB channels
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // kept in Temp2
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same weighted sum over Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // difference between the two weighted sums
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 shifted by that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max with the GBR swizzle: presumably reduces Temp1 to its largest channel - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably records the sign of (value - 1) for the CC.GT ops below
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min with the GBR swizzle: presumably reduces Temp1 to its smallest channel - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // .CC: presumably feeds the CC.LT ops below
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so DstRGB * Rcp(DstAAA) is computed again
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // .CC: presumably records the sign of (SrcAAA - DstAAA) for the CC.GE / CC.LT ops below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * DstAAA + SrcRGB * (SrcAAA - DstAAA)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * SrcAAA + Temp1 * (DstAAA - SrcAAA)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha is left to fixed-function state: MaximumGl with OneGl / OneGl factors
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
new file mode 100644
index 00000000..8072c6af
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
@@ -0,0 +1,115 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+    /// <summary>
+    /// Advanced blend manager.
+    /// Keeps a copy of the blend microcode words pushed by the guest and matches the hash of that
+    /// code against <see cref="AdvancedBlendPreGenTable.Entries"/>.
+    /// </summary>
+    class AdvancedBlendManager
+    {
+        private const int InstructionRamSize = 128;
+        private const int InstructionRamSizeMask = InstructionRamSize - 1;
+
+        private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+        private readonly uint[] _code;
+        private int _ip;
+
+        /// <summary>
+        /// Creates a new instance of the advanced blend manager.
+        /// </summary>
+        /// <param name="state">GPU state of the channel owning this manager</param>
+        public AdvancedBlendManager(DeviceStateWithShadow<ThreedClassState> state)
+        {
+            _state = state;
+            _code = new uint[InstructionRamSize];
+        }
+
+        /// <summary>
+        /// Sets the start offset of the blend microcode in memory.
+        /// </summary>
+        /// <param name="argument">Method call argument, used as the next write position</param>
+        public void LoadBlendUcodeStart(int argument)
+        {
+            _ip = argument;
+        }
+
+        /// <summary>
+        /// Pushes one word of blend microcode.
+        /// </summary>
+        /// <param name="argument">Method call argument, stored as one microcode word</param>
+        public void LoadBlendUcodeInstruction(int argument)
+        {
+            // The write position is masked so it always stays inside the instruction buffer.
+            _code[_ip++ & InstructionRamSizeMask] = (uint)argument;
+        }
+
+        /// <summary>
+        /// Tries to identify the current advanced blend function being used,
+        /// given the current state and microcode that was uploaded.
+        /// </summary>
+        /// <param name="descriptor">Advanced blend descriptor, set to its default value when no match is found</param>
+        /// <returns>True if the function was found, false otherwise</returns>
+        public bool TryGetAdvancedBlend(out AdvancedBlendDescriptor descriptor)
+        {
+            Span<uint> currentCode = new Span<uint>(_code);
+            byte codeLength = (byte)_state.State.BlendUcodeSize;
+
+            // Only the words covered by the current microcode size take part in the hash.
+            if (currentCode.Length > codeLength)
+            {
+                currentCode = currentCode.Slice(0, codeLength);
+            }
+
+            Hash128 hash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(currentCode));
+
+            descriptor = default;
+
+            if (!AdvancedBlendPreGenTable.Entries.TryGetValue(hash, out var entry))
+            {
+                return false;
+            }
+
+            if (entry.Constants != null)
+            {
+                // The same code can be used with different constants, so those must match as well.
+                // The comparison is done at half precision, which is how the state stores them.
+                for (int i = 0; i < entry.Constants.Length; i++)
+                {
+                    RgbFloat constant = entry.Constants[i];
+                    RgbHalf constant2 = _state.State.BlendUcodeConstants[i];
+
+                    if ((Half)constant.R != constant2.UnpackR() ||
+                        (Half)constant.G != constant2.UnpackG() ||
+                        (Half)constant.B != constant2.UnpackB())
+                    {
+                        return false;
+                    }
+                }
+            }
+
+            if (entry.Alpha.Enable != _state.State.BlendUcodeEnable)
+            {
+                return false;
+            }
+
+            // Table entries using EnableRGBA carry placeholder (0, 0, 0) alpha values, while
+            // entries using EnableRGB carry the actual fixed function alpha operation and factors.
+            // Only the latter can be meaningfully compared against the current blend state.
+            if (entry.Alpha.Enable == BlendUcodeEnable.EnableRGB &&
+                (entry.Alpha.AlphaOp != _state.State.BlendStateCommon.AlphaOp ||
+                entry.Alpha.AlphaSrcFactor != _state.State.BlendStateCommon.AlphaSrcFactor ||
+                entry.Alpha.AlphaDstFactor != _state.State.BlendStateCommon.AlphaDstFactor))
+            {
+                return false;
+            }
+
+            descriptor = new AdvancedBlendDescriptor(entry.Op, entry.Overlap, entry.SrcPreMultiplied);
+            return true;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
new file mode 100644
index 00000000..d35d8abf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+    /// <summary>
+    /// Advanced blend function entry.
+    /// Immutable: every member is assigned once by the constructor.
+    /// </summary>
+    readonly struct AdvancedBlendEntry
+    {
+        /// <summary>
+        /// Advanced blend operation.
+        /// </summary>
+        public AdvancedBlendOp Op { get; }
+
+        /// <summary>
+        /// Advanced blend overlap mode.
+        /// </summary>
+        public AdvancedBlendOverlap Overlap { get; }
+
+        /// <summary>
+        /// Whether the source input is pre-multiplied.
+        /// </summary>
+        public bool SrcPreMultiplied { get; }
+
+        /// <summary>
+        /// Constants used by the microcode.
+        /// </summary>
+        public RgbFloat[] Constants { get; }
+
+        /// <summary>
+        /// Fixed function alpha state.
+        /// </summary>
+        public FixedFunctionAlpha Alpha { get; }
+
+        /// <summary>
+        /// Creates a new advanced blend function entry.
+        /// </summary>
+        /// <param name="op">Advanced blend operation</param>
+        /// <param name="overlap">Advanced blend overlap mode</param>
+        /// <param name="srcPreMultiplied">Whether the source input is pre-multiplied</param>
+        /// <param name="constants">Constants used by the microcode</param>
+        /// <param name="alpha">Fixed function alpha state</param>
+        public AdvancedBlendEntry(
+            AdvancedBlendOp op,
+            AdvancedBlendOverlap overlap,
+            bool srcPreMultiplied,
+            RgbFloat[] constants,
+            FixedFunctionAlpha alpha)
+        {
+            Op = op;
+            Overlap = overlap;
+            SrcPreMultiplied = srcPreMultiplied;
+            Constants = constants;
+            Alpha = alpha;
+        }
+    }
+
+ /// <summary>
+ /// Pre-generated hash table with advanced blend functions used by the driver.
+ /// </summary>
+ static class AdvancedBlendPreGenTable
+ {
+ /// <summary>
+ /// Advanced blend functions dictionary.
+ /// </summary>
+ public static readonly IReadOnlyDictionary<Hash128, AdvancedBlendEntry> Entries = new Dictionary<Hash128, AdvancedBlendEntry>()
+ {
+ { new Hash128(0x19ECF57B83DE31F7, 0x5BAE759246F264C0), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xDE1B14A356A1A9ED, 0x59D803593C607C1D), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A3C3A6D32DEC368, 0xBCAE519EC6AAA045), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6FD380261A63B240, 0x17C3B335DBB9E3DB), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x1D39164823D3A2D1, 0xC45350959CE1C8FB), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x18DF09FF53B129FE, 0xC02EDA33C36019F6), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x5973E583271EBF06, 0x711497D75D1272E0), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x4759E0E5DA54D5E8, 0x1FDD57C0C38AFA1F), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x337684D43CCE97FA, 0x0139E30CC529E1C9), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xDA59E85D8428992D, 0x1D3D7C64C9EF0132), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x9455B949298CE805, 0xE73D3301518BE98A), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xBDD3B4DEDBE336AA, 0xBFA4DCD50D535DEE), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x22D4E970A028649A, 0x4F3FCB055FCED965), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xA346A91311D72114, 0x151A27A3FB0A1904), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8A307241061FACD6, 0xA39D1826440B8EE7), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB3BE569485EFFFE0, 0x0BA4E269B3CFB165), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x36FCA3277DC11822, 0x2BC0F6CAC2029672), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4A6226AF2DE9BD7F, 0xEB890D7DA716F73A), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF364CAA94E160FEB, 0xBF364512C72A3797), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x6BF791AB4AC19C87, 0x6FA17A994EA0FCDE), new AdvancedBlendEntry(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x053C75A0AE0BB222, 0x03C791FEEB59754C), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x25762AB40B6CBDE9, 0x595E9A968AC4F01C), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xC2D05E2DBE16955D, 0xB8659C7A3FCFA7CE), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x223F220B8F74CBFB, 0xD3DD19D7C39209A5), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xD0DAE57A9F1FE78A, 0x353796BCFB8CE30B), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x601C8CBEC07FF8FF, 0xB8E22882360E8695), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x3A55B7B78C76A7A8, 0x206F503B2D9FFEAA), new AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x80BC65C7831388E5, 0xC652457B2C766AEC), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3D3A912E5833EE13, 0x307895951349EE33), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x289105BE92E81803, 0xFD8F1F03D15C53B4), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x007AE3BD140764EB, 0x0EE05A0D2E80BBAE), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x77F7EE0DB3FDDB96, 0xDEA47C881306DB3E), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x66F4E9A7D73CA157, 0x1486058A177DB11C), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x593E9F331612D618, 0x9D217BEFA4EB919A), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x0A5194C5E6891106, 0xDD8EC6586106557C), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8D77173D5E06E916, 0x06AB190E7D10F4D4), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x655B4EBC148981DA, 0x455999EF2B9BD28A), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x98F5437D5F518929, 0xBFF4A6E83183DB63), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6ADDEFE3B9CEF2FD, 0xB6F6272AFECB1AAB), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x80953F0953BF05B1, 0xD59ABFAA34F8196F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xA401D9AA2A39C121, 0xFC0C8005C22AD7E3), new AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x06274FB7CA9CDD22, 0x6CE8188B1A9AB6EF), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x0B079BE7F7F70817, 0xB72E7736CA51E321), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x66215C99403CEDDE, 0x900B733D62204C48), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x12DEF2AD900CAD6C, 0x58CF5CC3004910DF), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x272BA3A49F64DAE4, 0xAC70B96C00A99EAF), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x206C34AAA7D3F545, 0xDA4B30CACAA483A0), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3D93494920D257BE, 0xDCC573BE1F5F4449), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0D7417D80191107B, 0xEAF40547827E005F), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xEC1B03E8C883F9C9, 0x2D3CA044C58C01B4), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x58A19A0135D68B31, 0x82F35B97AED068E5), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x20489F9AB36CC0E3, 0x20499874219E35EE), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xBB176935E5EE05BF, 0x95B26D4D30EA7A14), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x5FF9393C908ACFED, 0x068B0BD875773ABF), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x03181F8711C9802C, 0x6B02C7C6B224FE7B), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x2EE2209021F6B977, 0xF3AFA1491B8B89FC), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xD8BA4DD2EDE4DC9E, 0x01006114977CF715), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD156B99835A2D8ED, 0x2D0BEE9E135EA7A7), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x20CE8C898ED4BE27, 0x1514900B6F5E8F66), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCDE5F743820BA2D9, 0x917845FE2ECB083D), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xEB03DF4A0C1D14CD, 0xBAE2E831C6E8FFE4), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1DC9E49AABC779AC, 0x4053A1441EB713D3), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xFBDEF776248F7B3E, 0xE05EEFD65AC47CB7), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x415A1A48E03AA6E7, 0x046D7EE33CA46B9A), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x59A6901EC9BB2041, 0x2F3E19CE5EEC3EBE), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x044B2B6E105221DA, 0x3089BBC033F994AF), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x374A5A24AA8E6CC5, 0x29930FAA6215FA2B), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x30CD0F7AF0CF26F9, 0x06CCA6744DE7DCF5), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A6C9A1F6FE494A5, 0xA0CFAF77617E54DD), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x081AF6DAAB1C8717, 0xBFEDCE59AE3DC9AC), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3518E44573AB68BA, 0xC96EE71AF9F8F546), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF89E81FE8D73C96F, 0x4583A04577A0F21C), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xDF4026421CB61119, 0x14115A1F5139AFC7), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x91A20262C3E3A695, 0x0B3A102BFCDC6B1C), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x44F4C7CCFEB9EBFA, 0xF68394E6D56E5C2F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB89F17C7021E9760, 0x430357EE0F7188EF), new AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xDA2D20EA4242B8A0, 0x0D1EC05B72E3838F), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x855DFEE1208D11B9, 0x77C6E3DDCFE30B85), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x9B3808439683FD58, 0x123DCBE4705AB25E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xA42CF045C248A00A, 0x0C6C63C24EA0B0C1), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x320A83B6D00C8059, 0x796EDAB3EB7314BC), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x45253AC9ABFFC613, 0x8F92EA70195FB573), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x1A5D263B588274B6, 0x167D305F6C794179), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x709C1A837FE966AC, 0x75D8CE49E8A78EDB), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8265C26F85E4145F, 0x932E6CCBF37CB600), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3F252B3FEF983F27, 0x9370D7EEFEFA1A9E), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x66A334A4AEA41078, 0xCB52254E1E395231), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xFDD05C53B25F0035, 0xB7E3ECEE166C222F), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x25D932A77FFED81A, 0xA50D797B0FCA94E8), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4A953B6F5F7D341C, 0xDC05CFB50DDB5DC1), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x838CB660C4F41F6D, 0x9E7D958697543495), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4DF6EC1348A8F797, 0xA128E0CD69DB5A64), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x178CDFAB9A015295, 0x2BF40EA72E596D57), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x338FC99050E56AFD, 0x2AF41CF82BE602BF), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x62E02ED60D1E978E, 0xBF726B3E68C11E4D), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xFBAF92DD4C101502, 0x7AF2EDA6596B819D), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x0EF1241F65D4B50A, 0xE8D85DFA6AEDDB84), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x77FE024B5C9D4A18, 0xF19D48A932F6860F), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x9C88CBFA2E09D857, 0x0A0361704CBEEE1D), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x5B94127FA190E640, 0x8D1FEFF837A91268), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xB9C9105B7E063DDB, 0xF6A70E1D511B96FD), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF0751AAE332B3ED1, 0xC40146F5C83C2533), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x579EB12F595F75AD, 0x151BF0504703B81B), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xF9CA152C03AC8C62, 0x1581336205E5CF47), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x98ACD8BB5E195D0F, 0x91F937672BE899F0), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xBF97F10FC301F44C, 0x75721789F0D48548), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x1B982263B8B08A10, 0x3350C76E2E1B27DF), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xFF20AC79F64EDED8, 0xAF9025B2D97B9273), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x9FFD986600FB112F, 0x384FDDF4E060139A), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0425E40B5B8B3B52, 0x5880CBED7CAB631C), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x16DAC8593F28623A, 0x233DBC82325B8AED), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB37E5F234B9F0948, 0xD5F957A2ECD98FD6), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xCA0FDADD1D20DBE3, 0x1A5C15CCBF1AC538), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x1C48304D73A9DF3A, 0x891DB93FA36E3450), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x53200F2279B7FA39, 0x051C2462EBF6789C), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xB88BFB80714DCD5C, 0xEBD6938D744E6A41), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xE33DC2A25FC1A976, 0x08B3DBB1F3027D45), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xCE97E71615370316, 0xE131AE49D3A4D62B), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xE059FD265149B256, 0x94AF817AC348F61F), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x16D31333D477E231, 0x9A98AAC84F72CC62), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x47FC3B0776366D3C, 0xE96D9BD83B277874), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x7230401E3FEA1F3B, 0xF0D15F05D3D1E309), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x188212F9303742F5, 0x100C51CB96E03591), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x52B755D296B44DC5, 0x4003B87275625973), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xD873ED973ADF7EAD, 0x73E68B57D92034E7), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x471F9FA34B945ACB, 0x10524D1410B3C402), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x99F569454EA0EF32, 0x6FC70A8B3A07DC8B), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x5AD55F950067AC7E, 0x4BA60A4FBABDD0AC), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x03FF2C858C9C4C5B, 0xE95AE7F561FB60E9), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x6DC0E510C7BCF9D2, 0xAE805D7CECDCB5C1), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x44832332CED5C054, 0x2F8D5536C085B30A), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x4AB4D387618AC51F, 0x495B46E0555F4B32), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x99282B49405A01A8, 0xD6FA93F864F24A8E), new AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x37B30C1064FBD23E, 0x5D068366F42317C2), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x760FAE9D59E04BC2, 0xA40AD483EA01435E), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xE786950FD9D1C6EF, 0xF9FDD5AF6451D239), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x052458BB4788B0CA, 0x8AC58FDCA1F45EF5), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x6AFC3837D1D31920, 0xB9D49C2FE49642C6), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xAFC2911949317E01, 0xD5B63636F5CB3422), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x13B46DF507CC2C53, 0x86DE26517E6BF0A7), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x5C372442474BE410, 0x79ECD3C0C496EF2E), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x74AAB45DBF5336E9, 0x01BFC4E181DAD442), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x43239E282A36C85C, 0x36FB65560E46AD0F), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A3BA8A7583B8F7A, 0xE64E41D548033180), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x32BBB9859E9B565D, 0x3D5CE94FE55F18B5), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD947A0766AE3C0FC, 0x391E5D53E86F4ED6), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xBD9A7C08BDFD8CE6, 0x905407634901355E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x8395475BCB0D7A8C, 0x48AF5DD501D44A70), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x80AAC23FEBD4A3E5, 0xEA8C70F0B4DE52DE), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x2F3AD1B0F1B3FD09, 0xC0EBC784BFAB8EA3), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x52B54032F2F70BFF, 0xC941D6FDED674765), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCA7B86F72EC6A99B, 0x55868A131AFE359E), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x377919B60BD133CA, 0x0FD611627664EF40), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9D4A0C5EE1153887, 0x7B869EBA218C589B), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x311F2A858545D123, 0xB4D09C802480AD62), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCF78AA6A83AFA689, 0x9DC48B0C2182A3E1), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xC3018CD6F1CF62D1, 0x016E32DD9087B1BB), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9CB62CE0E956EE29, 0x0FB67F503E60B3AD), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3589A13C16EF3BFA, 0x15B29BFC91F3BDFB), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3502CA5FB7529917, 0xFA51BFD0D1688071), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x62ADC25AD6D0A923, 0x76CB6D238276D3A3), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x09FDEB1116A9D52C, 0x85BB8627CD5C2733), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0709FED1B65E18EB, 0x5BC3AA4D99EC19CF), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB18D28AE5DE4C723, 0xE820AA2B75C9C02E), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6743C51621497480, 0x4B164E40858834AE), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x63D1E181E34A2944, 0x1AE292C9D9F12819), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x079523298250BFF6, 0xC0C793510603CDB5), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x4C9D0A973C805EA6, 0xD1FF59AD5156B93C), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1E914678F3057BCD, 0xD503AE389C12D229), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9FDBADE5556C5311, 0x03F0CBC798FC5C94), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xE39451534635403C, 0x606CC1CA1F452388), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x1D39F0F0A1008AA6, 0xBFDF2B97E6C3F125), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xDB81BED30D5BDBEA, 0xAF0B2856EB93AD2C), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x83F69CCF1D0A79B6, 0x70D31332797430AC), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x7B87F807AB7A8F5C, 0x1241A2A01FB31771), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xF557172E20D5272D, 0xC1961F8C7A5D2820), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xA8476B3944DBBC9B, 0x84A2F6AF97B15FDF), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x3259602B55414DA3, 0x72AACCC00B5A9D10), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xC0CB8C10F36EDCD6, 0x8C2D088AD8191E1C), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x81806C451C6255EF, 0x5AA8AC9A08941A15), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xE55A6537F4568198, 0xCA8735390B799B19), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x5C044BA14536DDA3, 0xBCE0123ED7D510EC), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x6788346C405BE130, 0x372A4BB199C01F9F), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x510EDC2A34E2856B, 0xE1727A407E294254), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4B7BE01BD398C7A8, 0x5BFF79BC00672C18), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x213B43845540CFEC, 0xDA857411CF1CCFCE), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x765AFA6732E783F1, 0x8F1CABF1BC78A014), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xA4A5DE1CC06F6CB1, 0xA0634A0011001709), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x81F32BD8816EA796, 0x697EE86683165170), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xB870C209EAA5F092, 0xAF5FD923909CAA1F), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3649A9F5C936FB83, 0xDD7C834897AA182A), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD72A2B1097A5995C, 0x3D41B2763A913654), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x551E212B9F6C454A, 0xB0DFA05BEB3C37FA), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x681B5A313B7416BF, 0xCB1CBAEEB4D81500), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x9343A18BD4B16777, 0xEDB4AC1C8972C3A4), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xC960BF6D8519DE28, 0x78D8557FD405D119), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x65A7B01FDC73A46C, 0x297E096ED5CC4D8A), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD9C99BA4A6CDC13B, 0x3CFF0ACEDC2EE150), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x6BC00DA6EB922BD1, 0x5FD4C11F2A685234), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8652300E32D93050, 0x9460E7B449132371), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ };
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs
new file mode 100644
index 00000000..f06b4bf7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs
@@ -0,0 +1,126 @@
+using Ryujinx.Graphics.GAL;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ /// <summary>
+ /// Fixed function alpha state used for an advanced blend function.
+ /// </summary>
+ readonly struct FixedFunctionAlpha
+ {
+     /// <summary>
+     /// Fixed function alpha state with alpha blending disabled.
+     /// Built with <see cref="BlendUcodeEnable.EnableRGBA"/> and default values for the operation and both factors.
+     /// </summary>
+     public static FixedFunctionAlpha Disabled => new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, default, default, default);
+
+     /// <summary>
+     /// Individual enable bits for the RGB and alpha components.
+     /// </summary>
+     public BlendUcodeEnable Enable { get; }
+
+     /// <summary>
+     /// Alpha blend operation.
+     /// </summary>
+     public BlendOp AlphaOp { get; }
+
+     /// <summary>
+     /// Value multiplied with the blend source operand.
+     /// </summary>
+     public BlendFactor AlphaSrcFactor { get; }
+
+     /// <summary>
+     /// Value multiplied with the blend destination operand.
+     /// </summary>
+     public BlendFactor AlphaDstFactor { get; }
+
+     /// <summary>
+     /// Creates a new blend fixed function alpha state.
+     /// </summary>
+     /// <param name="enable">Individual enable bits for the RGB and alpha components</param>
+     /// <param name="alphaOp">Alpha blend operation</param>
+     /// <param name="alphaSrc">Value multiplied with the blend source operand</param>
+     /// <param name="alphaDst">Value multiplied with the blend destination operand</param>
+     public FixedFunctionAlpha(BlendUcodeEnable enable, BlendOp alphaOp, BlendFactor alphaSrc, BlendFactor alphaDst)
+     {
+         Enable = enable;
+         AlphaOp = alphaOp;
+         AlphaSrcFactor = alphaSrc;
+         AlphaDstFactor = alphaDst;
+     }
+
+     /// <summary>
+     /// Creates a new blend fixed function alpha state with the enable bits set to
+     /// <see cref="BlendUcodeEnable.EnableRGB"/>.
+     /// </summary>
+     /// <param name="alphaOp">Alpha blend operation</param>
+     /// <param name="alphaSrc">Value multiplied with the blend source operand</param>
+     /// <param name="alphaDst">Value multiplied with the blend destination operand</param>
+     public FixedFunctionAlpha(BlendOp alphaOp, BlendFactor alphaSrc, BlendFactor alphaDst) : this(BlendUcodeEnable.EnableRGB, alphaOp, alphaSrc, alphaDst)
+     {
+     }
+ }
+
+ /// <summary>
+ /// Blend microcode assembly function delegate.
+ /// The assembler is a struct passed by reference, so whatever the function emits into it
+ /// remains visible to the caller after the call returns.
+ /// </summary>
+ /// <param name="asm">Assembler that receives the emitted microcode and constants</param>
+ /// <returns>Fixed function alpha state for the microcode</returns>
+ delegate FixedFunctionAlpha GenUcodeFunc(ref UcodeAssembler asm);
+
+ /// <summary>
+ /// Advanced blend microcode state.
+ /// </summary>
+ readonly struct AdvancedBlendUcode
+ {
+     /// <summary>
+     /// Advanced blend operation.
+     /// </summary>
+     public AdvancedBlendOp Op { get; }
+
+     /// <summary>
+     /// Advanced blend overlap mode.
+     /// </summary>
+     public AdvancedBlendOverlap Overlap { get; }
+
+     /// <summary>
+     /// Whether the source input is pre-multiplied.
+     /// </summary>
+     public bool SrcPreMultiplied { get; }
+
+     /// <summary>
+     /// Fixed function alpha state.
+     /// </summary>
+     public FixedFunctionAlpha Alpha { get; }
+
+     /// <summary>
+     /// Microcode, as returned by the assembler once the generator function has run.
+     /// This is null when the generator function emitted no instructions.
+     /// </summary>
+     public uint[] Code { get; }
+
+     /// <summary>
+     /// Constants used by the microcode.
+     /// This is null when the generator function set no constants.
+     /// </summary>
+     public RgbFloat[] Constants { get; }
+
+     /// <summary>
+     /// Creates a new advanced blend state.
+     /// The microcode is assembled immediately by invoking <paramref name="genFunc"/> on a fresh assembler.
+     /// </summary>
+     /// <param name="op">Advanced blend operation</param>
+     /// <param name="overlap">Advanced blend overlap mode</param>
+     /// <param name="srcPreMultiplied">Whether the source input is pre-multiplied</param>
+     /// <param name="genFunc">Function that will generate the advanced blend microcode</param>
+     public AdvancedBlendUcode(
+         AdvancedBlendOp op,
+         AdvancedBlendOverlap overlap,
+         bool srcPreMultiplied,
+         GenUcodeFunc genFunc)
+     {
+         Op = op;
+         Overlap = overlap;
+         SrcPreMultiplied = srcPreMultiplied;
+
+         // The assembler is a mutable struct, it has to be passed by reference
+         // so that the code and constants it accumulates can be read back below.
+         UcodeAssembler asm = new UcodeAssembler();
+         Alpha = genFunc(ref asm);
+         Code = asm.GetCode();
+         Constants = asm.GetConstants();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs
new file mode 100644
index 00000000..f854787e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs
@@ -0,0 +1,305 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ /// <summary>
+ /// Blend microcode instruction.
+ /// </summary>
+ /// <remarks>
+ /// The numeric value of each member is what <see cref="UcodeOp"/> places on the microcode word, shifted left by 3 bits.
+ /// </remarks>
+ enum Instruction
+ {
+ Mmadd = 0,
+ Mmsub = 1,
+ Min = 2,
+ Max = 3,
+ Rcp = 4,
+ Add = 5,
+ Sub = 6
+ }
+
+ /// <summary>
+ /// Blend microcode condition code.
+ /// </summary>
+ /// <remarks>
+ /// The numeric value of each member is what <see cref="UcodeOp"/> places on the lowest bits of the microcode word.
+ /// </remarks>
+ enum CC
+ {
+ F = 0,
+ T = 1,
+ EQ = 2,
+ NE = 3,
+ LT = 4,
+ LE = 5,
+ GT = 6,
+ GE = 7
+ }
+
+ /// <summary>
+ /// Blend microcode operand B or D value.
+ /// </summary>
+ /// <remarks>
+ /// <see cref="UcodeOp"/> places operand B on the microcode word shifted left by 12 bits, and operand D shifted left by 19 bits.
+ /// No member is defined for the value 0x8.
+ /// </remarks>
+ enum OpBD
+ {
+ ConstantZero = 0x0,
+ ConstantOne = 0x1,
+ SrcRGB = 0x2,
+ SrcAAA = 0x3,
+ OneMinusSrcAAA = 0x4,
+ DstRGB = 0x5,
+ DstAAA = 0x6,
+ OneMinusDstAAA = 0x7,
+ Temp0 = 0x9,
+ Temp1 = 0xa,
+ Temp2 = 0xb,
+ PBR = 0xc,
+ ConstantRGB = 0xd
+ }
+
+ /// <summary>
+ /// Blend microcode operand A or C value.
+ /// </summary>
+ /// <remarks>
+ /// <see cref="UcodeOp"/> places operand A on the microcode word shifted left by 9 bits, and operand C shifted left by 16 bits.
+ /// </remarks>
+ enum OpAC
+ {
+ SrcRGB = 0,
+ DstRGB = 1,
+ SrcAAA = 2,
+ DstAAA = 3,
+ Temp0 = 4,
+ Temp1 = 5,
+ Temp2 = 6,
+ PBR = 7
+ }
+
+ /// <summary>
+ /// Blend microcode destination operand.
+ /// </summary>
+ /// <remarks>
+ /// The numeric value of each member is what <see cref="UcodeOp"/> places on the microcode word, shifted left by 28 bits.
+ /// </remarks>
+ enum OpDst
+ {
+ Temp0 = 0,
+ Temp1 = 1,
+ Temp2 = 2,
+ PBR = 3
+ }
+
+ /// <summary>
+ /// Blend microcode input swizzle.
+ /// </summary>
+ /// <remarks>
+ /// The swizzle is carried by <see cref="Dest"/>, and <see cref="UcodeOp"/> places its numeric value
+ /// on the microcode word shifted left by 23 bits.
+ /// </remarks>
+ enum Swizzle
+ {
+ RGB = 0,
+ GBR = 1,
+ RRR = 2,
+ GGG = 3,
+ BBB = 4,
+ RToA = 5
+ }
+
+ /// <summary>
+ /// Blend microcode output components.
+ /// </summary>
+ /// <remarks>
+ /// The write mask is carried by <see cref="Dest"/>, and <see cref="UcodeOp"/> places its numeric value
+ /// on the microcode word shifted left by 26 bits.
+ /// </remarks>
+ enum WriteMask
+ {
+ RGB = 0,
+ R = 1,
+ G = 2,
+ B = 3
+ }
+
+ /// <summary>
+ /// Floating-point RGB color values.
+ /// The value is immutable once constructed.
+ /// </summary>
+ readonly struct RgbFloat
+ {
+     /// <summary>
+     /// Red component value.
+     /// </summary>
+     public float R { get; }
+
+     /// <summary>
+     /// Green component value.
+     /// </summary>
+     public float G { get; }
+
+     /// <summary>
+     /// Blue component value.
+     /// </summary>
+     public float B { get; }
+
+     /// <summary>
+     /// Creates a new floating-point RGB value.
+     /// </summary>
+     /// <param name="r">Red component value</param>
+     /// <param name="g">Green component value</param>
+     /// <param name="b">Blue component value</param>
+     public RgbFloat(float r, float g, float b)
+     {
+         R = r;
+         G = g;
+         B = b;
+     }
+ }
+
+ /// <summary>
+ /// Blend microcode destination operand, including swizzle, write mask and condition code update flag.
+ /// Instances are immutable, the modifier properties return a modified copy.
+ /// </summary>
+ readonly struct Dest
+ {
+     // Base destinations: identity swizzle, all components written, no condition code update.
+     public static Dest Temp0 => new Dest(OpDst.Temp0, Swizzle.RGB, WriteMask.RGB, false);
+     public static Dest Temp1 => new Dest(OpDst.Temp1, Swizzle.RGB, WriteMask.RGB, false);
+     public static Dest Temp2 => new Dest(OpDst.Temp2, Swizzle.RGB, WriteMask.RGB, false);
+     public static Dest PBR => new Dest(OpDst.PBR, Swizzle.RGB, WriteMask.RGB, false);
+
+     // Copies of this destination with only the swizzle replaced.
+     public Dest GBR => new Dest(Dst, Swizzle.GBR, WriteMask, WriteCC);
+     public Dest RRR => new Dest(Dst, Swizzle.RRR, WriteMask, WriteCC);
+     public Dest GGG => new Dest(Dst, Swizzle.GGG, WriteMask, WriteCC);
+     public Dest BBB => new Dest(Dst, Swizzle.BBB, WriteMask, WriteCC);
+     public Dest RToA => new Dest(Dst, Swizzle.RToA, WriteMask, WriteCC);
+
+     // Copies of this destination with only the write mask replaced.
+     public Dest R => new Dest(Dst, Swizzle, WriteMask.R, WriteCC);
+     public Dest G => new Dest(Dst, Swizzle, WriteMask.G, WriteCC);
+     public Dest B => new Dest(Dst, Swizzle, WriteMask.B, WriteCC);
+
+     // Copy of this destination with the condition code update flag set.
+     public Dest CC => new Dest(Dst, Swizzle, WriteMask, true);
+
+     /// <summary>
+     /// Destination operand.
+     /// </summary>
+     public OpDst Dst { get; }
+
+     /// <summary>
+     /// Swizzle.
+     /// </summary>
+     public Swizzle Swizzle { get; }
+
+     /// <summary>
+     /// Write mask.
+     /// </summary>
+     public WriteMask WriteMask { get; }
+
+     /// <summary>
+     /// Indicates if condition codes should be updated.
+     /// </summary>
+     public bool WriteCC { get; }
+
+     /// <summary>
+     /// Creates a new blend microcode destination operand.
+     /// </summary>
+     /// <param name="dst">Operand</param>
+     /// <param name="swizzle">Swizzle</param>
+     /// <param name="writeMask">Write mask</param>
+     /// <param name="writeCC">Indicates if condition codes should be updated</param>
+     public Dest(OpDst dst, Swizzle swizzle, WriteMask writeMask, bool writeCC)
+     {
+         Dst = dst;
+         Swizzle = swizzle;
+         WriteMask = writeMask;
+         WriteCC = writeCC;
+     }
+ }
+
+ /// <summary>
+ /// Blend microcode operation.
+ /// </summary>
+ readonly struct UcodeOp
+ {
+     /// <summary>
+     /// Encoded 32-bit microcode word.
+     /// </summary>
+     public readonly uint Word;
+
+     /// <summary>
+     /// Creates a new blend microcode operation.
+     /// </summary>
+     /// <remarks>
+     /// Each field is OR-ed into the word at a fixed bit offset:
+     /// condition code at 0, instruction at 3, constant index at 6, operand A at 9, operand B at 12,
+     /// operand C at 16, operand D at 19, swizzle at 23, write mask at 26, destination at 28
+     /// and the condition code update flag at 31.
+     /// The fields are not masked, a value too large for its field spills into the following one.
+     /// </remarks>
+     /// <param name="cc">Condition code that controls whether the operation is executed or not</param>
+     /// <param name="inst">Instruction</param>
+     /// <param name="constIndex">Index on the constant table of the constant used by any constant operand</param>
+     /// <param name="dest">Destination operand</param>
+     /// <param name="srcA">First input operand</param>
+     /// <param name="srcB">Second input operand</param>
+     /// <param name="srcC">Third input operand</param>
+     /// <param name="srcD">Fourth input operand</param>
+     public UcodeOp(CC cc, Instruction inst, int constIndex, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
+     {
+         Word = (uint)cc |
+             ((uint)inst << 3) |
+             ((uint)constIndex << 6) |
+             ((uint)srcA << 9) |
+             ((uint)srcB << 12) |
+             ((uint)srcC << 16) |
+             ((uint)srcD << 19) |
+             ((uint)dest.Swizzle << 23) |
+             ((uint)dest.WriteMask << 26) |
+             ((uint)dest.Dst << 28) |
+             (dest.WriteCC ? (1u << 31) : 0);
+     }
+ }
+
+ /// <summary>
+ /// Blend microcode assembler.
+ /// Collects the encoded instruction words and the constant table of a single microcode program.
+ /// </summary>
+ struct UcodeAssembler
+ {
+     private List<uint> _code;
+     private RgbFloat[] _constants;
+     private int _constantIndex;
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Mmadd"/> with operand C fixed to SrcRGB and operand D fixed to zero.
+     /// </summary>
+     public void Mul(CC cc, Dest dest, OpAC srcA, OpBD srcB)
+         => Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Mmadd"/> with operand D fixed to one.
+     /// </summary>
+     public void Madd(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC)
+         => Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, srcC, OpBD.ConstantOne);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Mmadd"/> with all four operands supplied by the caller.
+     /// </summary>
+     public void Mmadd(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
+         => Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, srcC, srcD);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Mmsub"/> with all four operands supplied by the caller.
+     /// </summary>
+     public void Mmsub(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
+         => Assemble(cc, Instruction.Mmsub, dest, srcA, srcB, srcC, srcD);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Min"/> of operands A and B.
+     /// </summary>
+     public void Min(CC cc, Dest dest, OpAC srcA, OpBD srcB)
+         => Assemble(cc, Instruction.Min, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Max"/> of operands A and B.
+     /// </summary>
+     public void Max(CC cc, Dest dest, OpAC srcA, OpBD srcB)
+         => Assemble(cc, Instruction.Max, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Rcp"/> of operand A.
+     /// </summary>
+     public void Rcp(CC cc, Dest dest, OpAC srcA)
+         => Assemble(cc, Instruction.Rcp, dest, srcA, OpBD.ConstantZero, OpAC.SrcRGB, OpBD.ConstantZero);
+
+     /// <summary>
+     /// Emits a move, encoded as a <see cref="Instruction.Add"/> of operand B with zero.
+     /// </summary>
+     public void Mov(CC cc, Dest dest, OpBD srcB)
+         => Assemble(cc, Instruction.Add, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Add"/> of operands B and D.
+     /// </summary>
+     public void Add(CC cc, Dest dest, OpBD srcB, OpBD srcD)
+         => Assemble(cc, Instruction.Add, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, srcD);
+
+     /// <summary>
+     /// Emits a <see cref="Instruction.Sub"/> of operands B and D.
+     /// </summary>
+     public void Sub(CC cc, Dest dest, OpBD srcB, OpBD srcD)
+         => Assemble(cc, Instruction.Sub, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, srcD);
+
+     /// <summary>
+     /// Encodes one operation and appends it to the program.
+     /// The constant index encoded is the one passed on the most recent <see cref="SetConstant"/> call.
+     /// </summary>
+     private void Assemble(CC cc, Instruction inst, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
+     {
+         // The struct can be used without any initialization, so the list is created on first use.
+         _code ??= new List<uint>();
+
+         UcodeOp op = new UcodeOp(cc, inst, _constantIndex, dest, srcA, srcB, srcC, srcD);
+
+         _code.Add(op.Word);
+     }
+
+     /// <summary>
+     /// Stores a constant on the constant table, growing the table if needed,
+     /// and selects it as the constant used by the operations assembled afterwards.
+     /// </summary>
+     /// <param name="index">Index of the constant on the table</param>
+     /// <param name="r">Red component value</param>
+     /// <param name="g">Green component value</param>
+     /// <param name="b">Blue component value</param>
+     public void SetConstant(int index, float r, float g, float b)
+     {
+         int requiredLength = index + 1;
+
+         _constants ??= new RgbFloat[requiredLength];
+
+         if (_constants.Length <= index)
+         {
+             Array.Resize(ref _constants, requiredLength);
+         }
+
+         _constantIndex = index;
+         _constants[index] = new RgbFloat(r, g, b);
+     }
+
+     /// <summary>
+     /// Gets a copy of the microcode assembled so far.
+     /// </summary>
+     /// <returns>The microcode words, or null if no operation was assembled</returns>
+     public uint[] GetCode()
+     {
+         return _code == null ? null : _code.ToArray();
+     }
+
+     /// <summary>
+     /// Gets the constant table.
+     /// </summary>
+     /// <returns>The constant table, or null if no constant was set</returns>
+     public RgbFloat[] GetConstants() => _constants;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs
new file mode 100644
index 00000000..a6b62a4a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs
@@ -0,0 +1,130 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Helper methods used for conditional rendering.
+ /// </summary>
+ static class ConditionalRendering
+ {
+     /// <summary>
+     /// Evaluates the conditional rendering condition currently set,
+     /// to decide if draws and clears should be performed.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
+     /// <param name="address">Conditional rendering buffer address</param>
+     /// <param name="condition">Conditional rendering condition</param>
+     /// <returns>Whether rendering is enabled, disabled, or left for the host to decide</returns>
+     public static ConditionalRenderEnabled GetRenderEnable(GpuContext context, MemoryManager memoryManager, GpuVa address, Condition condition)
+     {
+         switch (condition)
+         {
+             case Condition.Always:
+                 return ConditionalRenderEnabled.True;
+
+             case Condition.Never:
+                 return ConditionalRenderEnabled.False;
+
+             case Condition.ResultNonZero:
+                 return CounterNonZero(context, memoryManager, address.Pack());
+
+             case Condition.Equal:
+             case Condition.NotEqual:
+                 return CounterCompare(context, memoryManager, address.Pack(), condition == Condition.Equal);
+
+             default:
+                 // Unknown conditions are reported, and rendering is left enabled.
+                 Logger.Warning?.Print(LogClass.Gpu, $"Invalid conditional render condition \"{condition}\".");
+
+                 return ConditionalRenderEnabled.True;
+         }
+     }
+
+     /// <summary>
+     /// Checks if the counter value at a given GPU memory address is non-zero.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
+     /// <param name="gpuVa">GPU virtual address of the counter value</param>
+     /// <returns>
+     /// Host if the check is handled with host conditional rendering, otherwise true or false
+     /// depending on the value being non-zero. False is also returned when no counter event exists for the address
+     /// </returns>
+     private static ConditionalRenderEnabled CounterNonZero(GpuContext context, MemoryManager memoryManager, ulong gpuVa)
+     {
+         ICounterEvent counterEvent = FindEvent(memoryManager.CounterCache, gpuVa);
+
+         if (counterEvent == null)
+         {
+             return ConditionalRenderEnabled.False;
+         }
+
+         if (context.Renderer.Pipeline.TryHostConditionalRendering(counterEvent, 0L, false))
+         {
+             return ConditionalRenderEnabled.Host;
+         }
+
+         // The host can't handle it, flush the event and then check the value in guest memory.
+         counterEvent.Flush();
+
+         ulong value = memoryManager.Read<ulong>(gpuVa, true);
+
+         return value != 0 ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
+     }
+
+     /// <summary>
+     /// Checks if the counter at a given GPU memory address passes a specified equality comparison.
+     /// The second value of the comparison is located 16 bytes after the first one.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
+     /// <param name="gpuVa">GPU virtual address</param>
+     /// <param name="isEqual">True to check if the values are equal, false to check if they are not equal</param>
+     /// <returns>
+     /// Host if the check is handled with host conditional rendering, otherwise true or false
+     /// depending on the condition being met
+     /// </returns>
+     private static ConditionalRenderEnabled CounterCompare(GpuContext context, MemoryManager memoryManager, ulong gpuVa, bool isEqual)
+     {
+         ICounterEvent first = FindEvent(memoryManager.CounterCache, gpuVa);
+         ICounterEvent second = FindEvent(memoryManager.CounterCache, gpuVa + 16);
+
+         bool handledByHost = false;
+
+         if (first != null)
+         {
+             // When only one side has an event, the other side is read from memory and compared as a plain value.
+             handledByHost = second != null
+                 ? context.Renderer.Pipeline.TryHostConditionalRendering(first, second, isEqual)
+                 : context.Renderer.Pipeline.TryHostConditionalRendering(first, memoryManager.Read<ulong>(gpuVa + 16), isEqual);
+         }
+         else if (second != null)
+         {
+             handledByHost = context.Renderer.Pipeline.TryHostConditionalRendering(second, memoryManager.Read<ulong>(gpuVa), isEqual);
+         }
+
+         if (handledByHost)
+         {
+             return ConditionalRenderEnabled.Host;
+         }
+
+         // Fallback: flush whichever events exist, then compare the values in guest memory.
+         first?.Flush();
+         second?.Flush();
+
+         ulong x = memoryManager.Read<ulong>(gpuVa, true);
+         ulong y = memoryManager.Read<ulong>(gpuVa + 16, true);
+
+         bool passed = isEqual ? x == y : x != y;
+
+         return passed ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
+     }
+
+     /// <summary>
+     /// Looks up the event of the counter that is supposed to be written at the specified address.
+     /// </summary>
+     /// <param name="counterCache">GPU counter cache to search on</param>
+     /// <param name="gpuVa">GPU virtual address where the counter is supposed to be written</param>
+     /// <returns>The counter event, or null if not present</returns>
+     private static ICounterEvent FindEvent(CounterCache counterCache, ulong gpuVa) => counterCache.FindEvent(gpuVa);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs
new file mode 100644
index 00000000..5c936616
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs
@@ -0,0 +1,183 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Constant buffer updater.
+ /// </summary>
+ class ConstantBufferUpdater
+ {
+     private const int UniformDataCacheSize = 512;
+
+     private readonly GpuChannel _channel;
+     private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+     // State associated with direct uniform buffer updates.
+     // This state is used to attempt to batch together consecutive updates.
+     private ulong _ubBeginCpuAddress = 0;
+     private ulong _ubFollowUpAddress = 0;
+     private ulong _ubByteCount = 0;
+     private int _ubIndex = 0;
+     private readonly int[] _ubData = new int[UniformDataCacheSize];
+
+     /// <summary>
+     /// Creates a new instance of the constant buffer updater.
+     /// </summary>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="state">Channel state</param>
+     public ConstantBufferUpdater(GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state)
+     {
+         _channel = channel;
+         _state = state;
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the vertex shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void BindVertex(int argument)
+     {
+         Bind(argument, ShaderType.Vertex);
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the tessellation control shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void BindTessControl(int argument)
+     {
+         Bind(argument, ShaderType.TessellationControl);
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the tessellation evaluation shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void BindTessEvaluation(int argument)
+     {
+         Bind(argument, ShaderType.TessellationEvaluation);
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the geometry shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void BindGeometry(int argument)
+     {
+         Bind(argument, ShaderType.Geometry);
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the fragment shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void BindFragment(int argument)
+     {
+         Bind(argument, ShaderType.Fragment);
+     }
+
+     /// <summary>
+     /// Binds a uniform buffer for the specified shader stage.
+     /// </summary>
+     /// <param name="argument">Method call argument: bit 0 is the enable flag, bits 4 to 8 are the binding index</param>
+     /// <param name="type">Shader stage that will access the uniform buffer</param>
+     private void Bind(int argument, ShaderType type)
+     {
+         bool enable = (argument & 1) != 0;
+
+         int index = (argument >> 4) & 0x1f;
+
+         // Pending inline updates are written before the binding changes.
+         FlushUboDirty();
+
+         if (enable)
+         {
+             var uniformBuffer = _state.State.UniformBufferState;
+
+             ulong address = uniformBuffer.Address.Pack();
+
+             _channel.BufferManager.SetGraphicsUniformBuffer((int)type, index, address, (uint)uniformBuffer.Size);
+         }
+         else
+         {
+             _channel.BufferManager.SetGraphicsUniformBuffer((int)type, index, 0, 0);
+         }
+     }
+
+     /// <summary>
+     /// Flushes any queued UBO updates.
+     /// </summary>
+     public void FlushUboDirty()
+     {
+         // A follow up address of zero means that there's no batch in progress.
+         if (_ubFollowUpAddress != 0)
+         {
+             var memoryManager = _channel.MemoryManager;
+
+             Span<byte> data = MemoryMarshal.Cast<int, byte>(_ubData.AsSpan(0, (int)(_ubByteCount / 4)));
+
+             if (memoryManager.Physical.WriteWithRedundancyCheck(_ubBeginCpuAddress, data))
+             {
+                 memoryManager.Physical.BufferCache.ForceDirty(memoryManager, _ubFollowUpAddress - _ubByteCount, _ubByteCount);
+             }
+
+             _ubFollowUpAddress = 0;
+             _ubIndex = 0;
+         }
+     }
+
+     /// <summary>
+     /// Updates the uniform buffer data with inline data.
+     /// </summary>
+     /// <param name="argument">New uniform buffer data word</param>
+     public void Update(int argument)
+     {
+         var uniformBuffer = _state.State.UniformBufferState;
+
+         ulong address = uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset;
+
+         // Start a new batch if this write doesn't continue the previous one, or if the cache is full.
+         if (_ubFollowUpAddress != address || _ubIndex == _ubData.Length)
+         {
+             FlushUboDirty();
+
+             _ubByteCount = 0;
+             _ubBeginCpuAddress = _channel.MemoryManager.Translate(address);
+         }
+
+         _ubData[_ubIndex++] = argument;
+
+         _ubFollowUpAddress = address + 4;
+         _ubByteCount += 4;
+
+         _state.State.UniformBufferState.Offset += 4;
+     }
+
+     /// <summary>
+     /// Updates the uniform buffer data with inline data.
+     /// </summary>
+     /// <param name="data">Data to be written to the uniform buffer</param>
+     public void Update(ReadOnlySpan<int> data)
+     {
+         // The cache can't hold more than UniformDataCacheSize words at once.
+         // Larger updates are split into pieces that fit, which is the same as
+         // receiving them as several consecutive updates.
+         while (data.Length > _ubData.Length)
+         {
+             UpdateChunk(data.Slice(0, _ubData.Length));
+
+             data = data.Slice(_ubData.Length);
+         }
+
+         UpdateChunk(data);
+     }
+
+     /// <summary>
+     /// Queues inline data that is known to fit on the cache, and advances the uniform buffer offset.
+     /// </summary>
+     /// <param name="data">Data to be written to the uniform buffer, with at most as many words as the cache size</param>
+     private void UpdateChunk(ReadOnlySpan<int> data)
+     {
+         var uniformBuffer = _state.State.UniformBufferState;
+
+         ulong address = uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset;
+
+         ulong size = (ulong)data.Length * 4;
+
+         // Start a new batch if this write doesn't continue the previous one,
+         // or if the data doesn't fit on the space left on the cache.
+         if (_ubFollowUpAddress != address || _ubIndex + data.Length > _ubData.Length)
+         {
+             FlushUboDirty();
+
+             _ubByteCount = 0;
+             _ubBeginCpuAddress = _channel.MemoryManager.Translate(address);
+         }
+
+         data.CopyTo(_ubData.AsSpan(_ubIndex));
+         _ubIndex += data.Length;
+
+         _ubFollowUpAddress = address + size;
+         _ubByteCount += size;
+
+         _state.State.UniformBufferState.Offset += data.Length * 4;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
new file mode 100644
index 00000000..7438ba03
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
@@ -0,0 +1,856 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Draw manager.
+ /// </summary>
+ class DrawManager
+ {
+ // Since we don't know the index buffer size for indirect draws,
+ // we must assume a minimum and maximum size and use that for buffer data update purposes.
+ private const int MinIndirectIndexCount = 0x10000;
+ private const int MaxIndirectIndexCount = 0x4000000;
+
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly DeviceStateWithShadow<ThreedClassState> _state;
+ private readonly DrawState _drawState;
+ private readonly SpecializationStateUpdater _currentSpecState;
+ // Set once UpdateTopology has sent a topology to the host pipeline, cleared by ForceStateDirty.
+ private bool _topologySet;
+
+ // Set by DrawEnd when an instanced draw is deferred rather than issued right away.
+ private bool _instancedDrawPending;
+ private bool _instancedIndexed;
+ private bool _instancedIndexedInline;
+
+ // Draw parameters captured by DrawEnd for the deferred instanced draw.
+ private int _instancedFirstIndex;
+ private int _instancedFirstVertex;
+ private int _instancedFirstInstance;
+ private int _instancedIndexCount;
+ private int _instancedDrawStateFirst;
+ private int _instancedDrawStateCount;
+
+ // Incremented or reset to zero by DrawBegin, depending on the flags of the draw.
+ private int _instanceIndex;
+
+ // NOTE(review): presumably the 3D class method offsets of the registers named here; confirm against ThreedClassState.
+ private const int VertexBufferFirstMethodOffset = 0x35d;
+ private const int IndexBufferCountMethodOffset = 0x5f8;
+
+ /// <summary>
+ /// Creates a new draw manager, bound to the specified channel and its state.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="state">Channel state</param>
+ /// <param name="drawState">Draw state</param>
+ /// <param name="spec">Specialization state updater</param>
+ public DrawManager(GpuContext context, GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state, DrawState drawState, SpecializationStateUpdater spec)
+ {
+     _currentSpecState = spec;
+     _drawState = drawState;
+     _state = state;
+     _channel = channel;
+     _context = context;
+ }
+
+ /// <summary>
+ /// Marks the state tracked by the draw manager as dirty.
+ /// This makes the primitive topology be set on the host again on the next draw begin, even if it did not change.
+ /// </summary>
+ public void ForceStateDirty() => _topologySet = false;
+
+ /// <summary>
+ /// Pushes four 8-bit index buffer elements to the inline index buffer streamer.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void VbElementU8(int argument) => _drawState.IbStreamer.VbElementU8(_context.Renderer, argument);
+
+ /// <summary>
+ /// Pushes two 16-bit index buffer elements to the inline index buffer streamer.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void VbElementU16(int argument) => _drawState.IbStreamer.VbElementU16(_context.Renderer, argument);
+
+ /// <summary>
+ /// Pushes one 32-bit index buffer element to the inline index buffer streamer.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void VbElementU32(int argument) => _drawState.IbStreamer.VbElementU32(_context.Renderer, argument);
+
+ /// <summary>
+ /// Finishes the draw call.
+ /// The draw parameters are taken from the index buffer and vertex buffer draw state currently set.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="argument">Method call argument (not used)</param>
+ public void DrawEnd(ThreedClass engine, int argument)
+ {
+     int firstIndex = _state.State.IndexBufferState.First;
+     int indexCount = (int)_state.State.IndexBufferCount;
+     int firstVertex = _state.State.VertexBufferDrawState.First;
+     int vertexCount = _state.State.VertexBufferDrawState.Count;
+
+     DrawEnd(engine, firstIndex, indexCount, firstVertex, vertexCount);
+ }
+
+ /// <summary>
+ /// Finishes the draw call.
+ /// This draws geometry on the bound buffers based on the current GPU state.
+ /// Instanced draws are not issued here, their parameters are saved and the draw is left pending.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="firstIndex">Index of the first index buffer element used on the draw</param>
+ /// <param name="indexCount">Number of index buffer elements used on the draw</param>
+ /// <param name="drawFirstVertex">Index of the first vertex used on the draw</param>
+ /// <param name="drawVertexCount">Number of vertices used on the draw</param>
+ private void DrawEnd(ThreedClass engine, int firstIndex, int indexCount, int drawFirstVertex, int drawVertexCount)
+ {
+     ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
+         _context,
+         _channel.MemoryManager,
+         _state.State.RenderEnableAddress,
+         _state.State.RenderEnableCondition);
+
+     // Epilogue shared by all exit paths: the next draw is not indexed unless the index count is set again,
+     // and host conditional rendering has to be ended if it was started for this draw.
+     void FinishDraw()
+     {
+         _drawState.DrawIndexed = false;
+
+         if (renderEnable == ConditionalRenderEnabled.Host)
+         {
+             _context.Renderer.Pipeline.EndHostConditionalRendering();
+         }
+     }
+
+     if (renderEnable == ConditionalRenderEnabled.False || _instancedDrawPending)
+     {
+         if (renderEnable == ConditionalRenderEnabled.False)
+         {
+             PerformDeferredDraws();
+         }
+
+         FinishDraw();
+
+         return;
+     }
+
+     _drawState.FirstIndex = firstIndex;
+     _drawState.IndexCount = indexCount;
+     _drawState.DrawFirstVertex = drawFirstVertex;
+     _drawState.DrawVertexCount = drawVertexCount;
+     _currentSpecState.SetHasConstantBufferDrawParameters(false);
+
+     engine.UpdateState();
+
+     bool instanced = _drawState.VsUsesInstanceId || _drawState.IsAnyVbInstanced;
+
+     if (instanced)
+     {
+         // Save everything needed to issue the draw later, the draw itself is deferred.
+         _instancedDrawPending = true;
+
+         int ibCount = _drawState.IbStreamer.InlineIndexCount;
+
+         _instancedIndexed = _drawState.DrawIndexed;
+         _instancedIndexedInline = ibCount != 0;
+
+         _instancedFirstIndex = firstIndex;
+         _instancedFirstVertex = (int)_state.State.FirstVertex;
+         _instancedFirstInstance = (int)_state.State.FirstInstance;
+
+         _instancedIndexCount = ibCount != 0 ? ibCount : indexCount;
+
+         _instancedDrawStateFirst = drawFirstVertex;
+         _instancedDrawStateCount = drawVertexCount;
+
+         FinishDraw();
+
+         return;
+     }
+
+     int firstInstance = (int)_state.State.FirstInstance;
+
+     int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer);
+
+     if (inlineIndexCount != 0)
+     {
+         int firstVertex = (int)_state.State.FirstVertex;
+
+         // Inline indices are bound as 32-bit elements, hence the 4 bytes per index.
+         BufferRange br = new BufferRange(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4);
+
+         _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt);
+
+         _context.Renderer.Pipeline.DrawIndexed(inlineIndexCount, 1, firstIndex, firstVertex, firstInstance);
+     }
+     else if (_drawState.DrawIndexed)
+     {
+         int firstVertex = (int)_state.State.FirstVertex;
+
+         _context.Renderer.Pipeline.DrawIndexed(indexCount, 1, firstIndex, firstVertex, firstInstance);
+     }
+     else
+     {
+         _context.Renderer.Pipeline.Draw(drawVertexCount, 1, drawFirstVertex, firstInstance);
+     }
+
+     FinishDraw();
+ }
+
+ /// <summary>
+ /// Starts draw.
+ /// Decodes the primitive type and the instance flags from the method argument.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void DrawBegin(int argument)
+ {
+     const int IncrementInstanceBit = 1 << 26;
+     const int KeepInstanceBit = 1 << 27;
+
+     PrimitiveType type = (PrimitiveType)(argument & 0xffff);
+
+     bool incrementInstance = (argument & IncrementInstanceBit) != 0;
+
+     // The instance is reset when bit 27 is clear.
+     bool resetInstance = (argument & KeepInstanceBit) == 0;
+
+     DrawBegin(incrementInstance, resetInstance, type);
+ }
+
+ /// <summary>
+ /// Starts draw.
+ /// Updates the current instance index and sets the primitive topology for the draw.
+ /// </summary>
+ /// <param name="incrementInstance">Indicates if the current instance should be incremented</param>
+ /// <param name="resetInstance">Indicates if the current instance should be set to zero</param>
+ /// <param name="primitiveType">Primitive type</param>
+ private void DrawBegin(bool incrementInstance, bool resetInstance, PrimitiveType primitiveType)
+ {
+     // Incrementing takes priority over resetting when both are requested.
+     if (incrementInstance)
+     {
+         _instanceIndex++;
+     }
+     else if (resetInstance)
+     {
+         PerformDeferredDraws();
+
+         _instanceIndex = 0;
+     }
+
+     // The primitive type override, when enabled, replaces the type passed on the draw.
+     PrimitiveTopology topology = _state.State.PrimitiveTypeOverrideEnable
+         ? _state.State.PrimitiveTypeOverride.Convert()
+         : primitiveType.Convert();
+
+     UpdateTopology(topology);
+ }
+
+ /// <summary>
+ /// Sets the primitive topology on the host, unless the same topology was already set.
+ /// </summary>
+ /// <param name="topology">New primitive topology</param>
+ private void UpdateTopology(PrimitiveTopology topology)
+ {
+     bool upToDate = _topologySet && _drawState.Topology == topology;
+
+     if (upToDate)
+     {
+         return;
+     }
+
+     _context.Renderer.Pipeline.SetPrimitiveTopology(topology);
+     _currentSpecState.SetTopology(topology);
+     _drawState.Topology = topology;
+     _topologySet = true;
+ }
+
+ /// <summary>
+ /// Called when the index buffer count is set.
+ /// This only flags the next draw as an indexed draw, the argument itself is not used here.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void SetIndexBufferCount(int argument) => _drawState.DrawIndexed = true;
+
+ // TODO: Verify if the index type is implied from the method that is called,
+ // or if it uses the state index type on hardware.
+
+ /// <summary>
+ /// Performs an indexed draw with 8-bit index buffer elements.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="argument">Method call argument</param>
+ public void DrawIndexBuffer8BeginEndInstanceFirst(ThreedClass engine, int argument)
+     => DrawIndexBufferBeginEndInstance(engine, argument, false);
+
+ /// <summary>
+ /// Performs an indexed draw with 16-bit index buffer elements.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="argument">Method call argument</param>
+ public void DrawIndexBuffer16BeginEndInstanceFirst(ThreedClass engine, int argument)
+     => DrawIndexBufferBeginEndInstance(engine, argument, false);
+
+ /// <summary>
+ /// Performs an indexed draw with 32-bit index buffer elements.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="argument">Method call argument</param>
+ public void DrawIndexBuffer32BeginEndInstanceFirst(ThreedClass engine, int argument)
+     => DrawIndexBufferBeginEndInstance(engine, argument, false);
+
+        /// <summary>
+        /// Entry point for an indexed draw with 8-bit index buffer elements that is a
+        /// subsequent instance of an instanced draw.
+        /// Forwards to the shared indexed draw helper with the instanced flag set.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void DrawIndexBuffer8BeginEndInstanceSubsequent(ThreedClass engine, int argument)
+            => DrawIndexBufferBeginEndInstance(engine, argument, true);
+
+        /// <summary>
+        /// Entry point for an indexed draw with 16-bit index buffer elements that is a
+        /// subsequent instance of an instanced draw.
+        /// Forwards to the shared indexed draw helper with the instanced flag set.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void DrawIndexBuffer16BeginEndInstanceSubsequent(ThreedClass engine, int argument)
+            => DrawIndexBufferBeginEndInstance(engine, argument, true);
+
+        /// <summary>
+        /// Entry point for an indexed draw with 32-bit index buffer elements that is a
+        /// subsequent instance of an instanced draw.
+        /// Forwards to the shared indexed draw helper with the instanced flag set.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void DrawIndexBuffer32BeginEndInstanceSubsequent(ThreedClass engine, int argument)
+            => DrawIndexBufferBeginEndInstance(engine, argument, true);
+
+        /// <summary>
+        /// Performs an indexed draw whose topology, first index and index count are all packed
+        /// into the method argument.
+        /// </summary>
+        /// <remarks>
+        /// Argument layout: bits 0-15 hold the first index, bits 16-27 the index count and
+        /// bits 28-31 the primitive type.
+        /// The indexed draw flag is forced on for the draw and put back to its previous value afterwards.
+        /// </remarks>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        /// <param name="instanced">Passed to the draw begin as is, and negated as its second flag</param>
+        private void DrawIndexBufferBeginEndInstance(ThreedClass engine, int argument, bool instanced)
+        {
+            PrimitiveType type = (PrimitiveType)((argument >> 28) & 0xf);
+
+            DrawBegin(instanced, !instanced, type);
+
+            int first = argument & 0xffff;
+            int count = (argument >> 16) & 0xfff;
+
+            bool wasIndexed = _drawState.DrawIndexed;
+
+            _drawState.DrawIndexed = true;
+            engine.ForceStateDirty(IndexBufferCountMethodOffset * 4);
+
+            DrawEnd(engine, first, count, 0, 0);
+
+            _drawState.DrawIndexed = wasIndexed;
+        }
+
+        /// <summary>
+        /// Entry point for a non-indexed draw with the topology, first vertex and count packed in the argument.
+        /// Forwards to the shared non-indexed draw helper with the instanced flag cleared.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void DrawVertexArrayBeginEndInstanceFirst(ThreedClass engine, int argument)
+            => DrawVertexArrayBeginEndInstance(engine, argument, false);
+
+        /// <summary>
+        /// Entry point for a non-indexed draw with the topology, first vertex and count packed in the argument,
+        /// that is a subsequent instance of an instanced draw.
+        /// Forwards to the shared non-indexed draw helper with the instanced flag set.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void DrawVertexArrayBeginEndInstanceSubsequent(ThreedClass engine, int argument)
+            => DrawVertexArrayBeginEndInstance(engine, argument, true);
+
+        /// <summary>
+        /// Performs a non-indexed draw whose topology, first vertex and vertex count are all packed
+        /// into the method argument.
+        /// </summary>
+        /// <remarks>
+        /// Argument layout: bits 0-15 hold the first vertex, bits 16-27 the vertex count and
+        /// bits 28-31 the primitive type.
+        /// The indexed draw flag is forced off for the draw and put back to its previous value afterwards.
+        /// </remarks>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        /// <param name="instanced">Passed to the draw begin as is, and negated as its second flag</param>
+        private void DrawVertexArrayBeginEndInstance(ThreedClass engine, int argument, bool instanced)
+        {
+            PrimitiveType type = (PrimitiveType)((argument >> 28) & 0xf);
+
+            DrawBegin(instanced, !instanced, type);
+
+            int first = argument & 0xffff;
+            int count = (argument >> 16) & 0xfff;
+
+            bool wasIndexed = _drawState.DrawIndexed;
+
+            _drawState.DrawIndexed = false;
+            engine.ForceStateDirty(VertexBufferFirstMethodOffset * 4);
+
+            DrawEnd(engine, 0, 0, first, count);
+
+            _drawState.DrawIndexed = wasIndexed;
+        }
+
+        /// <summary>
+        /// Performs a texture draw with a source texture and sampler ID, along with source
+        /// and destination coordinates and sizes.
+        /// </summary>
+        /// <remarks>
+        /// Everything except <paramref name="engine"/> is read from the current register state.
+        /// When the texture lookup yields no texture, the source coordinates are left unscaled and
+        /// a null host texture is forwarded to the host pipeline instead of throwing.
+        /// </remarks>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument (not read by this method)</param>
+        public void DrawTexture(ThreedClass engine, int argument)
+        {
+            // Coordinates are stored as fixed point values with 12 fractional bits.
+            static float FixedToFloat(int fixedValue)
+            {
+                return fixedValue * (1f / 4096);
+            }
+
+            float dstX0 = FixedToFloat(_state.State.DrawTextureDstX);
+            float dstY0 = FixedToFloat(_state.State.DrawTextureDstY);
+            float dstWidth = FixedToFloat(_state.State.DrawTextureDstWidth);
+            float dstHeight = FixedToFloat(_state.State.DrawTextureDstHeight);
+
+            // TODO: Confirm behaviour on hardware.
+            // When this is active, the origin appears to be on the bottom.
+            if (_state.State.YControl.HasFlag(YControl.NegateY))
+            {
+                dstY0 -= dstHeight;
+            }
+
+            float dstX1 = dstX0 + dstWidth;
+            float dstY1 = dstY0 + dstHeight;
+
+            // The source extent is derived from the destination size and the per-pixel
+            // source step, which is divided by 2^32 before use.
+            float srcX0 = FixedToFloat(_state.State.DrawTextureSrcX);
+            float srcY0 = FixedToFloat(_state.State.DrawTextureSrcY);
+            float srcX1 = ((float)_state.State.DrawTextureDuDx / (1UL << 32)) * dstWidth + srcX0;
+            float srcY1 = ((float)_state.State.DrawTextureDvDy / (1UL << 32)) * dstHeight + srcY0;
+
+            // Update everything but the shader state.
+            engine.UpdateState(ulong.MaxValue & ~(1UL << StateUpdater.ShaderStateIndex));
+
+            _channel.TextureManager.UpdateRenderTargets();
+
+            int textureId = _state.State.DrawTextureTextureId;
+            int samplerId = _state.State.DrawTextureSamplerId;
+
+            (var texture, var sampler) = _channel.TextureManager.GetGraphicsTextureAndSampler(textureId, samplerId);
+
+            // The lookup result is treated as nullable when it is handed to the pipeline below,
+            // so it must not be dereferenced unconditionally here.
+            float srcScale = texture?.ScaleFactor ?? 1f;
+
+            srcX0 *= srcScale;
+            srcY0 *= srcScale;
+            srcX1 *= srcScale;
+            srcY1 *= srcScale;
+
+            float dstScale = _channel.TextureManager.RenderTargetScale;
+
+            dstX0 *= dstScale;
+            dstY0 *= dstScale;
+            dstX1 *= dstScale;
+            dstY1 *= dstScale;
+
+            _context.Renderer.Pipeline.DrawTexture(
+                texture?.HostTexture,
+                sampler?.GetHostSampler(texture),
+                new Extents2DF(srcX0, srcY0, srcX1, srcY1),
+                new Extents2DF(dstX0, dstY0, dstX1, dstY1));
+        }
+
+ /// <summary>
+ /// Performs an indexed or non-indexed draw.
+ /// The first vertex (indexed draws only) and first instance registers are written before the
+ /// state update and reset to zero after the host draw; the indexed draw flag is cleared on exit.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="count">Index count for indexed draws, vertex count for non-indexed draws</param>
+ /// <param name="instanceCount">Instance count</param>
+ /// <param name="firstIndex">First index on the index buffer for indexed draws, ignored for non-indexed draws</param>
+ /// <param name="firstVertex">First vertex on the vertex buffer</param>
+ /// <param name="firstInstance">First instance</param>
+ /// <param name="indexed">True if the draw is indexed, false otherwise</param>
+ public void Draw(
+ ThreedClass engine,
+ PrimitiveTopology topology,
+ int count,
+ int instanceCount,
+ int firstIndex,
+ int firstVertex,
+ int firstInstance,
+ bool indexed)
+ {
+ UpdateTopology(topology);
+
+ ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
+ _context,
+ _channel.MemoryManager,
+ _state.State.RenderEnableAddress,
+ _state.State.RenderEnableCondition);
+
+ // Conditional rendering disabled the draw: clear the indexed flag and skip it entirely.
+ if (renderEnable == ConditionalRenderEnabled.False)
+ {
+ _drawState.DrawIndexed = false;
+ return;
+ }
+
+ // Record the draw range and mark the matching register dirty so that
+ // the state update below processes it.
+ if (indexed)
+ {
+ _drawState.FirstIndex = firstIndex;
+ _drawState.IndexCount = count;
+ _state.State.FirstVertex = (uint)firstVertex;
+ engine.ForceStateDirty(IndexBufferCountMethodOffset * 4);
+ }
+ else
+ {
+ _drawState.DrawFirstVertex = firstVertex;
+ _drawState.DrawVertexCount = count;
+ engine.ForceStateDirty(VertexBufferFirstMethodOffset * 4);
+ }
+
+ _state.State.FirstInstance = (uint)firstInstance;
+
+ _drawState.DrawIndexed = indexed;
+ _currentSpecState.SetHasConstantBufferDrawParameters(true);
+
+ engine.UpdateState();
+
+ if (indexed)
+ {
+ _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance);
+ // The first vertex register was only set for this draw.
+ _state.State.FirstVertex = 0;
+ }
+ else
+ {
+ _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance);
+ }
+
+ // The first instance register was only set for this draw.
+ _state.State.FirstInstance = 0;
+
+ _drawState.DrawIndexed = false;
+
+ // Host conditional rendering has to be explicitly ended after the draw.
+ if (renderEnable == ConditionalRenderEnabled.Host)
+ {
+ _context.Renderer.Pipeline.EndHostConditionalRendering();
+ }
+ }
+
+ /// <summary>
+ /// Performs an indirect draw, with parameters from a GPU buffer.
+ /// The indexed and indirect draw flags are set for the state update and cleared after the host draw.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="indirectBufferAddress">Address of the buffer with the draw parameters, such as count, first index, etc</param>
+ /// <param name="parameterBufferAddress">Address of the buffer with the draw count</param>
+ /// <param name="maxDrawCount">Maximum number of draws that can be made</param>
+ /// <param name="stride">Distance in bytes between each entry on the data pointed to by <paramref name="indirectBufferAddress"/></param>
+ /// <param name="indexCount">Maximum number of indices that the draw can consume</param>
+ /// <param name="drawType">Type of the indirect draw, which can be indexed or non-indexed, with or without a draw count</param>
+ public void DrawIndirect(
+ ThreedClass engine,
+ PrimitiveTopology topology,
+ ulong indirectBufferAddress,
+ ulong parameterBufferAddress,
+ int maxDrawCount,
+ int stride,
+ int indexCount,
+ IndirectDrawType drawType)
+ {
+ UpdateTopology(topology);
+
+ ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
+ _context,
+ _channel.MemoryManager,
+ _state.State.RenderEnableAddress,
+ _state.State.RenderEnableCondition);
+
+ // Conditional rendering disabled the draw: clear the indexed flag and skip it entirely.
+ if (renderEnable == ConditionalRenderEnabled.False)
+ {
+ _drawState.DrawIndexed = false;
+ return;
+ }
+
+ PhysicalMemory memory = _channel.MemoryManager.Physical;
+
+ // The draw type is a pair of bit flags.
+ bool hasCount = (drawType & IndirectDrawType.Count) != 0;
+ bool indexed = (drawType & IndirectDrawType.Indexed) != 0;
+
+ if (indexed)
+ {
+ // The index count is clamped to the supported range and recorded as a range
+ // starting at index zero; the index count register is then forced dirty.
+ indexCount = Math.Clamp(indexCount, MinIndirectIndexCount, MaxIndirectIndexCount);
+ _drawState.FirstIndex = 0;
+ _drawState.IndexCount = indexCount;
+ engine.ForceStateDirty(IndexBufferCountMethodOffset * 4);
+ }
+
+ _drawState.DrawIndexed = indexed;
+ _drawState.DrawIndirect = true;
+ _currentSpecState.SetHasConstantBufferDrawParameters(true);
+
+ engine.UpdateState();
+
+ if (hasCount)
+ {
+ // With a draw count, the indirect buffer range covers the maximum number of entries,
+ // and the count itself is read from a 4 byte range on the parameter buffer.
+ var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferAddress, (ulong)maxDrawCount * (ulong)stride);
+ var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferAddress, 4);
+
+ if (indexed)
+ {
+ _context.Renderer.Pipeline.DrawIndexedIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
+ else
+ {
+ _context.Renderer.Pipeline.DrawIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
+ }
+ else
+ {
+ // Without a draw count, the indirect buffer range covers a single entry.
+ var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferAddress, (ulong)stride);
+
+ if (indexed)
+ {
+ _context.Renderer.Pipeline.DrawIndexedIndirect(indirectBuffer);
+ }
+ else
+ {
+ _context.Renderer.Pipeline.DrawIndirect(indirectBuffer);
+ }
+ }
+
+ _drawState.DrawIndexed = false;
+ _drawState.DrawIndirect = false;
+
+ // Host conditional rendering has to be explicitly ended after the draw.
+ if (renderEnable == ConditionalRenderEnabled.Host)
+ {
+ _context.Renderer.Pipeline.EndHostConditionalRendering();
+ }
+ }
+
+        /// <summary>
+        /// Flushes the pending instanced draw, if there is one.
+        /// Each instance arrives as a separate draw, so the draw is deferred while the instance
+        /// count accumulates. Once the last instanced draw is detected, a single host instanced
+        /// draw is issued here with the accumulated instance count.
+        /// </summary>
+        public void PerformDeferredDraws()
+        {
+            if (!_instancedDrawPending)
+            {
+                return;
+            }
+
+            _instancedDrawPending = false;
+
+            bool usesInlineIndices = _instancedIndexedInline;
+
+            if (!_instancedIndexed && !usesInlineIndices)
+            {
+                // Non-indexed instanced draw.
+                _context.Renderer.Pipeline.Draw(
+                    _instancedDrawStateCount,
+                    _instanceIndex + 1,
+                    _instancedDrawStateFirst,
+                    _instancedFirstInstance);
+
+                return;
+            }
+
+            if (usesInlineIndices)
+            {
+                // Bind the inline index data as the index buffer; each index takes 4 bytes.
+                int indexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer);
+                var inlineRange = new BufferRange(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, indexCount * 4);
+
+                _channel.BufferManager.SetIndexBuffer(inlineRange, IndexType.UInt);
+            }
+
+            _context.Renderer.Pipeline.DrawIndexed(
+                _instancedIndexCount,
+                _instanceIndex + 1,
+                _instancedFirstIndex,
+                _instancedFirstVertex,
+                _instancedFirstInstance);
+        }
+
+        /// <summary>
+        /// Clears the current color and depth-stencil buffers, for a single layer.
+        /// Which buffers should be cleared is selected by the argument.
+        /// </summary>
+        /// <param name="engine">3D engine where this method is being called</param>
+        /// <param name="argument">Method call argument</param>
+        public void Clear(ThreedClass engine, int argument) => Clear(engine, argument, 1);
+
+ /// <summary>
+ /// Clears the current color and depth-stencil buffers.
+ /// Which buffers should be cleared can also be specified with the arguments.
+ /// </summary>
+ /// <remarks>
+ /// Argument layout: bit 0 selects depth, bit 1 selects stencil, bits 2-5 are the color
+ /// component mask, bits 6-9 the color target index and bits 10-19 the layer.
+ /// </remarks>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="argument">Method call argument</param>
+ /// <param name="layerCount">For array and 3D textures, indicates how many layers should be cleared</param>
+ public void Clear(ThreedClass engine, int argument, int layerCount)
+ {
+ ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
+ _context,
+ _channel.MemoryManager,
+ _state.State.RenderEnableAddress,
+ _state.State.RenderEnableCondition);
+
+ // Conditional rendering disabled the clear: nothing to do.
+ if (renderEnable == ConditionalRenderEnabled.False)
+ {
+ return;
+ }
+
+ // Unpack the method argument.
+ bool clearDepth = (argument & 1) != 0;
+ bool clearStencil = (argument & 2) != 0;
+ uint componentMask = (uint)((argument >> 2) & 0xf);
+ int index = (argument >> 6) & 0xf;
+ int layer = (argument >> 10) & 0x3ff;
+
+ RenderTargetUpdateFlags updateFlags = RenderTargetUpdateFlags.SingleColor;
+
+ if (layer != 0 || layerCount > 1)
+ {
+ updateFlags |= RenderTargetUpdateFlags.Layered;
+ }
+
+ if (clearDepth || clearStencil)
+ {
+ updateFlags |= RenderTargetUpdateFlags.UpdateDepthStencil;
+ }
+
+ // The color target index is only passed when a color component is being cleared, otherwise -1 is used.
+ engine.UpdateRenderTargetState(updateFlags, singleUse: componentMask != 0 ? index : -1);
+
+ // If there is a mismatch on the host clip region and the one explicitly defined by the guest
+ // on the screen scissor state, then we need to force only one texture to be bound to avoid
+ // host clipping.
+ var screenScissorState = _state.State.ScreenScissorState;
+
+ // Must happen after UpdateRenderTargetState to have up-to-date clip region values.
+ bool clipMismatch = (screenScissorState.X | screenScissorState.Y) != 0 ||
+ screenScissorState.Width != _channel.TextureManager.ClipRegionWidth ||
+ screenScissorState.Height != _channel.TextureManager.ClipRegionHeight;
+
+ // Bit 0 of the clear flags applies the stencil mask to the clear, bit 8 applies the scissor.
+ bool clearAffectedByStencilMask = (_state.State.ClearFlags & 1) != 0;
+ bool clearAffectedByScissor = (_state.State.ClearFlags & 0x100) != 0;
+ bool needsCustomScissor = !clearAffectedByScissor || clipMismatch;
+
+ // Scissor and rasterizer discard also affect clears.
+ ulong updateMask = 1UL << StateUpdater.RasterizerStateIndex;
+
+ if (!needsCustomScissor)
+ {
+ updateMask |= 1UL << StateUpdater.ScissorStateIndex;
+ }
+
+ engine.UpdateState(updateMask);
+
+ if (needsCustomScissor)
+ {
+ // Start from the screen scissor, and narrow it with scissor 0 when
+ // the clear is affected by the scissor and that scissor is enabled.
+ int scissorX = screenScissorState.X;
+ int scissorY = screenScissorState.Y;
+ int scissorW = screenScissorState.Width;
+ int scissorH = screenScissorState.Height;
+
+ if (clearAffectedByScissor && _state.State.ScissorState[0].Enable)
+ {
+ ref var scissorState = ref _state.State.ScissorState[0];
+
+ scissorX = Math.Max(scissorX, scissorState.X1);
+ scissorY = Math.Max(scissorY, scissorState.Y1);
+ scissorW = Math.Min(scissorW, scissorState.X2 - scissorState.X1);
+ scissorH = Math.Min(scissorH, scissorState.Y2 - scissorState.Y1);
+ }
+
+ // Apply the render target scale; the origin is truncated and the size is rounded up.
+ float scale = _channel.TextureManager.RenderTargetScale;
+ if (scale != 1f)
+ {
+ scissorX = (int)(scissorX * scale);
+ scissorY = (int)(scissorY * scale);
+ scissorW = (int)MathF.Ceiling(scissorW * scale);
+ scissorH = (int)MathF.Ceiling(scissorH * scale);
+ }
+
+ Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[]
+ {
+ new Rectangle<int>(scissorX, scissorY, scissorW, scissorH)
+ };
+
+ _context.Renderer.Pipeline.SetScissors(scissors);
+ }
+
+ _channel.TextureManager.UpdateRenderTargets();
+
+ // Color is only cleared when at least one component is selected.
+ if (componentMask != 0)
+ {
+ var clearColor = _state.State.ClearColors;
+
+ ColorF color = new ColorF(clearColor.Red, clearColor.Green, clearColor.Blue, clearColor.Alpha);
+
+ _context.Renderer.Pipeline.ClearRenderTargetColor(index, layer, layerCount, componentMask, color);
+ }
+
+ if (clearDepth || clearStencil)
+ {
+ float depthValue = _state.State.ClearDepthValue;
+ int stencilValue = (int)_state.State.ClearStencilValue;
+
+ // The stencil mask stays zero when stencil is not being cleared.
+ int stencilMask = 0;
+
+ if (clearStencil)
+ {
+ stencilMask = clearAffectedByStencilMask ? _state.State.StencilTestState.FrontMask : 0xff;
+ }
+
+ if (clipMismatch)
+ {
+ _channel.TextureManager.UpdateRenderTargetDepthStencil();
+ }
+
+ _context.Renderer.Pipeline.ClearRenderTargetDepthStencil(
+ layer,
+ layerCount,
+ depthValue,
+ clearDepth,
+ stencilValue,
+ stencilMask);
+ }
+
+ // Put back the guest scissor state that the custom scissor replaced.
+ if (needsCustomScissor)
+ {
+ engine.UpdateScissorState();
+ }
+
+ // Go back to the default render target update flags used for draws.
+ engine.UpdateRenderTargetState(RenderTargetUpdateFlags.UpdateAll);
+
+ // Host conditional rendering has to be explicitly ended after the clear.
+ if (renderEnable == ConditionalRenderEnabled.Host)
+ {
+ _context.Renderer.Pipeline.EndHostConditionalRendering();
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
new file mode 100644
index 00000000..42ec2442
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
@@ -0,0 +1,65 @@
+using Ryujinx.Graphics.GAL;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Draw state.
+ /// Plain container of mutable fields describing the next draw.
+ /// </summary>
+ class DrawState
+ {
+ /// <summary>
+ /// First index to be used for the draw on the index buffer.
+ /// </summary>
+ public int FirstIndex;
+
+ /// <summary>
+ /// Number of indices to be used for the draw on the index buffer.
+ /// </summary>
+ public int IndexCount;
+
+ /// <summary>
+ /// First vertex used on non-indexed draws. This value is stored somewhere else on indexed draws.
+ /// </summary>
+ public int DrawFirstVertex;
+
+ /// <summary>
+ /// Vertex count used on non-indexed draws. Indexed draws have an index count instead.
+ /// </summary>
+ public int DrawVertexCount;
+
+ /// <summary>
+ /// Indicates if the next draw will be an indexed draw.
+ /// </summary>
+ public bool DrawIndexed;
+
+ /// <summary>
+ /// Indicates if the next draw will be an indirect draw.
+ /// </summary>
+ public bool DrawIndirect;
+
+ /// <summary>
+ /// Indicates if any of the currently used vertex shaders reads the instance ID.
+ /// </summary>
+ public bool VsUsesInstanceId;
+
+ /// <summary>
+ /// Indicates if any of the currently used vertex buffers is instanced.
+ /// </summary>
+ public bool IsAnyVbInstanced;
+
+ /// <summary>
+ /// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
+ /// </summary>
+ public bool HasConstantBufferDrawParameters;
+
+ /// <summary>
+ /// Primitive topology for the next draw.
+ /// </summary>
+ public PrimitiveTopology Topology;
+
+ /// <summary>
+ /// Index buffer data streamer for inline index buffer updates, such as those used in legacy OpenGL.
+ /// This is a mutable struct, so it has to be used through this field rather than through a copy.
+ /// </summary>
+ public IbStreamer IbStreamer = new IbStreamer();
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs
new file mode 100644
index 00000000..80d8c00b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs
@@ -0,0 +1,194 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+    /// <summary>
+    /// Holds inline index buffer state.
+    /// The inline index buffer data is sent to the GPU through the command buffer.
+    /// </summary>
+    /// <remarks>
+    /// Indices are always stored as 32-bit values. They are staged on a small CPU side buffer,
+    /// and uploaded to the host buffer in chunks of <see cref="BufferCapacity"/> elements.
+    /// </remarks>
+    struct IbStreamer
+    {
+        private const int BufferCapacity = 256; // Must be a power of 2.
+
+        private BufferHandle _inlineIndexBuffer;
+        private int _inlineIndexBufferSize;
+        private int _inlineIndexCount;
+        private uint[] _buffer;
+
+        /// <summary>
+        /// Indicates if any index buffer data has been pushed.
+        /// </summary>
+        public bool HasInlineIndexData => _inlineIndexCount != 0;
+
+        /// <summary>
+        /// Total number of indices that have been pushed.
+        /// </summary>
+        public int InlineIndexCount => _inlineIndexCount;
+
+        /// <summary>
+        /// Gets the handle for the host buffer currently holding the inline index buffer data.
+        /// </summary>
+        /// <returns>Host buffer handle</returns>
+        public BufferHandle GetInlineIndexBuffer()
+        {
+            return _inlineIndexBuffer;
+        }
+
+        /// <summary>
+        /// Gets the number of elements on the current inline index buffer,
+        /// while also resetting it to zero for the next draw.
+        /// Any data still staged on the CPU side is uploaded first.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <returns>Inline index buffer count</returns>
+        public int GetAndResetInlineIndexCount(IRenderer renderer)
+        {
+            UpdateRemaining(renderer);
+
+            int count = _inlineIndexCount;
+            _inlineIndexCount = 0;
+
+            return count;
+        }
+
+        /// <summary>
+        /// Pushes four 8-bit index buffer elements.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <param name="argument">Method call argument, with one index per byte, lowest byte first</param>
+        public void VbElementU8(IRenderer renderer, int argument)
+        {
+            byte i0 = (byte)argument;
+            byte i1 = (byte)(argument >> 8);
+            byte i2 = (byte)(argument >> 16);
+            byte i3 = (byte)(argument >> 24);
+
+            int offset = _inlineIndexCount;
+
+            PushData(renderer, offset, i0);
+            PushData(renderer, offset + 1, i1);
+            PushData(renderer, offset + 2, i2);
+            PushData(renderer, offset + 3, i3);
+
+            _inlineIndexCount += 4;
+        }
+
+        /// <summary>
+        /// Pushes two 16-bit index buffer elements.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <param name="argument">Method call argument, with one index per 16-bit half, lowest half first</param>
+        public void VbElementU16(IRenderer renderer, int argument)
+        {
+            ushort i0 = (ushort)argument;
+            ushort i1 = (ushort)(argument >> 16);
+
+            int offset = _inlineIndexCount;
+
+            PushData(renderer, offset, i0);
+            PushData(renderer, offset + 1, i1);
+
+            _inlineIndexCount += 2;
+        }
+
+        /// <summary>
+        /// Pushes one 32-bit index buffer element.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <param name="argument">Method call argument, used as the index value</param>
+        public void VbElementU32(IRenderer renderer, int argument)
+        {
+            uint i0 = (uint)argument;
+
+            int offset = _inlineIndexCount++;
+
+            PushData(renderer, offset, i0);
+        }
+
+        /// <summary>
+        /// Pushes a 32-bit value to the index buffer.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <param name="offset">Offset where the data should be written, in 32-bit words</param>
+        /// <param name="value">Index value to be written</param>
+        private void PushData(IRenderer renderer, int offset, uint value)
+        {
+            // The staging buffer is allocated on first use.
+            _buffer ??= new uint[BufferCapacity];
+
+            // We upload data in chunks.
+            // If we are at the start of a chunk, then the buffer might be full,
+            // in that case we need to submit any existing data before overwriting the buffer.
+            int subOffset = offset & (BufferCapacity - 1);
+
+            if (subOffset == 0 && offset != 0)
+            {
+                int baseOffset = (offset - BufferCapacity) * sizeof(uint);
+                BufferHandle buffer = GetInlineIndexBuffer(renderer, baseOffset, BufferCapacity * sizeof(uint));
+                renderer.SetBufferData(buffer, baseOffset, MemoryMarshal.Cast<uint, byte>(_buffer));
+            }
+
+            _buffer[subOffset] = value;
+        }
+
+        /// <summary>
+        /// Makes sure that any pending data is submitted to the GPU before the index buffer is used.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        private void UpdateRemaining(IRenderer renderer)
+        {
+            int offset = _inlineIndexCount;
+            if (offset == 0)
+            {
+                return;
+            }
+
+            // Number of elements on the last, not yet uploaded chunk.
+            // A count that is a multiple of the capacity means that the last chunk is full.
+            int count = offset & (BufferCapacity - 1);
+            if (count == 0)
+            {
+                count = BufferCapacity;
+            }
+
+            int baseOffset = (offset - count) * sizeof(uint);
+            int length = count * sizeof(uint);
+            BufferHandle buffer = GetInlineIndexBuffer(renderer, baseOffset, length);
+            renderer.SetBufferData(buffer, baseOffset, MemoryMarshal.Cast<uint, byte>(_buffer).Slice(0, length));
+        }
+
+        /// <summary>
+        /// Gets the handle of a buffer large enough to hold the data that will be written to <paramref name="offset"/>.
+        /// The host buffer is created on first use, and replaced by a larger one (with the old
+        /// contents copied over) when it is too small.
+        /// </summary>
+        /// <param name="renderer">Host renderer</param>
+        /// <param name="offset">Offset where the data will be written</param>
+        /// <param name="length">Number of bytes that will be written</param>
+        /// <returns>Buffer handle</returns>
+        private BufferHandle GetInlineIndexBuffer(IRenderer renderer, int offset, int length)
+        {
+            // Calculate a reasonable size for the buffer that can fit all the data,
+            // and that also won't require frequent resizes if we need to push more data.
+            int size = BitUtils.AlignUp(offset + length + 0x10, 0x200);
+
+            if (_inlineIndexBuffer == BufferHandle.Null)
+            {
+                _inlineIndexBuffer = renderer.CreateBuffer(size);
+                _inlineIndexBufferSize = size;
+            }
+            else if (_inlineIndexBufferSize < size)
+            {
+                BufferHandle oldBuffer = _inlineIndexBuffer;
+                int oldSize = _inlineIndexBufferSize;
+
+                _inlineIndexBuffer = renderer.CreateBuffer(size);
+                _inlineIndexBufferSize = size;
+
+                renderer.Pipeline.CopyBuffer(oldBuffer, _inlineIndexBuffer, 0, 0, oldSize);
+                renderer.DeleteBuffer(oldBuffer);
+            }
+
+            return _inlineIndexBuffer;
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs
new file mode 100644
index 00000000..d78aa498
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs
@@ -0,0 +1,38 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+    /// <summary>
+    /// Indirect draw type, which can be indexed or non-indexed, with or without a draw count.
+    /// The values are bit flags: <see cref="Indexed"/> and <see cref="Count"/> are the individual
+    /// bits, and the remaining members name each of their combinations.
+    /// </summary>
+    [System.Flags]
+    enum IndirectDrawType
+    {
+        /// <summary>
+        /// Non-indexed draw without draw count.
+        /// </summary>
+        DrawIndirect = 0,
+
+        /// <summary>
+        /// Indexed draw without draw count.
+        /// </summary>
+        DrawIndexedIndirect = Indexed,
+
+        /// <summary>
+        /// Non-indexed draw with draw count.
+        /// </summary>
+        DrawIndirectCount = Count,
+
+        /// <summary>
+        /// Indexed draw with draw count.
+        /// </summary>
+        DrawIndexedIndirectCount = Indexed | Count,
+
+        /// <summary>
+        /// Indexed flag.
+        /// </summary>
+        Indexed = 1 << 0,
+
+        /// <summary>
+        /// Draw count flag.
+        /// </summary>
+        Count = 1 << 1
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
new file mode 100644
index 00000000..cf2e818c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
@@ -0,0 +1,41 @@
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+    /// <summary>
+    /// Bit flags selecting how the render targets are updated.
+    /// </summary>
+    [Flags]
+    enum RenderTargetUpdateFlags
+    {
+        /// <summary>
+        /// No flags set.
+        /// </summary>
+        None = 0x0,
+
+        /// <summary>
+        /// The render target index comes from the control register.
+        /// </summary>
+        UseControl = 0x1,
+
+        /// <summary>
+        /// All render targets are 2D array textures.
+        /// </summary>
+        Layered = 0x2,
+
+        /// <summary>
+        /// Only a single color target will be used.
+        /// </summary>
+        SingleColor = 0x4,
+
+        /// <summary>
+        /// The depth-stencil target will be used.
+        /// </summary>
+        UpdateDepthStencil = 0x8,
+
+        /// <summary>
+        /// Default update flags for draws.
+        /// </summary>
+        UpdateAll = UseControl | UpdateDepthStencil
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs
new file mode 100644
index 00000000..63a2c841
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs
@@ -0,0 +1,190 @@
+using Ryujinx.Graphics.GAL;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+    /// <summary>
+    /// Handles the semaphore related methods: semaphore release, counter report and counter reset.
+    /// </summary>
+    class SemaphoreUpdater
+    {
+        /// <summary>
+        /// GPU semaphore operation, taken from the 2 lowest bits of the report argument.
+        /// </summary>
+        private enum SemaphoreOperation
+        {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2
+        }
+
+        /// <summary>
+        /// Counter type for GPU counter reset.
+        /// </summary>
+        private enum ResetCounterType
+        {
+            SamplesPassed = 1,
+            ZcullStats = 2,
+            TransformFeedbackPrimitivesWritten = 0x10,
+            InputVertices = 0x12,
+            InputPrimitives = 0x13,
+            VertexShaderInvocations = 0x15,
+            TessControlShaderInvocations = 0x16,
+            TessEvaluationShaderInvocations = 0x17,
+            TessEvaluationShaderPrimitives = 0x18,
+            GeometryShaderInvocations = 0x1a,
+            GeometryShaderPrimitives = 0x1b,
+            ClipperInputPrimitives = 0x1c,
+            ClipperOutputPrimitives = 0x1d,
+            FragmentShaderInvocations = 0x1e,
+            PrimitivesGenerated = 0x1f
+        }
+
+        /// <summary>
+        /// Counter type for GPU counter reporting.
+        /// </summary>
+        private enum ReportCounterType
+        {
+            Payload = 0,
+            InputVertices = 1,
+            InputPrimitives = 3,
+            VertexShaderInvocations = 5,
+            GeometryShaderInvocations = 7,
+            GeometryShaderPrimitives = 9,
+            ZcullStats0 = 0xa,
+            TransformFeedbackPrimitivesWritten = 0xb,
+            ZcullStats1 = 0xc,
+            ZcullStats2 = 0xe,
+            ClipperInputPrimitives = 0xf,
+            ZcullStats3 = 0x10,
+            ClipperOutputPrimitives = 0x11,
+            PrimitivesGenerated = 0x12,
+            FragmentShaderInvocations = 0x13,
+            SamplesPassed = 0x15,
+            TransformFeedbackOffset = 0x1a,
+            TessControlShaderInvocations = 0x1b,
+            TessEvaluationShaderInvocations = 0x1d,
+            TessEvaluationShaderPrimitives = 0x1f
+        }
+
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+        /// <summary>
+        /// Creates a new instance of the semaphore updater.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="state">Channel state</param>
+        public SemaphoreUpdater(GpuContext context, GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state)
+        {
+            _context = context;
+            _channel = channel;
+            _state = state;
+        }
+
+        /// <summary>
+        /// Resets the value of an internal GPU counter back to zero.
+        /// Only the samples passed, primitives generated and transform feedback primitives written
+        /// counters are forwarded to the host renderer, all other types are ignored.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        public void ResetCounter(int argument)
+        {
+            ResetCounterType type = (ResetCounterType)argument;
+
+            if (type == ResetCounterType.SamplesPassed)
+            {
+                _context.Renderer.ResetCounter(CounterType.SamplesPassed);
+            }
+            else if (type == ResetCounterType.PrimitivesGenerated)
+            {
+                _context.Renderer.ResetCounter(CounterType.PrimitivesGenerated);
+            }
+            else if (type == ResetCounterType.TransformFeedbackPrimitivesWritten)
+            {
+                _context.Renderer.ResetCounter(CounterType.TransformFeedbackPrimitivesWritten);
+            }
+        }
+
+        /// <summary>
+        /// Writes a GPU counter to guest memory.
+        /// </summary>
+        /// <remarks>
+        /// The operation comes from bits 0-1 of the argument, and the counter type from bits 23-27.
+        /// Only the release and counter operations do something here.
+        /// </remarks>
+        /// <param name="argument">Method call argument</param>
+        public void Report(int argument)
+        {
+            SemaphoreOperation operation = (SemaphoreOperation)(argument & 3);
+            ReportCounterType counterType = (ReportCounterType)((argument >> 23) & 0x1f);
+
+            if (operation == SemaphoreOperation.Release)
+            {
+                ReleaseSemaphore();
+            }
+            else if (operation == SemaphoreOperation.Counter)
+            {
+                ReportCounter(counterType);
+            }
+        }
+
+        /// <summary>
+        /// Writes (or Releases) a GPU semaphore value to guest memory.
+        /// </summary>
+        private void ReleaseSemaphore()
+        {
+            ulong address = _state.State.SemaphoreAddress.Pack();
+
+            _channel.MemoryManager.Write(address, _state.State.SemaphorePayload);
+
+            _context.AdvanceSequence();
+        }
+
+        /// <summary>
+        /// Packed GPU counter data (including GPU timestamp) in memory.
+        /// </summary>
+        private struct CounterData
+        {
+            public ulong Counter;
+            public ulong Timestamp;
+        }
+
+        /// <summary>
+        /// Writes a GPU counter to guest memory.
+        /// This also writes the current timestamp value.
+        /// </summary>
+        /// <remarks>
+        /// The payload type is written right away. The samples passed, primitives generated and
+        /// transform feedback primitives written counters are requested from the host renderer, and
+        /// written from the result callback. No write is requested for any other type.
+        /// </remarks>
+        /// <param name="type">Counter to be written to memory</param>
+        private void ReportCounter(ReportCounterType type)
+        {
+            ulong address = _state.State.SemaphoreAddress.Pack();
+            ulong timestamp = _context.GetTimestamp();
+
+            // Captured by the callback below, and assigned once the host counter has been requested.
+            ICounterEvent counterEvent = null;
+
+            void WriteResult(object sender, ulong value)
+            {
+                CounterData data = new CounterData
+                {
+                    Counter = value,
+                    Timestamp = timestamp
+                };
+
+                // Results of counter events flagged as invalid are not written.
+                if (counterEvent?.Invalid == true)
+                {
+                    return;
+                }
+
+                _channel.MemoryManager.Write(address, data);
+            }
+
+            if (type == ReportCounterType.Payload)
+            {
+                WriteResult(null, (ulong)_state.State.SemaphorePayload);
+            }
+            else if (type == ReportCounterType.SamplesPassed)
+            {
+                counterEvent = _context.Renderer.ReportCounter(CounterType.SamplesPassed, WriteResult, false);
+            }
+            else if (type == ReportCounterType.PrimitivesGenerated)
+            {
+                counterEvent = _context.Renderer.ReportCounter(CounterType.PrimitivesGenerated, WriteResult, false);
+            }
+            else if (type == ReportCounterType.TransformFeedbackPrimitivesWritten)
+            {
+                counterEvent = _context.Renderer.ReportCounter(CounterType.TransformFeedbackPrimitivesWritten, WriteResult, false);
+            }
+
+            _channel.MemoryManager.CounterCache.AddOrUpdate(address, counterEvent);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
new file mode 100644
index 00000000..a8af5497
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
@@ -0,0 +1,346 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Keeps track of the "current" specialization state and exposes a flag telling whether it changed in a meaningful way.
+ /// </summary>
+ internal class SpecializationStateUpdater
+ {
+     private readonly GpuContext _context;
+     private GpuChannelGraphicsState _graphics;
+     private GpuChannelPoolState _pool;
+
+     private bool _usesDrawParameters;
+     private bool _usesTopology;
+
+     private bool _changed;
+
+     /// <summary>
+     /// Creates a new instance of the specialization state updater class.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     public SpecializationStateUpdater(GpuContext context)
+     {
+         _context = context;
+     }
+
+     /// <summary>
+     /// Marks the specialization state as changed.
+     /// </summary>
+     private void Signal()
+     {
+         _changed = true;
+     }
+
+     /// <summary>
+     /// Reports whether the specialization state changed since the previous check, and resets the flag.
+     /// </summary>
+     /// <returns>True if it has changed, false otherwise</returns>
+     public bool HasChanged()
+     {
+         bool wasChanged = _changed;
+
+         _changed = false;
+
+         return wasChanged;
+     }
+
+     /// <summary>
+     /// Sets the active shader. This clears the changed flag and records which specializations the shader cares about.
+     /// </summary>
+     /// <param name="gs">The active shader</param>
+     public void SetShader(CachedShaderProgram gs)
+     {
+         _usesTopology = gs.SpecializationState.IsPrimitiveTopologyQueried();
+         _usesDrawParameters = gs.Shaders[1]?.Info.UsesDrawParameters ?? false;
+
+         _changed = false;
+     }
+
+     /// <summary>
+     /// Gets a reference to the current graphics state.
+     /// </summary>
+     /// <returns>GPU graphics state</returns>
+     public ref GpuChannelGraphicsState GetGraphicsState()
+     {
+         return ref _graphics;
+     }
+
+     /// <summary>
+     /// Gets a reference to the current pool state.
+     /// </summary>
+     /// <returns>GPU pool state</returns>
+     public ref GpuChannelPoolState GetPoolState()
+     {
+         return ref _pool;
+     }
+
+     /// <summary>
+     /// Sets the early Z force enable. The state is always marked as changed.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetEarlyZForce(bool value)
+     {
+         _graphics.EarlyZForce = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets the primitive topology of the current draw.
+     /// A change is only signalled when the active shader queries the topology.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetTopology(PrimitiveTopology value)
+     {
+         if (value == _graphics.Topology)
+         {
+             return;
+         }
+
+         _graphics.Topology = value;
+
+         if (_usesTopology)
+         {
+             Signal();
+         }
+     }
+
+     /// <summary>
+     /// Sets the tessellation mode.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetTessellationMode(TessMode value)
+     {
+         if (value.Packed == _graphics.TessellationMode.Packed)
+         {
+             return;
+         }
+
+         _graphics.TessellationMode = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Stores the alpha-to-coverage state. The state is always marked as changed.
+     /// </summary>
+     /// <param name="enable">Whether alpha-to-coverage is enabled</param>
+     /// <param name="ditherEnable">Whether alpha-to-coverage dithering is enabled</param>
+     public void SetAlphaToCoverageEnable(bool enable, bool ditherEnable)
+     {
+         _graphics.AlphaToCoverageDitherEnable = ditherEnable;
+         _graphics.AlphaToCoverageEnable = enable;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets whether the viewport transform is disabled.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetViewportTransformDisable(bool value)
+     {
+         if (value == _graphics.ViewportTransformDisable)
+         {
+             return;
+         }
+
+         _graphics.ViewportTransformDisable = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets the depth mode (zero to one, or minus one to one).
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetDepthMode(bool value)
+     {
+         if (value == _graphics.DepthMode)
+         {
+             return;
+         }
+
+         _graphics.DepthMode = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets whether the point size comes from the shader or is fixed.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetProgramPointSizeEnable(bool value)
+     {
+         if (value == _graphics.ProgramPointSizeEnable)
+         {
+             return;
+         }
+
+         _graphics.ProgramPointSizeEnable = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets the point size used if <see cref="SetProgramPointSizeEnable" /> is provided false.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetPointSize(float value)
+     {
+         if (value == _graphics.PointSize)
+         {
+             return;
+         }
+
+         _graphics.PointSize = value;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Stores the alpha test specialization state. The state is always marked as changed.
+     /// </summary>
+     /// <param name="enable">Whether alpha test is enabled</param>
+     /// <param name="reference">The value to compare with the fragment output alpha</param>
+     /// <param name="op">The comparison that decides if the fragment should be discarded</param>
+     public void SetAlphaTest(bool enable, float reference, CompareOp op)
+     {
+         _graphics.AlphaTestCompare = op;
+         _graphics.AlphaTestReference = reference;
+         _graphics.AlphaTestEnable = enable;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Refreshes the type of each vertex attribute consumed by the shader.
+     /// A change is signalled only when at least one attribute type differs from the stored one.
+     /// </summary>
+     /// <param name="state">The new state</param>
+     public void SetAttributeTypes(ref Array32<VertexAttribState> state)
+     {
+         ref Array32<AttributeType> stored = ref _graphics.AttributeTypes;
+         bool anyDifferent = false;
+
+         for (int i = 0; i < state.Length; i++)
+         {
+             AttributeType converted;
+
+             // Only the integer types are told apart; everything else is treated as float.
+             switch (state[i].UnpackType())
+             {
+                 case VertexAttribType.Sint:
+                     converted = AttributeType.Sint;
+                     break;
+                 case VertexAttribType.Uint:
+                     converted = AttributeType.Uint;
+                     break;
+                 default:
+                     converted = AttributeType.Float;
+                     break;
+             }
+
+             if (stored[i] == converted)
+             {
+                 continue;
+             }
+
+             stored[i] = converted;
+             anyDifferent = true;
+         }
+
+         if (anyDifferent)
+         {
+             Signal();
+         }
+     }
+
+     /// <summary>
+     /// Refreshes the type of the fragment shader outputs from the current render target state.
+     /// Only enabled targets below the control register count are considered, and a change is
+     /// signalled only when the host needs fragment output specialization.
+     /// </summary>
+     /// <param name="rtControl">The render target control register</param>
+     /// <param name="state">The color attachment state</param>
+     public void SetFragmentOutputTypes(RtControl rtControl, ref Array8<RtColorState> state)
+     {
+         int enabledCount = rtControl.UnpackCount();
+         bool anyDifferent = false;
+
+         for (int index = 0; index < Constants.TotalRenderTargets; index++)
+         {
+             int rtIndex = rtControl.UnpackPermutationIndex(index);
+
+             var colorState = state[rtIndex];
+
+             if (index >= enabledCount || !StateUpdater.IsRtEnabled(colorState))
+             {
+                 continue;
+             }
+
+             Format format = colorState.Format.Convert().Format;
+
+             AttributeType type;
+
+             if (!format.IsInteger())
+             {
+                 type = AttributeType.Float;
+             }
+             else if (format.IsSint())
+             {
+                 type = AttributeType.Sint;
+             }
+             else
+             {
+                 type = AttributeType.Uint;
+             }
+
+             if (type == _graphics.FragmentOutputTypes[index])
+             {
+                 continue;
+             }
+
+             _graphics.FragmentOutputTypes[index] = type;
+             anyDifferent = true;
+         }
+
+         if (anyDifferent && _context.Capabilities.NeedsFragmentOutputSpecialization)
+         {
+             Signal();
+         }
+     }
+
+     /// <summary>
+     /// Sets whether the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
+     /// A change is only signalled when the active shader uses draw parameters.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     public void SetHasConstantBufferDrawParameters(bool value)
+     {
+         if (value == _graphics.HasConstantBufferDrawParameters)
+         {
+             return;
+         }
+
+         _graphics.HasConstantBufferDrawParameters = value;
+
+         if (_usesDrawParameters)
+         {
+             Signal();
+         }
+     }
+
+     /// <summary>
+     /// Sets whether any storage buffer use is unaligned.
+     /// </summary>
+     /// <param name="value">The new value</param>
+     /// <returns>True if the unaligned state changed, false otherwise</returns>
+     public bool SetHasUnalignedStorageBuffer(bool value)
+     {
+         if (value == _graphics.HasUnalignedStorageBuffer)
+         {
+             return false;
+         }
+
+         _graphics.HasUnalignedStorageBuffer = value;
+
+         Signal();
+
+         return true;
+     }
+
+     /// <summary>
+     /// Sets the GPU pool state.
+     /// </summary>
+     /// <param name="state">The new state</param>
+     public void SetPoolState(GpuChannelPoolState state)
+     {
+         if (state.Equals(_pool))
+         {
+             return;
+         }
+
+         _pool = state;
+
+         Signal();
+     }
+
+     /// <summary>
+     /// Sets the dual-source blend enabled state.
+     /// </summary>
+     /// <param name="enabled">True if blending is enabled and using dual-source blend</param>
+     public void SetDualSourceBlendEnabled(bool enabled)
+     {
+         if (enabled == _graphics.DualSourceBlendEnable)
+         {
+             return;
+         }
+
+         _graphics.DualSourceBlendEnable = enabled;
+
+         Signal();
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs
new file mode 100644
index 00000000..7c730967
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs
@@ -0,0 +1,177 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Entry of the state update table: a callback together with the names of the state fields it is tied to.
+ /// </summary>
+ readonly struct StateUpdateCallbackEntry
+ {
+     private readonly Action _callback;
+     private readonly string[] _fieldNames;
+
+     /// <summary>
+     /// Callback function, to be called if the register was written as the state needs to be updated.
+     /// </summary>
+     public Action Callback => _callback;
+
+     /// <summary>
+     /// Name of the state fields (registers) associated with the callback function.
+     /// </summary>
+     public string[] FieldNames => _fieldNames;
+
+     /// <summary>
+     /// Creates a new state update callback entry.
+     /// </summary>
+     /// <param name="callback">Callback function, to be called if the register was written as the state needs to be updated</param>
+     /// <param name="fieldNames">Name of the state fields (registers) associated with the callback function</param>
+     public StateUpdateCallbackEntry(Action callback, params string[] fieldNames)
+     {
+         _callback = callback;
+         _fieldNames = fieldNames;
+     }
+ }
+
+ /// <summary>
+ /// GPU state update tracker.
+ /// Maps each register to a callback group and records, in a 64-bit mask, which groups were written since their last update.
+ /// </summary>
+ /// <typeparam name="TState">State type</typeparam>
+ class StateUpdateTracker<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicFields)] TState>
+ {
+     // Number of register slots covered by the register-to-group table.
+     private const int BlockSize = 0xe00;
+     private const int RegisterSize = sizeof(uint);
+
+     // One bit of the dirty mask per group, so no more groups than bits can be tracked.
+     private const int MaxGroups = sizeof(ulong) * 8;
+
+     private readonly byte[] _registerToGroupMapping;
+     private readonly Action[] _callbacks;
+     private ulong _dirtyMask;
+
+     /// <summary>
+     /// Creates a new instance of the state update tracker.
+     /// </summary>
+     /// <param name="entries">Update tracker callback entries</param>
+     /// <exception cref="ArgumentNullException"><paramref name="entries"/> is null</exception>
+     /// <exception cref="ArgumentException"><paramref name="entries"/> has more groups than the dirty mask has bits</exception>
+     public StateUpdateTracker(StateUpdateCallbackEntry[] entries)
+     {
+         if (entries == null)
+         {
+             throw new ArgumentNullException(nameof(entries));
+         }
+
+         // Shift counts wrap at 64, so an extra group would silently share the bit of another group.
+         if (entries.Length > MaxGroups)
+         {
+             throw new ArgumentException($"At most {MaxGroups} update groups are supported.", nameof(entries));
+         }
+
+         _registerToGroupMapping = new byte[BlockSize];
+         _callbacks = new Action[entries.Length];
+
+         var fieldToDelegate = new Dictionary<string, int>();
+
+         for (int entryIndex = 0; entryIndex < entries.Length; entryIndex++)
+         {
+             var entry = entries[entryIndex];
+
+             foreach (var fieldName in entry.FieldNames)
+             {
+                 fieldToDelegate.Add(fieldName, entryIndex);
+             }
+
+             _callbacks[entryIndex] = entry.Callback;
+         }
+
+         var fields = typeof(TState).GetFields();
+         int offset = 0;
+
+         for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
+         {
+             var field = fields[fieldIndex];
+
+             int sizeOfField = SizeCalculator.SizeOf(field.FieldType);
+
+             if (fieldToDelegate.TryGetValue(field.Name, out int entryIndex))
+             {
+                 // Every register covered by the field (size rounded up to a whole register) maps to the group.
+                 // The stored value is the group index plus one, as zero means "no group".
+                 for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
+                 {
+                     _registerToGroupMapping[(offset + i) / RegisterSize] = (byte)(entryIndex + 1);
+                 }
+             }
+
+             offset += sizeOfField;
+         }
+
+         Debug.Assert(offset == Unsafe.SizeOf<TState>());
+     }
+
+     /// <summary>
+     /// Sets a register as modified.
+     /// Registers outside of the tracked block, or not associated with any group, are ignored.
+     /// </summary>
+     /// <param name="offset">Register offset in bytes</param>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public void SetDirty(int offset)
+     {
+         uint index = (uint)offset / RegisterSize;
+
+         if (index < BlockSize)
+         {
+             // The range check above makes the unchecked table access safe.
+             int groupIndex = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_registerToGroupMapping), (IntPtr)index);
+             if (groupIndex != 0)
+             {
+                 groupIndex--;
+                 _dirtyMask |= 1UL << groupIndex;
+             }
+         }
+     }
+
+     /// <summary>
+     /// Forces a register group as dirty, by index.
+     /// </summary>
+     /// <param name="groupIndex">Index of the group to be dirtied</param>
+     /// <exception cref="ArgumentOutOfRangeException"><paramref name="groupIndex"/> is not a valid group index</exception>
+     public void ForceDirty(int groupIndex)
+     {
+         if ((uint)groupIndex >= _callbacks.Length)
+         {
+             throw new ArgumentOutOfRangeException(nameof(groupIndex));
+         }
+
+         _dirtyMask |= 1UL << groupIndex;
+     }
+
+     /// <summary>
+     /// Forces all register groups as dirty, triggering a full update on the next call to <see cref="Update"/>.
+     /// </summary>
+     public void SetAllDirty()
+     {
+         int groupCount = _callbacks.Length;
+
+         // With no groups the shift count would be 64, which wraps to 0 and would set every bit,
+         // so the empty case is handled separately.
+         _dirtyMask = groupCount == 0 ? 0UL : ulong.MaxValue >> (MaxGroups - groupCount);
+     }
+
+     /// <summary>
+     /// Check if the given register group is dirty without clearing it.
+     /// </summary>
+     /// <param name="groupIndex">Index of the group to check</param>
+     /// <returns>True if dirty, false otherwise</returns>
+     public bool IsDirty(int groupIndex)
+     {
+         return (_dirtyMask & (1UL << groupIndex)) != 0;
+     }
+
+     /// <summary>
+     /// Check all the groups specified by <paramref name="checkMask"/> for modification, and update if modified.
+     /// Callbacks run in ascending group index order, and the checked groups are marked clean afterwards.
+     /// </summary>
+     /// <param name="checkMask">Mask, where each bit set corresponds to a group index that should be checked</param>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public void Update(ulong checkMask)
+     {
+         ulong mask = _dirtyMask & checkMask;
+         if (mask == 0)
+         {
+             return;
+         }
+
+         do
+         {
+             int groupIndex = BitOperations.TrailingZeroCount(mask);
+
+             _callbacks[groupIndex]();
+
+             mask &= ~(1UL << groupIndex);
+         }
+         while (mask != 0);
+
+         _dirtyMask &= ~checkMask;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
new file mode 100644
index 00000000..00e09a31
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -0,0 +1,1448 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed.Blender;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// GPU state updater.
+ /// </summary>
+ class StateUpdater
+ {
+ public const int ShaderStateIndex = 26; // Position of the UpdateShaderState entry in the table built by the constructor.
+ public const int RasterizerStateIndex = 15; // Position of the UpdateRasterizerState entry in that table.
+ public const int ScissorStateIndex = 16; // Position of the UpdateScissorState entry in that table.
+ public const int VertexBufferStateIndex = 0; // Position of the UpdateVertexBufferState entry in that table.
+ public const int PrimitiveRestartStateIndex = 12; // Position of the UpdatePrimitiveRestartState entry in that table.
+ public const int RenderTargetStateIndex = 27; // Position of the UpdateRenderTargetState entry in that table.
+
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly DeviceStateWithShadow<ThreedClassState> _state;
+ private readonly DrawState _drawState;
+ private readonly AdvancedBlendManager _blendManager;
+
+ private readonly StateUpdateTracker<ThreedClassState> _updateTracker; // Tracks which register groups are dirty; built in the constructor.
+
+ private readonly ShaderProgramInfo[] _currentProgramInfo; // One slot per shader stage (Constants.ShaderStages); null slots are skipped by UpdateStorageBuffers.
+ private ShaderSpecializationState _shaderSpecState;
+ private SpecializationStateUpdater _currentSpecState;
+
+ private ProgramPipelineState _pipeline;
+
+ private bool _vsUsesDrawParameters;
+ private bool _vtgWritesRtLayer; // When true, UpdateRenderTargetState asks the texture cache for layered targets (presumably set from the bound shaders; confirm).
+ private byte _vsClipDistancesWritten;
+ private uint _vbEnableMask;
+
+ private bool _prevDrawIndexed; // DrawIndexed value observed by the last Update() call.
+ private bool _prevDrawIndirect; // Compared with _drawState.DrawIndirect in Update() to detect indirect/non-indirect switches.
+ private IndexType _prevIndexType; // Index type observed by the last indexed Update() call.
+ private uint _prevFirstVertex; // FirstVertex value observed by the last indexed Update() call.
+ private bool _prevTfEnable; // True between the BeginTransformFeedback and EndTransformFeedback calls issued by Update().
+
+ private uint _prevRtNoAlphaMask; // One bit per render target index whose format reports NoAlpha(), as of the last UseControl update.
+
+ /// <summary>
+ /// Creates a new instance of the state updater, and builds the table that ties register fields to update callbacks.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="state">3D engine state</param>
+ /// <param name="drawState">Draw state</param>
+ /// <param name="blendManager">Advanced blend manager</param>
+ /// <param name="spec">Specialization state updater</param>
+ public StateUpdater(
+ GpuContext context,
+ GpuChannel channel,
+ DeviceStateWithShadow<ThreedClassState> state,
+ DrawState drawState,
+ AdvancedBlendManager blendManager,
+ SpecializationStateUpdater spec)
+ {
+ _context = context;
+ _channel = channel;
+ _state = state;
+ _drawState = drawState;
+ _blendManager = blendManager;
+ _currentProgramInfo = new ShaderProgramInfo[Constants.ShaderStages];
+ _currentSpecState = spec;
+
+ // ShaderState must be updated after other state updates, as specialization/pipeline state is used when fetching shaders.
+ // Render target state must appear after shader state as it depends on information from the currently bound shader.
+ // Rasterizer and scissor states are checked by render target clear, their indexes
+ // must be updated on the constants "RasterizerStateIndex" and "ScissorStateIndex" if modified.
+ // The vertex buffer state may be forced dirty when an indexed draw starts, the "VertexBufferStateIndex"
+ // constant must be updated if modified. The same applies to "PrimitiveRestartStateIndex", "ShaderStateIndex" and "RenderTargetStateIndex".
+ // The order of the other state updates doesn't matter. The position of an entry in the array is its group index.
+ _updateTracker = new StateUpdateTracker<ThreedClassState>(new[]
+ {
+ new StateUpdateCallbackEntry(UpdateVertexBufferState, // Group 0 (VertexBufferStateIndex)
+ nameof(ThreedClassState.VertexBufferDrawState),
+ nameof(ThreedClassState.VertexBufferInstanced),
+ nameof(ThreedClassState.VertexBufferState),
+ nameof(ThreedClassState.VertexBufferEndAddress)),
+
+ // Must be done after vertex buffer updates.
+ new StateUpdateCallbackEntry(UpdateVertexAttribState, nameof(ThreedClassState.VertexAttribState)), // Group 1
+
+ new StateUpdateCallbackEntry(UpdateBlendState, // Group 2
+ nameof(ThreedClassState.BlendUcodeEnable),
+ nameof(ThreedClassState.BlendUcodeSize),
+ nameof(ThreedClassState.BlendIndependent),
+ nameof(ThreedClassState.BlendConstant),
+ nameof(ThreedClassState.BlendStateCommon),
+ nameof(ThreedClassState.BlendEnableCommon),
+ nameof(ThreedClassState.BlendEnable),
+ nameof(ThreedClassState.BlendState)),
+
+ new StateUpdateCallbackEntry(UpdateFaceState, nameof(ThreedClassState.FaceState)), // Group 3
+
+ new StateUpdateCallbackEntry(UpdateStencilTestState, // Group 4
+ nameof(ThreedClassState.StencilBackMasks),
+ nameof(ThreedClassState.StencilTestState),
+ nameof(ThreedClassState.StencilBackTestState)),
+
+ new StateUpdateCallbackEntry(UpdateDepthTestState, // Group 5
+ nameof(ThreedClassState.DepthTestEnable),
+ nameof(ThreedClassState.DepthWriteEnable),
+ nameof(ThreedClassState.DepthTestFunc)),
+
+ new StateUpdateCallbackEntry(UpdateTessellationState, // Group 6
+ nameof(ThreedClassState.TessMode),
+ nameof(ThreedClassState.TessOuterLevel),
+ nameof(ThreedClassState.TessInnerLevel),
+ nameof(ThreedClassState.PatchVertices)),
+
+ new StateUpdateCallbackEntry(UpdateViewportTransform, // Group 7
+ nameof(ThreedClassState.DepthMode),
+ nameof(ThreedClassState.ViewportTransform),
+ nameof(ThreedClassState.ViewportExtents),
+ nameof(ThreedClassState.YControl),
+ nameof(ThreedClassState.ViewportTransformEnable)),
+
+ new StateUpdateCallbackEntry(UpdateLogicOpState, nameof(ThreedClassState.LogicOpState)), // Group 8
+
+ new StateUpdateCallbackEntry(UpdateDepthClampState, nameof(ThreedClassState.ViewVolumeClipControl)), // Group 9
+
+ new StateUpdateCallbackEntry(UpdatePolygonMode, // Group 10
+ nameof(ThreedClassState.PolygonModeFront),
+ nameof(ThreedClassState.PolygonModeBack)),
+
+ new StateUpdateCallbackEntry(UpdateDepthBiasState, // Group 11
+ nameof(ThreedClassState.DepthBiasState),
+ nameof(ThreedClassState.DepthBiasFactor),
+ nameof(ThreedClassState.DepthBiasUnits),
+ nameof(ThreedClassState.DepthBiasClamp)),
+
+ new StateUpdateCallbackEntry(UpdatePrimitiveRestartState, nameof(ThreedClassState.PrimitiveRestartState)), // Group 12 (PrimitiveRestartStateIndex)
+
+ new StateUpdateCallbackEntry(UpdateLineState, // Group 13
+ nameof(ThreedClassState.LineWidthSmooth),
+ nameof(ThreedClassState.LineSmoothEnable)),
+
+ new StateUpdateCallbackEntry(UpdateRtColorMask, // Group 14
+ nameof(ThreedClassState.RtColorMaskShared),
+ nameof(ThreedClassState.RtColorMask)),
+
+ new StateUpdateCallbackEntry(UpdateRasterizerState, nameof(ThreedClassState.RasterizeEnable)), // Group 15 (RasterizerStateIndex)
+
+ new StateUpdateCallbackEntry(UpdateScissorState, // Group 16 (ScissorStateIndex)
+ nameof(ThreedClassState.ScissorState),
+ nameof(ThreedClassState.ScreenScissorState)),
+
+ new StateUpdateCallbackEntry(UpdateTfBufferState, nameof(ThreedClassState.TfBufferState)), // Group 17
+ new StateUpdateCallbackEntry(UpdateUserClipState, nameof(ThreedClassState.ClipDistanceEnable)), // Group 18
+
+ new StateUpdateCallbackEntry(UpdateAlphaTestState, // Group 19
+ nameof(ThreedClassState.AlphaTestEnable),
+ nameof(ThreedClassState.AlphaTestRef),
+ nameof(ThreedClassState.AlphaTestFunc)),
+
+ new StateUpdateCallbackEntry(UpdateSamplerPoolState, // Group 20
+ nameof(ThreedClassState.SamplerPoolState),
+ nameof(ThreedClassState.SamplerIndex)),
+
+ new StateUpdateCallbackEntry(UpdateTexturePoolState, nameof(ThreedClassState.TexturePoolState)), // Group 21
+
+ new StateUpdateCallbackEntry(UpdatePointState, // Group 22
+ nameof(ThreedClassState.PointSize),
+ nameof(ThreedClassState.VertexProgramPointSize),
+ nameof(ThreedClassState.PointSpriteEnable),
+ nameof(ThreedClassState.PointCoordReplace)),
+
+ new StateUpdateCallbackEntry(UpdateIndexBufferState, // Group 23
+ nameof(ThreedClassState.IndexBufferState),
+ nameof(ThreedClassState.IndexBufferCount)),
+
+ new StateUpdateCallbackEntry(UpdateMultisampleState, // Group 24
+ nameof(ThreedClassState.AlphaToCoverageDitherEnable),
+ nameof(ThreedClassState.MultisampleControl)),
+
+ new StateUpdateCallbackEntry(UpdateEarlyZState, // Group 25
+ nameof(ThreedClassState.EarlyZForce)),
+
+ new StateUpdateCallbackEntry(UpdateShaderState, // Group 26 (ShaderStateIndex)
+ nameof(ThreedClassState.ShaderBaseAddress),
+ nameof(ThreedClassState.ShaderState)),
+
+ new StateUpdateCallbackEntry(UpdateRenderTargetState, // Group 27 (RenderTargetStateIndex)
+ nameof(ThreedClassState.RtColorState),
+ nameof(ThreedClassState.RtDepthStencilState),
+ nameof(ThreedClassState.RtControl),
+ nameof(ThreedClassState.RtDepthStencilSize),
+ nameof(ThreedClassState.RtDepthStencilEnable)),
+ });
+ }
+
+ /// <summary>
+ /// Flags the register at the given offset as dirty.
+ /// Callers must invoke this whenever the register value was modified.
+ /// </summary>
+ /// <param name="offset">Register offset</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetDirty(int offset) => _updateTracker.SetDirty(offset);
+
+ /// <summary>
+ /// Marks all the guest state as dirty.
+ /// As a result, the next call to <see cref="Update"/> updates all the host state.
+ /// </summary>
+ public void SetAllDirty() => _updateTracker.SetAllDirty();
+
+ /// <summary>
+ /// Brings the host state up to date with every guest state change made since the last time this function was called.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Update()
+ {
+     bool indexedDraw = _drawState.DrawIndexed;
+
+     // Indexed draws calculate the vertex buffer size with a different method,
+     // so the vertex buffers must be refreshed when switching from an indexed
+     // draw to a regular one, and vice-versa.
+     if (indexedDraw != _prevDrawIndexed)
+     {
+         _updateTracker.ForceDirty(VertexBufferStateIndex);
+
+         // "PrimitiveRestartDrawArrays" is not configurable on the backend, it is always
+         // true on OpenGL and always false on Vulkan. For that reason:
+         // - If it is false and this is a non-indexed draw, primitive restart must be disabled.
+         // - If it is false and this is an indexed draw, primitive restart enable must match GPU state.
+         // - If it is true, primitive restart enable should always match GPU state.
+         if (!_state.State.PrimitiveRestartDrawArrays && _state.State.PrimitiveRestartState.Enable)
+         {
+             _updateTracker.ForceDirty(PrimitiveRestartStateIndex);
+         }
+
+         _prevDrawIndexed = indexedDraw;
+     }
+
+     // Some draw parameters restrict the vertex buffer size, but their values are unknown
+     // on indirect draws. Switching between indirect and non-indirect draws therefore
+     // requires the vertex buffer sizes to be checked again.
+     if (_drawState.DrawIndirect != _prevDrawIndirect)
+     {
+         _updateTracker.ForceDirty(VertexBufferStateIndex);
+     }
+
+     // In some cases the index type is also used to guess the vertex buffer size,
+     // so a change of index type (or first vertex) calls for an update as well.
+     if (indexedDraw)
+     {
+         IndexType indexType = _state.State.IndexBufferState.Type;
+         uint firstVertex = _state.State.FirstVertex;
+
+         if (_prevIndexType != indexType || _prevFirstVertex != firstVertex)
+         {
+             _updateTracker.ForceDirty(VertexBufferStateIndex);
+             _prevIndexType = indexType;
+             _prevFirstVertex = firstVertex;
+         }
+     }
+
+     bool tfEnable = _state.State.TfEnable;
+
+     // Transform feedback is ended before any state update when the guest turned it off.
+     if (_prevTfEnable && !tfEnable)
+     {
+         _context.Renderer.Pipeline.EndTransformFeedback();
+         _prevTfEnable = false;
+     }
+
+     if (_updateTracker.IsDirty(RenderTargetStateIndex))
+     {
+         UpdateRenderTargetSpecialization();
+     }
+
+     _updateTracker.Update(ulong.MaxValue);
+
+     // When state the shader depends on changed, a different version of the shader
+     // may have to be compiled/bound for the new state.
+     bool specializationChanged = _shaderSpecState != null && _currentSpecState.HasChanged();
+
+     if (specializationChanged &&
+         !_shaderSpecState.MatchesGraphics(_channel, ref _currentSpecState.GetPoolState(), ref _currentSpecState.GetGraphicsState(), _vsUsesDrawParameters, false))
+     {
+         // Shader must be reloaded. _vtgWritesRtLayer should not change.
+         UpdateShaderState();
+     }
+
+     CommitBindings();
+
+     // Transform feedback begins only after all state and bindings are in place.
+     if (tfEnable && !_prevTfEnable)
+     {
+         _context.Renderer.Pipeline.BeginTransformFeedback(_drawState.Topology);
+         _prevTfEnable = true;
+     }
+ }
+
+ /// <summary>
+ /// Updates the host state for every modified guest state group that has its bit set on <paramref name="mask"/>.
+ /// </summary>
+ /// <param name="mask">Mask, where each bit set corresponds to a group index that should be checked and updated</param>
+ public void Update(ulong mask) => _updateTracker.Update(mask);
+
+ /// <summary>
+ /// Makes the bindings visible to the host GPU.
+ /// Note: this is where the binding is actually performed using the host graphics API.
+ /// </summary>
+ private void CommitBindings()
+ {
+     UpdateStorageBuffers();
+
+     bool unalignedChanged = _currentSpecState.SetHasUnalignedStorageBuffer(_channel.BufferManager.HasUnalignedStorageBuffers);
+     bool texturesCommitted = _channel.TextureManager.CommitGraphicsBindings(_shaderSpecState);
+
+     if (unalignedChanged || !texturesCommitted)
+     {
+         // Shader must be reloaded. _vtgWritesRtLayer should not change.
+         UpdateShaderState();
+     }
+
+     _channel.BufferManager.CommitGraphicsBindings();
+ }
+
+ /// <summary>
+ /// Updates the storage buffer bindings of every shader stage that has program information.
+ /// Each descriptor is read from guest memory, at an offset from the address of uniform buffer 0 of the stage.
+ /// </summary>
+ private void UpdateStorageBuffers()
+ {
+     for (int stage = 0; stage < Constants.ShaderStages; stage++)
+     {
+         ShaderProgramInfo programInfo = _currentProgramInfo[stage];
+
+         if (programInfo != null)
+         {
+             // Offset of the first descriptor of this stage.
+             int stageOffset = 0x110 + stage * 0x100;
+
+             for (int i = 0; i < programInfo.SBuffers.Count; i++)
+             {
+                 BufferDescriptor buffer = programInfo.SBuffers[i];
+
+                 ulong baseAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, 0);
+
+                 // Descriptors are 0x10 bytes apart, indexed by slot.
+                 int descriptorOffset = stageOffset + buffer.Slot * 0x10;
+
+                 SbDescriptor descriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(baseAddress + (ulong)descriptorOffset);
+
+                 _channel.BufferManager.SetGraphicsStorageBuffer(stage, buffer.Slot, descriptor.PackAddress(), (uint)descriptor.Size, buffer.Flags);
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Updates the tessellation state (patch parameters and tessellation mode) from the guest GPU state.
+ /// </summary>
+ private void UpdateTessellationState()
+ {
+     var patchVertices = _state.State.PatchVertices;
+
+     _pipeline.PatchControlPoints = (uint)patchVertices;
+
+     _context.Renderer.Pipeline.SetPatchParameters(
+         patchVertices,
+         _state.State.TessOuterLevel.AsSpan(),
+         _state.State.TessInnerLevel.AsSpan());
+
+     _currentSpecState.SetTessellationMode(_state.State.TessMode);
+ }
+
+ /// <summary>
+ /// Updates the transform feedback buffer bindings from the guest GPU state.
+ /// Disabled buffers are bound with a zero address and size.
+ /// </summary>
+ private void UpdateTfBufferState()
+ {
+     for (int index = 0; index < Constants.TotalTransformFeedbackBuffers; index++)
+     {
+         TfBufferState buffer = _state.State.TfBufferState[index];
+
+         if (!buffer.Enable)
+         {
+             _channel.BufferManager.SetTransformFeedbackBuffer(index, 0, 0);
+         }
+         else
+         {
+             _channel.BufferManager.SetTransformFeedbackBuffer(index, buffer.Address.Pack(), (uint)buffer.Size);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Updates the rasterizer primitive discard state from the guest GPU state.
+ /// Discard is the inverse of the guest rasterize enable.
+ /// </summary>
+ private void UpdateRasterizerState()
+ {
+     bool rasterize = _state.State.RasterizeEnable;
+     bool discard = !rasterize;
+
+     _pipeline.RasterizerDiscard = discard;
+     _context.Renderer.Pipeline.SetRasterizerDiscard(discard);
+ }
+
+ /// <summary>
+ /// Updates all render targets (color and depth-stencil buffers) based on current render target state.
+ /// </summary>
+ private void UpdateRenderTargetState() => UpdateRenderTargetState(RenderTargetUpdateFlags.UpdateAll);
+
+ /// <summary>
+ /// Binds the render targets (color and depth-stencil buffers) described by the current render target state,
+ /// then refreshes the render target scale, the clip region and, when needed, the blend state.
+ /// </summary>
+ /// <param name="updateFlags">Flags indicating which render targets should be updated and how</param>
+ /// <param name="singleUse">If this is not -1, it indicates that only the given indexed target will be used.</param>
+ public void UpdateRenderTargetState(RenderTargetUpdateFlags updateFlags, int singleUse = -1)
+ {
+     var memoryManager = _channel.MemoryManager;
+     var rtControl = _state.State.RtControl;
+
+     bool useControl = updateFlags.HasFlag(RenderTargetUpdateFlags.UseControl);
+     bool layered = updateFlags.HasFlag(RenderTargetUpdateFlags.Layered);
+     bool singleColor = updateFlags.HasFlag(RenderTargetUpdateFlags.SingleColor);
+
+     int count = useControl ? rtControl.UnpackCount() : Constants.TotalRenderTargets;
+
+     var msaaMode = _state.State.RtMsaaMode;
+
+     int samplesInX = msaaMode.SamplesInX();
+     int samplesInY = msaaMode.SamplesInY();
+
+     var screenScissor = _state.State.ScreenScissorState;
+     Size sizeHint = new Size(
+         (screenScissor.X + screenScissor.Width) * samplesInX,
+         (screenScissor.Y + screenScissor.Height) * samplesInY,
+         1);
+
+     bool useLayers = _vtgWritesRtLayer || layered;
+
+     int clipRegionWidth = int.MaxValue;
+     int clipRegionHeight = int.MaxValue;
+
+     // The clip region is the smallest width/height, in pixels rather than samples, among the bound targets.
+     void ShrinkClipRegion(Image.Texture target)
+     {
+         clipRegionWidth = Math.Min(clipRegionWidth, target.Width / samplesInX);
+         clipRegionHeight = Math.Min(clipRegionHeight, target.Height / samplesInY);
+     }
+
+     bool changedScale = false;
+     uint rtNoAlphaMask = 0;
+
+     for (int index = 0; index < Constants.TotalRenderTargets; index++)
+     {
+         int rtIndex = useControl ? rtControl.UnpackPermutationIndex(index) : index;
+
+         var colorState = _state.State.RtColorState[rtIndex];
+
+         bool inUse = index < count && IsRtEnabled(colorState) && (!singleColor || index == singleUse);
+
+         // Targets that are not in use are unbound by passing a null texture.
+         Image.Texture color = null;
+
+         if (inUse)
+         {
+             if (colorState.Format.NoAlpha())
+             {
+                 rtNoAlphaMask |= 1u << index;
+             }
+
+             color = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+                 memoryManager,
+                 colorState,
+                 useLayers,
+                 samplesInX,
+                 samplesInY,
+                 sizeHint);
+         }
+
+         changedScale |= _channel.TextureManager.SetRenderTargetColor(index, color);
+
+         if (color != null)
+         {
+             ShrinkClipRegion(color);
+         }
+     }
+
+     bool dsEnable = _state.State.RtDepthStencilEnable;
+
+     Image.Texture depthStencil = null;
+
+     if (dsEnable && updateFlags.HasFlag(RenderTargetUpdateFlags.UpdateDepthStencil))
+     {
+         var dsState = _state.State.RtDepthStencilState;
+         var dsSize = _state.State.RtDepthStencilSize;
+
+         depthStencil = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+             memoryManager,
+             dsState,
+             dsSize,
+             useLayers,
+             samplesInX,
+             samplesInY,
+             sizeHint);
+
+         if (depthStencil != null)
+         {
+             ShrinkClipRegion(depthStencil);
+         }
+     }
+
+     changedScale |= _channel.TextureManager.SetRenderTargetDepthStencil(depthStencil);
+
+     if (changedScale)
+     {
+         float previousScale = _channel.TextureManager.RenderTargetScale;
+         _channel.TextureManager.UpdateRenderTargetScale(singleUse);
+
+         // Viewport and scissor are refreshed only when the scale really ended up different.
+         if (previousScale != _channel.TextureManager.RenderTargetScale)
+         {
+             _context.Renderer.Pipeline.SetRenderTargetScale(_channel.TextureManager.RenderTargetScale);
+
+             UpdateViewportTransform();
+             UpdateScissorState();
+         }
+     }
+
+     _channel.TextureManager.SetClipRegion(clipRegionWidth, clipRegionHeight);
+
+     // The blend state is refreshed when the set of targets without alpha changed.
+     if (useControl && _prevRtNoAlphaMask != rtNoAlphaMask)
+     {
+         _prevRtNoAlphaMask = rtNoAlphaMask;
+
+         UpdateBlendState();
+     }
+ }
+
+        /// <summary>
+        /// Pushes the render target control and color state registers into the current
+        /// specialization state, which derives the fragment output types from them.
+        /// </summary>
+        public void UpdateRenderTargetSpecialization() =>
+            _currentSpecState.SetFragmentOutputTypes(_state.State.RtControl, ref _state.State.RtColorState);
+
+        /// <summary>
+        /// Tells whether a render target color buffer is in use.
+        /// </summary>
+        /// <param name="colorState">Color buffer information</param>
+        /// <returns>True when both the format and the width/stride are non-zero, false otherwise</returns>
+        internal static bool IsRtEnabled(RtColorState colorState)
+        {
+            // A zero format or a zero width/stride marks the color buffer as unused.
+            bool unused = colorState.Format == 0 || colorState.WidthOrStride == 0;
+
+            return !unused;
+        }
+
+        /// <summary>
+        /// Rebuilds the host scissor rectangle of every viewport from the guest scissor registers
+        /// and sends the whole set to the host pipeline.
+        /// </summary>
+        public void UpdateScissorState()
+        {
+            const int MinX = 0;
+            const int MinY = 0;
+            const int MaxW = 0xffff;
+            const int MaxH = 0xffff;
+
+            // Neither value changes while the rectangles are being built.
+            bool negateY = _state.State.YControl.HasFlag(YControl.NegateY);
+            float scale = _channel.TextureManager.RenderTargetScale;
+
+            Span<Rectangle<int>> regions = stackalloc Rectangle<int>[Constants.TotalViewports];
+
+            for (int viewportIndex = 0; viewportIndex < Constants.TotalViewports; viewportIndex++)
+            {
+                ScissorState scissor = _state.State.ScissorState[viewportIndex];
+
+                bool scissorEnabled = scissor.Enable;
+                bool coversFullRange = scissor.X1 == MinX &&
+                                       scissor.Y1 == MinY &&
+                                       scissor.X2 == MaxW &&
+                                       scissor.Y2 == MaxH;
+
+                // A disabled scissor, or one spanning the whole range, maps to the full rectangle.
+                if (!scissorEnabled || coversFullRange)
+                {
+                    regions[viewportIndex] = new Rectangle<int>(MinX, MinY, MaxW, MaxH);
+
+                    continue;
+                }
+
+                int x = scissor.X1;
+                int y = scissor.Y1;
+                int width = scissor.X2 - x;
+                int height = scissor.Y2 - y;
+
+                if (negateY)
+                {
+                    // Mirror the rectangle vertically inside the screen scissor height.
+                    y = _state.State.ScreenScissorState.Height - height - y;
+
+                    if (y < 0)
+                    {
+                        // Clip the part that ended up above the top edge.
+                        height += y;
+                        y = 0;
+                    }
+                }
+
+                if (scale != 1f)
+                {
+                    // Origin is truncated, extent is rounded up.
+                    x = (int)(x * scale);
+                    y = (int)(y * scale);
+                    width = (int)MathF.Ceiling(width * scale);
+                    height = (int)MathF.Ceiling(height * scale);
+                }
+
+                regions[viewportIndex] = new Rectangle<int>(x, y, width, height);
+            }
+
+            _context.Renderer.Pipeline.SetScissors(regions);
+        }
+
+        /// <summary>
+        /// Updates host depth clamp state based on current GPU state.
+        /// Depth clamp is enabled unless the DepthClampDisabled bit of the
+        /// view volume clip control register is set.
+        /// </summary>
+        private void UpdateDepthClampState()
+        {
+            ViewVolumeClipControl clip = _state.State.ViewVolumeClipControl;
+            bool clamp = (clip & ViewVolumeClipControl.DepthClampDisabled) == 0;
+
+            // Keep the cached pipeline state and the host renderer in agreement.
+            _pipeline.DepthClampEnable = clamp;
+            _context.Renderer.Pipeline.SetDepthClamp(clamp);
+        }
+
+        /// <summary>
+        /// Sends the guest alpha test enable, reference value and compare function
+        /// to both the host pipeline and the current specialization state.
+        /// </summary>
+        private void UpdateAlphaTestState()
+        {
+            var enable = _state.State.AlphaTestEnable;
+            var reference = _state.State.AlphaTestRef;
+            var func = _state.State.AlphaTestFunc;
+
+            _context.Renderer.Pipeline.SetAlphaTest(enable, reference, func);
+            _currentSpecState.SetAlphaTest(enable, reference, func);
+        }
+
+        /// <summary>
+        /// Builds a depth test descriptor from the guest depth test enable, depth write enable
+        /// and depth compare function, then applies it to the cached pipeline state and the host.
+        /// </summary>
+        private void UpdateDepthTestState()
+        {
+            var depthTest = new DepthTestDescriptor(
+                _state.State.DepthTestEnable,
+                _state.State.DepthWriteEnable,
+                _state.State.DepthTestFunc);
+
+            _pipeline.DepthTest = depthTest;
+            _context.Renderer.Pipeline.SetDepthTest(depthTest);
+        }
+
+        /// <summary>
+        /// Updates host viewport transform and clipping state based on current GPU state.
+        /// Also refreshes the front face and depth mode, and mirrors the transform-disable
+        /// flag and depth mode into the current specialization state.
+        /// </summary>
+        private void UpdateViewportTransform()
+        {
+            var yControl = _state.State.YControl;
+            var face = _state.State.FaceState;
+
+            bool disableTransform = _state.State.ViewportTransformEnable == 0;
+            bool flipY = yControl.HasFlag(YControl.NegateY);
+
+            // These values do not change while the viewports are being built, so read them once
+            // instead of once per viewport (scale) or once per consumer (depth mode).
+            float scale = _channel.TextureManager.RenderTargetScale;
+            DepthMode depthMode = GetDepthMode();
+
+            UpdateFrontFace(yControl, face.FrontFace);
+
+            Span<Viewport> viewports = stackalloc Viewport[Constants.TotalViewports];
+
+            if (disableTransform)
+            {
+                // With the transform disabled every viewport is the same rectangle derived from
+                // the screen scissor, so build it once and replicate it.
+                ref var scissor = ref _state.State.ScreenScissorState;
+
+                var scissorRect = new Rectangle<float>(0, 0, (scissor.X + scissor.Width) * scale, (scissor.Y + scissor.Height) * scale);
+
+                viewports.Fill(new Viewport(
+                    scissorRect,
+                    ViewportSwizzle.PositiveX,
+                    ViewportSwizzle.PositiveY,
+                    ViewportSwizzle.PositiveZ,
+                    ViewportSwizzle.PositiveW,
+                    0,
+                    1));
+            }
+            else
+            {
+                bool supportsSwizzle = _context.Capabilities.SupportsViewportSwizzle;
+
+                for (int index = 0; index < Constants.TotalViewports; index++)
+                {
+                    ref var transform = ref _state.State.ViewportTransform[index];
+                    ref var extents = ref _state.State.ViewportExtents[index];
+
+                    float scaleX = MathF.Abs(transform.ScaleX);
+                    float scaleY = transform.ScaleY;
+
+                    if (flipY)
+                    {
+                        scaleY = -scaleY;
+                    }
+
+                    // Without host swizzle support, a negative Y swizzle is folded into the Y scale sign.
+                    if (!supportsSwizzle && transform.UnpackSwizzleY() == ViewportSwizzle.NegativeY)
+                    {
+                        scaleY = -scaleY;
+                    }
+
+                    float x = transform.TranslateX - scaleX;
+                    float y = transform.TranslateY - scaleY;
+
+                    float width = scaleX * 2;
+                    float height = scaleY * 2;
+
+                    if (scale != 1f)
+                    {
+                        x *= scale;
+                        y *= scale;
+                        width *= scale;
+                        height *= scale;
+                    }
+
+                    Rectangle<float> region = new Rectangle<float>(x, y, width, height);
+
+                    ViewportSwizzle swizzleX = transform.UnpackSwizzleX();
+                    ViewportSwizzle swizzleY = transform.UnpackSwizzleY();
+                    ViewportSwizzle swizzleZ = transform.UnpackSwizzleZ();
+                    ViewportSwizzle swizzleW = transform.UnpackSwizzleW();
+
+                    float depthNear = extents.DepthNear;
+                    float depthFar = extents.DepthFar;
+
+                    // A negative Z scale means the depth range is reversed.
+                    if (transform.ScaleZ < 0)
+                    {
+                        (depthNear, depthFar) = (depthFar, depthNear);
+                    }
+
+                    viewports[index] = new Viewport(region, swizzleX, swizzleY, swizzleZ, swizzleW, depthNear, depthFar);
+                }
+            }
+
+            // The depth mode is sent once; it was previously set twice with the same value.
+            _context.Renderer.Pipeline.SetDepthMode(depthMode);
+            _context.Renderer.Pipeline.SetViewports(viewports, disableTransform);
+
+            _currentSpecState.SetViewportTransformDisable(disableTransform);
+            _currentSpecState.SetDepthMode(depthMode == DepthMode.MinusOneToOne);
+        }
+
+        /// <summary>
+        /// Sends the depth mode returned by <see cref="GetDepthMode"/> (0 to 1 or -1 to 1) to the host pipeline.
+        /// </summary>
+        private void UpdateDepthMode()
+        {
+            DepthMode mode = GetDepthMode();
+
+            _context.Renderer.Pipeline.SetDepthMode(mode);
+        }
+
+        /// <summary>
+        /// Sends the guest front and back polygon modes to the host pipeline.
+        /// </summary>
+        private void UpdatePolygonMode()
+        {
+            var frontMode = _state.State.PolygonModeFront;
+            var backMode = _state.State.PolygonModeBack;
+
+            _context.Renderer.Pipeline.SetPolygonMode(frontMode, backMode);
+        }
+
+        /// <summary>
+        /// Updates host depth bias (polygon offset) state from the guest registers.
+        /// The per-primitive enables are folded into a single mask.
+        /// </summary>
+        private void UpdateDepthBiasState()
+        {
+            var biasEnables = _state.State.DepthBiasState;
+
+            PolygonModeMask enabledModes = 0;
+
+            if (biasEnables.PointEnable)
+            {
+                enabledModes |= PolygonModeMask.Point;
+            }
+
+            if (biasEnables.LineEnable)
+            {
+                enabledModes |= PolygonModeMask.Line;
+            }
+
+            if (biasEnables.FillEnable)
+            {
+                enabledModes |= PolygonModeMask.Fill;
+            }
+
+            float biasFactor = _state.State.DepthBiasFactor;
+            float biasUnits = _state.State.DepthBiasUnits;
+            float biasClamp = _state.State.DepthBiasClamp;
+
+            _pipeline.BiasEnable = enabledModes;
+
+            // NOTE(review): the units value is halved before reaching the host; the reason is not visible here.
+            _context.Renderer.Pipeline.SetDepthBias(enabledModes, biasFactor, biasUnits / 2f, biasClamp);
+        }
+
+        /// <summary>
+        /// Builds the host stencil test descriptor from the guest state and applies it.
+        /// When two-sided stencil is off, the back face reuses the front face settings.
+        /// </summary>
+        private void UpdateStencilTestState()
+        {
+            var front = _state.State.StencilTestState;
+            var back = _state.State.StencilBackTestState;
+            var backMasks = _state.State.StencilBackMasks;
+
+            bool twoSided = back.TwoSided;
+
+            // Pick each back face value from its own registers, or from the front face.
+            CompareOp backFunc = twoSided ? back.BackFunc : front.FrontFunc;
+            StencilOp backSFail = twoSided ? back.BackSFail : front.FrontSFail;
+            StencilOp backDpPass = twoSided ? back.BackDpPass : front.FrontDpPass;
+            StencilOp backDpFail = twoSided ? back.BackDpFail : front.FrontDpFail;
+            int backFuncRef = twoSided ? backMasks.FuncRef : front.FrontFuncRef;
+            int backFuncMask = twoSided ? backMasks.FuncMask : front.FrontFuncMask;
+            int backMask = twoSided ? backMasks.Mask : front.FrontMask;
+
+            var stencilTest = new StencilTestDescriptor(
+                front.Enable,
+                front.FrontFunc,
+                front.FrontSFail,
+                front.FrontDpPass,
+                front.FrontDpFail,
+                front.FrontFuncRef,
+                front.FrontFuncMask,
+                front.FrontMask,
+                backFunc,
+                backSFail,
+                backDpPass,
+                backDpFail,
+                backFuncRef,
+                backFuncMask,
+                backMask);
+
+            _pipeline.StencilTest = stencilTest;
+            _context.Renderer.Pipeline.SetStencilTest(stencilTest);
+        }
+
+        /// <summary>
+        /// Enables or disables each host user clip distance.
+        /// A distance is enabled only when the guest enable bit is set and the
+        /// vertex shader writes that distance.
+        /// </summary>
+        private void UpdateUserClipState()
+        {
+            uint enabledAndWritten = _state.State.ClipDistanceEnable & _vsClipDistancesWritten;
+
+            for (int distance = 0; distance < Constants.TotalClipDistances; ++distance)
+            {
+                bool enabled = ((enabledAndWritten >> distance) & 1u) != 0;
+
+                _context.Renderer.Pipeline.SetUserClipDistance(distance, enabled);
+            }
+        }
+
+        /// <summary>
+        /// Updates the sampler pool address, maximum ID and index mode on the texture manager.
+        /// When samplers are indexed via the texture header, the texture pool maximum ID is used
+        /// in place of the sampler pool one.
+        /// </summary>
+        private void UpdateSamplerPoolState()
+        {
+            var samplerPool = _state.State.SamplerPoolState;
+            var indexMode = _state.State.SamplerIndex;
+
+            int maximumId;
+
+            if (indexMode == SamplerIndex.ViaHeaderIndex)
+            {
+                maximumId = _state.State.TexturePoolState.MaximumId;
+            }
+            else
+            {
+                maximumId = samplerPool.MaximumId;
+            }
+
+            _channel.TextureManager.SetGraphicsSamplerPool(samplerPool.Address.Pack(), maximumId, indexMode);
+        }
+
+        /// <summary>
+        /// Updates the texture pool address and maximum ID, and the texture buffer index, on the
+        /// texture manager, then stores the resulting pool state in the specialization state.
+        /// </summary>
+        private void UpdateTexturePoolState()
+        {
+            var pool = _state.State.TexturePoolState;
+            ulong poolAddress = pool.Address.Pack();
+            int textureBufferIndex = (int)_state.State.TextureBufferIndex;
+
+            _channel.TextureManager.SetGraphicsTexturePool(poolAddress, pool.MaximumId);
+            _channel.TextureManager.SetGraphicsTextureBufferIndex(textureBufferIndex);
+
+            _currentSpecState.SetPoolState(GetPoolState());
+        }
+
+        /// <summary>
+        /// Rebuilds the host vertex attribute descriptors from the guest state and applies them
+        /// to the cached pipeline state, the host pipeline and the specialization state.
+        /// </summary>
+        private void UpdateVertexAttribState()
+        {
+            uint enabledBuffers = _vbEnableMask;
+
+            Span<VertexAttribDescriptor> descriptors = stackalloc VertexAttribDescriptor[Constants.TotalVertexAttribs];
+
+            for (int attribIndex = 0; attribIndex < Constants.TotalVertexAttribs; attribIndex++)
+            {
+                var attrib = _state.State.VertexAttribState[attribIndex];
+
+                int vbIndex = attrib.UnpackBufferIndex();
+                bool bufferEnabled = (enabledBuffers & (1u << vbIndex)) != 0;
+
+                if (bufferEnabled)
+                {
+                    if (!FormatTable.TryGetAttribFormat(attrib.UnpackFormat(), out Format format))
+                    {
+                        Logger.Debug?.Print(LogClass.Gpu, $"Invalid attribute format 0x{attrib.UnpackFormat():X}.");
+
+                        // Unknown format: fall back to a four component 32-bit format of the same type.
+                        format = attrib.UnpackType() switch
+                        {
+                            VertexAttribType.Sint => Format.R32G32B32A32Sint,
+                            VertexAttribType.Uint => Format.R32G32B32A32Uint,
+                            _ => Format.R32G32B32A32Float
+                        };
+                    }
+
+                    descriptors[attribIndex] = new VertexAttribDescriptor(
+                        vbIndex,
+                        attrib.UnpackOffset(),
+                        attrib.UnpackIsConstant(),
+                        format);
+                }
+                else
+                {
+                    // Using a vertex buffer that doesn't exist is invalid, so a dummy constant attribute is used instead.
+                    descriptors[attribIndex] = new VertexAttribDescriptor(0, 0, true, Format.R32G32B32A32Float);
+                }
+            }
+
+            _pipeline.SetVertexAttribs(descriptors);
+            _context.Renderer.Pipeline.SetVertexAttribs(descriptors);
+            _currentSpecState.SetAttributeTypes(ref _state.State.VertexAttribState);
+        }
+
+        /// <summary>
+        /// Updates the host line width and line smoothing flag from the guest state.
+        /// The width is also recorded on the cached pipeline state.
+        /// </summary>
+        private void UpdateLineState()
+        {
+            float lineWidth = _state.State.LineWidthSmooth;
+            bool smoothEnable = _state.State.LineSmoothEnable;
+
+            _pipeline.LineWidth = lineWidth;
+
+            _context.Renderer.Pipeline.SetLineParameters(lineWidth, smoothEnable);
+        }
+
+        /// <summary>
+        /// Updates host point parameters (size, program point size, point sprite and origin)
+        /// from the guest state, and mirrors size and program point size into the specialization state.
+        /// </summary>
+        private void UpdatePointState()
+        {
+            float pointSize = _state.State.PointSize;
+            bool programPointSize = _state.State.VertexProgramPointSize;
+            bool pointSprite = _state.State.PointSpriteEnable;
+
+            // TODO: Need to figure out a way to map PointCoordReplace enable bit.
+            bool upperLeft = (_state.State.PointCoordReplace & 4) != 0;
+            Origin origin = upperLeft ? Origin.UpperLeft : Origin.LowerLeft;
+
+            _context.Renderer.Pipeline.SetPointParameters(pointSize, programPointSize, pointSprite, origin);
+
+            _currentSpecState.SetProgramPointSizeEnable(programPointSize);
+            _currentSpecState.SetPointSize(pointSize);
+        }
+
+        /// <summary>
+        /// Updates host primitive restart from the guest state.
+        /// Restart is only enabled for indexed draws, or when the guest also allows it for array draws.
+        /// </summary>
+        private void UpdatePrimitiveRestartState()
+        {
+            PrimitiveRestartState restart = _state.State.PrimitiveRestartState;
+
+            bool appliesToDraw = _drawState.DrawIndexed || _state.State.PrimitiveRestartDrawArrays;
+            bool restartEnable = restart.Enable && appliesToDraw;
+
+            _pipeline.PrimitiveRestartEnable = restartEnable;
+            _context.Renderer.Pipeline.SetPrimitiveRestart(restartEnable, restart.Index);
+        }
+
+        /// <summary>
+        /// Updates the host index buffer binding from the guest state.
+        /// Nothing is bound when the current draw has no indices.
+        /// </summary>
+        private void UpdateIndexBufferState()
+        {
+            if (_drawState.IndexCount == 0)
+            {
+                return;
+            }
+
+            var indexBuffer = _state.State.IndexBufferState;
+
+            // Do not use the end address to calculate the size, because
+            // the result may be much larger than the real size of the index buffer.
+            ulong indexCount = (ulong)(_drawState.FirstIndex + _drawState.IndexCount);
+
+            // Any type other than UShort and UInt leaves the count unscaled.
+            ulong bytesPerIndex = indexBuffer.Type switch
+            {
+                IndexType.UShort => 2UL,
+                IndexType.UInt => 4UL,
+                _ => 1UL
+            };
+
+            _channel.BufferManager.SetIndexBuffer(indexBuffer.Address.Pack(), indexCount * bytesPerIndex, indexBuffer.Type);
+        }
+
+        /// <summary>
+        /// Updates host vertex buffer bindings based on guest GPU state.
+        /// Also recomputes the mask of enabled and mapped vertex buffers, and refreshes the
+        /// vertex attributes when that mask changes.
+        /// </summary>
+        private void UpdateVertexBufferState()
+        {
+            IndexType indexType = _state.State.IndexBufferState.Type;
+            bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort;
+
+            _drawState.IsAnyVbInstanced = false;
+
+            bool drawIndexed = _drawState.DrawIndexed;
+            bool drawIndirect = _drawState.DrawIndirect;
+            int drawFirstVertex = _drawState.DrawFirstVertex;
+            int drawVertexCount = _drawState.DrawVertexCount;
+            uint vbEnableMask = 0;
+
+            for (int index = 0; index < Constants.TotalVertexBuffers; index++)
+            {
+                var vertexBuffer = _state.State.VertexBufferState[index];
+
+                if (!vertexBuffer.UnpackEnable())
+                {
+                    _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(false, 0, 0);
+                    _channel.BufferManager.SetVertexBuffer(index, 0, 0, 0, 0);
+
+                    continue;
+                }
+
+                GpuVa endAddress = _state.State.VertexBufferEndAddress[index];
+
+                ulong address = vertexBuffer.Address.Pack();
+
+                // Query the mapping once, so the enable mask and the pipeline descriptor always agree.
+                bool isMapped = _channel.MemoryManager.IsMapped(address);
+
+                if (isMapped)
+                {
+                    vbEnableMask |= 1u << index;
+                }
+
+                int stride = vertexBuffer.UnpackStride();
+
+                bool instanced = _state.State.VertexBufferInstanced[index];
+
+                int divisor = instanced ? vertexBuffer.Divisor : 0;
+
+                _drawState.IsAnyVbInstanced |= divisor != 0;
+
+                ulong vbSize = endAddress.Pack() - address + 1;
+                ulong size;
+
+                if (_drawState.IbStreamer.HasInlineIndexData || drawIndexed || stride == 0 || instanced)
+                {
+                    // This size may be (much) larger than the real vertex buffer size.
+                    // Avoid calculating it this way, unless we don't have any other option.
+
+                    size = vbSize;
+
+                    if (stride > 0 && indexTypeSmall && drawIndexed && !drawIndirect && !instanced)
+                    {
+                        // If the index type is a small integer type, then we might be still able
+                        // to reduce the vertex buffer size based on the maximum possible index value.
+
+                        ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL;
+
+                        maxVertexBufferSize += _state.State.FirstVertex;
+                        maxVertexBufferSize *= (uint)stride;
+
+                        size = Math.Min(size, maxVertexBufferSize);
+                    }
+                }
+                else
+                {
+                    // For non-indexed draws, we can guess the size from the vertex count
+                    // and stride.
+
+                    int firstInstance = (int)_state.State.FirstInstance;
+
+                    // Do the arithmetic in 64-bit: a 32-bit product can wrap around to a small
+                    // positive value and under-size the buffer. A negative result still becomes
+                    // a huge unsigned value, so Math.Min falls back to vbSize as before.
+                    long vertexEnd = (long)firstInstance + drawFirstVertex + drawVertexCount;
+                    ulong guessedSize = unchecked((ulong)(vertexEnd * stride));
+
+                    size = Math.Min(vbSize, guessedSize);
+                }
+
+                _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(isMapped, stride, divisor);
+                _channel.BufferManager.SetVertexBuffer(index, address, size, stride, divisor);
+            }
+
+            if (_vbEnableMask != vbEnableMask)
+            {
+                // Attributes that point at a disabled or unmapped buffer use a dummy descriptor,
+                // so they must be rebuilt whenever the mask changes.
+                _vbEnableMask = vbEnableMask;
+                UpdateVertexAttribState();
+            }
+        }
+
+        /// <summary>
+        /// Updates host face culling from the guest face state, then refreshes the front face orientation.
+        /// </summary>
+        private void UpdateFaceState()
+        {
+            var faceState = _state.State.FaceState;
+
+            _pipeline.CullEnable = faceState.CullEnable;
+            _pipeline.CullMode = faceState.CullFace;
+            _context.Renderer.Pipeline.SetFaceCulling(faceState.CullEnable, faceState.CullFace);
+
+            UpdateFrontFace(_state.State.YControl, faceState.FrontFace);
+        }
+
+        /// <summary>
+        /// Sets the host front face, inverting the winding when the TriangleRastFlip bit is not set.
+        /// </summary>
+        /// <param name="yControl">Y control register value, where the origin is located</param>
+        /// <param name="frontFace">Front face</param>
+        private void UpdateFrontFace(YControl yControl, FrontFace frontFace)
+        {
+            if (!yControl.HasFlag(YControl.TriangleRastFlip))
+            {
+                // Upper left origin: swap clockwise and counter-clockwise.
+                frontFace = frontFace switch
+                {
+                    FrontFace.CounterClockwise => FrontFace.Clockwise,
+                    _ => FrontFace.CounterClockwise
+                };
+            }
+
+            _pipeline.FrontFace = frontFace;
+            _context.Renderer.Pipeline.SetFrontFace(frontFace);
+        }
+
+        /// <summary>
+        /// Updates the host color write masks of all render targets from the guest state.
+        /// Each mask has bit 0 for red, bit 1 for green, bit 2 for blue and bit 3 for alpha.
+        /// When the mask is shared, the first guest mask is used for every target.
+        /// </summary>
+        private void UpdateRtColorMask()
+        {
+            bool shared = _state.State.RtColorMaskShared;
+
+            Span<uint> masks = stackalloc uint[Constants.TotalRenderTargets];
+
+            for (int rt = 0; rt < Constants.TotalRenderTargets; rt++)
+            {
+                int sourceIndex = shared ? 0 : rt;
+                var guestMask = _state.State.RtColorMask[sourceIndex];
+
+                uint writeMask = 0;
+
+                if (guestMask.UnpackRed())
+                {
+                    writeMask |= 1u;
+                }
+
+                if (guestMask.UnpackGreen())
+                {
+                    writeMask |= 2u;
+                }
+
+                if (guestMask.UnpackBlue())
+                {
+                    writeMask |= 4u;
+                }
+
+                if (guestMask.UnpackAlpha())
+                {
+                    writeMask |= 8u;
+                }
+
+                masks[rt] = writeMask;
+                _pipeline.ColorWriteMask[rt] = writeMask;
+            }
+
+            _context.Renderer.Pipeline.SetRenderTargetColorMasks(masks);
+        }
+
+        /// <summary>
+        /// Updates the host blend state of all render targets from the guest state.
+        /// If blend microcode is enabled and the host can express it as an advanced blend,
+        /// that is used instead of the regular blend state.
+        /// </summary>
+        private void UpdateBlendState()
+        {
+            // Try to HLE blend microcode using advanced blend on the host if we can.
+            // TODO: Blend emulation fallback.
+            if (_state.State.BlendUcodeEnable != BlendUcodeEnable.Disabled &&
+                _context.Capabilities.SupportsBlendEquationAdvanced &&
+                _blendManager.TryGetAdvancedBlend(out var advancedBlend))
+            {
+                _context.Renderer.Pipeline.SetBlendState(advancedBlend);
+
+                return;
+            }
+
+            // True if any of the four factors reads the second fragment output.
+            static bool AnyDualSource(BlendFactor colorSrc, BlendFactor colorDst, BlendFactor alphaSrc, BlendFactor alphaDst)
+            {
+                return colorSrc.IsDualSource() ||
+                       colorDst.IsDualSource() ||
+                       alphaSrc.IsDualSource() ||
+                       alphaDst.IsDualSource();
+            }
+
+            ColorF blendConstant = _state.State.BlendConstant;
+
+            bool usesDualSource = false;
+
+            if (_state.State.BlendIndependent)
+            {
+                // Every render target has its own enable and blend equation.
+                for (int rt = 0; rt < Constants.TotalRenderTargets; rt++)
+                {
+                    bool rtEnable = _state.State.BlendEnable[rt];
+                    var rtBlend = _state.State.BlendState[rt];
+
+                    var rtDescriptor = new BlendDescriptor(
+                        rtEnable,
+                        blendConstant,
+                        rtBlend.ColorOp,
+                        FilterBlendFactor(rtBlend.ColorSrcFactor, rt),
+                        FilterBlendFactor(rtBlend.ColorDstFactor, rt),
+                        rtBlend.AlphaOp,
+                        FilterBlendFactor(rtBlend.AlphaSrcFactor, rt),
+                        FilterBlendFactor(rtBlend.AlphaDstFactor, rt));
+
+                    if (rtEnable && AnyDualSource(rtBlend.ColorSrcFactor, rtBlend.ColorDstFactor, rtBlend.AlphaSrcFactor, rtBlend.AlphaDstFactor))
+                    {
+                        usesDualSource = true;
+                    }
+
+                    _pipeline.BlendDescriptors[rt] = rtDescriptor;
+                    _context.Renderer.Pipeline.SetBlendState(rt, rtDescriptor);
+                }
+            }
+            else
+            {
+                // A single enable bit and blend equation is replicated to all render targets.
+                bool commonEnable = _state.State.BlendEnable[0];
+                var commonBlend = _state.State.BlendStateCommon;
+
+                var commonDescriptor = new BlendDescriptor(
+                    commonEnable,
+                    blendConstant,
+                    commonBlend.ColorOp,
+                    FilterBlendFactor(commonBlend.ColorSrcFactor, 0),
+                    FilterBlendFactor(commonBlend.ColorDstFactor, 0),
+                    commonBlend.AlphaOp,
+                    FilterBlendFactor(commonBlend.AlphaSrcFactor, 0),
+                    FilterBlendFactor(commonBlend.AlphaDstFactor, 0));
+
+                if (commonEnable && AnyDualSource(commonBlend.ColorSrcFactor, commonBlend.ColorDstFactor, commonBlend.AlphaSrcFactor, commonBlend.AlphaDstFactor))
+                {
+                    usesDualSource = true;
+                }
+
+                for (int rt = 0; rt < Constants.TotalRenderTargets; rt++)
+                {
+                    _pipeline.BlendDescriptors[rt] = commonDescriptor;
+                    _context.Renderer.Pipeline.SetBlendState(rt, commonDescriptor);
+                }
+            }
+
+            _currentSpecState.SetDualSourceBlendEnabled(usesDualSource);
+        }
+
+        /// <summary>
+        /// Gets the blend factor to use for a given color target.
+        /// This returns <paramref name="factor"/> unchanged unless the target format has no alpha component,
+        /// in which case destination alpha factors are replaced with a constant factor of one or zero.
+        /// </summary>
+        /// <param name="factor">Input factor</param>
+        /// <param name="index">Color target index</param>
+        /// <returns>New blend factor</returns>
+        private BlendFactor FilterBlendFactor(BlendFactor factor, int index)
+        {
+            if (!_state.State.RtColorState[index].Format.NoAlpha())
+            {
+                return factor;
+            }
+
+            // If any color target format without alpha is being used, we need to make sure that
+            // if blend is active, it will not use destination alpha as a factor.
+            // That is required because RGBX formats are emulated using host RGBA formats.
+            return factor switch
+            {
+                BlendFactor.DstAlpha => BlendFactor.One,
+                BlendFactor.DstAlphaGl => BlendFactor.One,
+                BlendFactor.OneMinusDstAlpha => BlendFactor.Zero,
+                BlendFactor.OneMinusDstAlphaGl => BlendFactor.Zero,
+                _ => factor
+            };
+        }
+
+        /// <summary>
+        /// Applies the guest logical operation enable flag and operation to the cached
+        /// pipeline state and to the host pipeline.
+        /// </summary>
+        private void UpdateLogicOpState()
+        {
+            LogicalOpState guestLogicOp = _state.State.LogicOpState;
+
+            var enable = guestLogicOp.Enable;
+            var op = guestLogicOp.LogicalOp;
+
+            _pipeline.SetLogicOpState(enable, op);
+            _context.Renderer.Pipeline.SetLogicOpState(enable, op);
+        }
+
+        /// <summary>
+        /// Updates host multisample state from the guest state.
+        /// Alpha to coverage is bit 0 and alpha to one is bit 4 of the multisample control register.
+        /// </summary>
+        private void UpdateMultisampleState()
+        {
+            var control = _state.State.MultisampleControl;
+            var ditherEnable = _state.State.AlphaToCoverageDitherEnable;
+
+            bool alphaToCoverage = (control & 1) != 0;
+            bool alphaToOne = (control & 0x10) != 0;
+
+            var descriptor = new MultisampleDescriptor(alphaToCoverage, ditherEnable, alphaToOne);
+
+            _context.Renderer.Pipeline.SetMultisampleState(descriptor);
+
+            _currentSpecState.SetAlphaToCoverageEnable(alphaToCoverage, ditherEnable);
+        }
+
+        /// <summary>
+        /// Copies the guest early Z force flag into the current specialization state.
+        /// </summary>
+        private void UpdateEarlyZState()
+        {
+            var earlyZForce = _state.State.EarlyZForce;
+
+            _currentSpecState.SetEarlyZForce(earlyZForce);
+        }
+
+        /// <summary>
+        /// Fetches the host graphics program for the current guest shader state from the shader cache,
+        /// refreshes everything derived from it (clip distances, bindings, per-stage info)
+        /// and binds it on the host pipeline.
+        /// </summary>
+        private void UpdateShaderState()
+        {
+            const int ShaderSlotCount = 6;
+            const int VertexSlot = 1;
+
+            var cache = _channel.MemoryManager.Physical.ShaderCache;
+
+            _vtgWritesRtLayer = false;
+
+            ShaderAddresses addresses = new ShaderAddresses();
+            Span<ulong> addressSlots = addresses.AsSpan();
+
+            ulong codeBase = _state.State.ShaderBaseAddress.Pack();
+
+            for (int slot = 0; slot < ShaderSlotCount; slot++)
+            {
+                var shader = _state.State.ShaderState[slot];
+
+                // Slot 1 always gets an address, even when its enable bit is clear.
+                if (shader.UnpackEnable() || slot == VertexSlot)
+                {
+                    addressSlots[slot] = codeBase + shader.Offset;
+                }
+            }
+
+            CachedShaderProgram program = cache.GetGraphicsShader(
+                ref _state.State,
+                ref _pipeline,
+                _channel,
+                ref _currentSpecState.GetPoolState(),
+                ref _currentSpecState.GetGraphicsState(),
+                addresses);
+
+            // Consume the modified flag for spec state so that it isn't checked again.
+            _currentSpecState.SetShader(program);
+
+            _shaderSpecState = program.SpecializationState;
+
+            byte previousClipDistances = _vsClipDistancesWritten;
+
+            var vertexShader = program.Shaders[VertexSlot];
+
+            _drawState.VsUsesInstanceId = vertexShader?.Info.UsesInstanceId ?? false;
+            _vsUsesDrawParameters = vertexShader?.Info.UsesDrawParameters ?? false;
+            _vsClipDistancesWritten = vertexShader?.Info.ClipDistancesWritten ?? 0;
+
+            if (previousClipDistances != _vsClipDistancesWritten)
+            {
+                // The user clip enables depend on which distances the vertex shader writes.
+                UpdateUserClipState();
+            }
+
+            UpdateShaderBindings(program.Bindings);
+
+            for (int stage = 0; stage < Constants.ShaderStages; stage++)
+            {
+                // Per-stage info is read from the entry after the stage index.
+                ShaderProgramInfo stageInfo = program.Shaders[stage + 1]?.Info;
+
+                if (stageInfo?.UsesRtLayer == true)
+                {
+                    _vtgWritesRtLayer = true;
+                }
+
+                _currentProgramInfo[stage] = stageInfo;
+            }
+
+            _context.Renderer.Pipeline.SetProgram(program.HostProgram);
+        }
+
+        /// <summary>
+        /// Hands the bindings of the active shader to the texture manager and then the buffer manager.
+        /// </summary>
+        /// <param name="bindings">Bindings for the active shader</param>
+        private void UpdateShaderBindings(CachedShaderBindings bindings)
+        {
+            var textureManager = _channel.TextureManager;
+            var bufferManager = _channel.BufferManager;
+
+            textureManager.SetGraphicsBindings(bindings);
+            bufferManager.SetGraphicsBufferBindings(bindings);
+        }
+
+        /// <summary>
+        /// Builds a pool state from the guest texture pool address, its maximum ID
+        /// and the texture buffer index.
+        /// </summary>
+        /// <returns>Texture pool state</returns>
+        private GpuChannelPoolState GetPoolState()
+        {
+            var texturePool = _state.State.TexturePoolState;
+
+            ulong poolAddress = texturePool.Address.Pack();
+            int textureBufferIndex = (int)_state.State.TextureBufferIndex;
+
+            return new GpuChannelPoolState(poolAddress, texturePool.MaximumId, textureBufferIndex);
+        }
+
+        /// <summary>
+        /// Gets the depth mode that is currently being used (zero to one or minus one to one).
+        /// The mode is guessed from the first viewport when its depth range is finite and non-empty,
+        /// otherwise it is taken from the depth mode register.
+        /// </summary>
+        /// <returns>Current depth mode</returns>
+        private DepthMode GetDepthMode()
+        {
+            ref var transform = ref _state.State.ViewportTransform[0];
+            ref var extents = ref _state.State.ViewportExtents[0];
+
+            float near = extents.DepthNear;
+            float far = extents.DepthFar;
+
+            bool canGuess = !float.IsInfinity(near) &&
+                            !float.IsInfinity(far) &&
+                            (far - near) != 0;
+
+            if (!canGuess)
+            {
+                // If we can't guess from the viewport transform, then just use the depth mode register.
+                return (DepthMode)(_state.State.DepthMode & 1);
+            }
+
+            // Try to guess the depth mode being used on the high level API
+            // based on current transform.
+            // It is setup like so by said APIs:
+            // If depth mode is ZeroToOne:
+            //  TranslateZ = Near
+            //  ScaleZ = Far - Near
+            // If depth mode is MinusOneToOne:
+            //  TranslateZ = (Near + Far) / 2
+            //  ScaleZ = (Far - Near) / 2
+            // DepthNear/Far are sorted such as that Near is always less than Far.
+            bool translateIsDepthBound = near == transform.TranslateZ || far == transform.TranslateZ;
+
+            return translateIsDepthBound ? DepthMode.ZeroToOne : DepthMode.MinusOneToOne;
+        }
+
+        /// <summary>
+        /// Marks the shader state entry as dirty on the update tracker, forcing a shader rebind on the next draw.
+        /// </summary>
+        public void ForceShaderUpdate() => _updateTracker.ForceDirty(ShaderStateIndex);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
new file mode 100644
index 00000000..caeee18e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
@@ -0,0 +1,620 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed.Blender;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// 3D engine class. Holds the 3D register state and forwards register accesses
+ /// to the helper objects that implement them.
+ /// </summary>
+ class ThreedClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GPFifoClass _fifoClass;
+ private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+ private readonly InlineToMemoryClass _i2mClass;
+ private readonly AdvancedBlendManager _blendManager;
+ private readonly DrawManager _drawManager;
+ private readonly SemaphoreUpdater _semaphoreUpdater;
+ private readonly ConstantBufferUpdater _cbUpdater;
+ private readonly StateUpdater _stateUpdater;
+
+ /// <summary>
+ /// Creates a new instance of the 3D engine class.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="fifoClass">GPFIFO class, used to create the syncs that are still pending</param>
+ public ThreedClass(GpuContext context, GpuChannel channel, GPFifoClass fifoClass)
+ {
+ _context = context;
+ _fifoClass = fifoClass;
+
+ // Register name to handler table. Most registers only get the first (int argument) handler;
+ // RenderEnableCondition only gets the second one, which is the zero-returning read handler.
+ var callbacks = new Dictionary<string, RwCallback>
+ {
+ [nameof(ThreedClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
+ [nameof(ThreedClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null),
+ [nameof(ThreedClassState.SyncpointAction)] = new RwCallback(IncrementSyncpoint, null),
+ [nameof(ThreedClassState.InvalidateSamplerCacheNoWfi)] = new RwCallback(InvalidateSamplerCacheNoWfi, null),
+ [nameof(ThreedClassState.InvalidateTextureHeaderCacheNoWfi)] = new RwCallback(InvalidateTextureHeaderCacheNoWfi, null),
+ [nameof(ThreedClassState.TextureBarrier)] = new RwCallback(TextureBarrier, null),
+ [nameof(ThreedClassState.LoadBlendUcodeStart)] = new RwCallback(LoadBlendUcodeStart, null),
+ [nameof(ThreedClassState.LoadBlendUcodeInstruction)] = new RwCallback(LoadBlendUcodeInstruction, null),
+ [nameof(ThreedClassState.TextureBarrierTiled)] = new RwCallback(TextureBarrierTiled, null),
+ [nameof(ThreedClassState.DrawTextureSrcY)] = new RwCallback(DrawTexture, null),
+ [nameof(ThreedClassState.DrawVertexArrayBeginEndInstanceFirst)] = new RwCallback(DrawVertexArrayBeginEndInstanceFirst, null),
+ [nameof(ThreedClassState.DrawVertexArrayBeginEndInstanceSubsequent)] = new RwCallback(DrawVertexArrayBeginEndInstanceSubsequent, null),
+ [nameof(ThreedClassState.VbElementU8)] = new RwCallback(VbElementU8, null),
+ [nameof(ThreedClassState.VbElementU16)] = new RwCallback(VbElementU16, null),
+ [nameof(ThreedClassState.VbElementU32)] = new RwCallback(VbElementU32, null),
+ [nameof(ThreedClassState.ResetCounter)] = new RwCallback(ResetCounter, null),
+ [nameof(ThreedClassState.RenderEnableCondition)] = new RwCallback(null, Zero),
+ [nameof(ThreedClassState.DrawEnd)] = new RwCallback(DrawEnd, null),
+ [nameof(ThreedClassState.DrawBegin)] = new RwCallback(DrawBegin, null),
+ [nameof(ThreedClassState.DrawIndexBuffer32BeginEndInstanceFirst)] = new RwCallback(DrawIndexBuffer32BeginEndInstanceFirst, null),
+ [nameof(ThreedClassState.DrawIndexBuffer16BeginEndInstanceFirst)] = new RwCallback(DrawIndexBuffer16BeginEndInstanceFirst, null),
+ [nameof(ThreedClassState.DrawIndexBuffer8BeginEndInstanceFirst)] = new RwCallback(DrawIndexBuffer8BeginEndInstanceFirst, null),
+ [nameof(ThreedClassState.DrawIndexBuffer32BeginEndInstanceSubsequent)] = new RwCallback(DrawIndexBuffer32BeginEndInstanceSubsequent, null),
+ [nameof(ThreedClassState.DrawIndexBuffer16BeginEndInstanceSubsequent)] = new RwCallback(DrawIndexBuffer16BeginEndInstanceSubsequent, null),
+ [nameof(ThreedClassState.DrawIndexBuffer8BeginEndInstanceSubsequent)] = new RwCallback(DrawIndexBuffer8BeginEndInstanceSubsequent, null),
+ [nameof(ThreedClassState.IndexBufferCount)] = new RwCallback(SetIndexBufferCount, null),
+ [nameof(ThreedClassState.Clear)] = new RwCallback(Clear, null),
+ [nameof(ThreedClassState.SemaphoreControl)] = new RwCallback(Report, null),
+ [nameof(ThreedClassState.SetFalcon04)] = new RwCallback(SetFalcon04, null),
+ [nameof(ThreedClassState.UniformBufferUpdateData)] = new RwCallback(ConstantBufferUpdate, null),
+ [nameof(ThreedClassState.UniformBufferBindVertex)] = new RwCallback(ConstantBufferBindVertex, null),
+ [nameof(ThreedClassState.UniformBufferBindTessControl)] = new RwCallback(ConstantBufferBindTessControl, null),
+ [nameof(ThreedClassState.UniformBufferBindTessEvaluation)] = new RwCallback(ConstantBufferBindTessEvaluation, null),
+ [nameof(ThreedClassState.UniformBufferBindGeometry)] = new RwCallback(ConstantBufferBindGeometry, null),
+ [nameof(ThreedClassState.UniformBufferBindFragment)] = new RwCallback(ConstantBufferBindFragment, null)
+ };
+
+ _state = new DeviceStateWithShadow<ThreedClassState>(callbacks);
+
+ _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
+
+ // The specialization updater and the draw state are shared by the draw manager and the state updater.
+ var spec = new SpecializationStateUpdater(context);
+ var drawState = new DrawState();
+
+ _drawManager = new DrawManager(context, channel, _state, drawState, spec);
+ _blendManager = new AdvancedBlendManager(_state);
+ _semaphoreUpdater = new SemaphoreUpdater(context, channel, _state);
+ _cbUpdater = new ConstantBufferUpdater(channel, _state);
+ _stateUpdater = new StateUpdater(context, channel, _state, drawState, _blendManager, spec);
+
+ // The render enable condition is "always" by default, even if the register is never written.
+ // Reading the register returns 0 no matter what was written to it.
+ _state.State.RenderEnableCondition = Condition.Always;
+ }
+
+ /// <summary>
+ /// Reads a value from the class registers.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <returns>Data at the specified offset</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int Read(int offset) => _state.Read(offset);
+
+ /// <summary>
+ /// Writes a value to the class registers, marking the register as dirty when its value changes.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <param name="data">Data to be written</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Write(int offset, int data)
+ {
+ _state.WriteWithRedundancyCheck(offset, data, out bool changed);
+
+ if (!changed)
+ {
+ return;
+ }
+
+ _stateUpdater.SetDirty(offset);
+ }
+
+ /// <summary>
+ /// Sets the shadow ram control value of all sub-channels.
+ /// </summary>
+ /// <param name="control">New shadow ram control value</param>
+ public void SetShadowRamControl(int control) => _state.State.SetMmeShadowRamControl = (uint)control;
+
+ /// <summary>
+ /// Updates current host state for all registers modified since the last call to this method.
+ /// Pending syncs are created and queued UBO updates are flushed before the state is updated.
+ /// </summary>
+ public void UpdateState()
+ {
+ _fifoClass.CreatePendingSyncs();
+ _cbUpdater.FlushUboDirty();
+ _stateUpdater.Update();
+ }
+
+ /// <summary>
+ /// Updates current host state, restricted to the state groups selected by a mask.
+ /// </summary>
+ /// <param name="mask">Mask where each bit set indicates that the respective state group index should be checked</param>
+ public void UpdateState(ulong mask) => _stateUpdater.Update(mask);
+
+ /// <summary>
+ /// Updates render targets (color and depth-stencil buffers) based on current render target state.
+ /// </summary>
+ /// <param name="updateFlags">Flags indicating which render targets should be updated and how</param>
+ /// <param name="singleUse">If this is not -1, it indicates that only the given indexed target will be used.</param>
+ public void UpdateRenderTargetState(RenderTargetUpdateFlags updateFlags, int singleUse = -1) =>
+ _stateUpdater.UpdateRenderTargetState(updateFlags, singleUse);
+
+ /// <summary>
+ /// Updates scissor based on current render target state.
+ /// </summary>
+ public void UpdateScissorState() => _stateUpdater.UpdateScissorState();
+
+ /// <summary>
+ /// Marks the entire state as dirty, forcing a full host state update before the next draw.
+ /// </summary>
+ public void ForceStateDirty()
+ {
+ _drawManager.ForceStateDirty();
+ _stateUpdater.SetAllDirty();
+ }
+
+ /// <summary>
+ /// Marks the specified register offset as dirty, forcing the associated state to update on the next draw.
+ /// </summary>
+ /// <param name="offset">Register offset</param>
+ public void ForceStateDirty(int offset) => _stateUpdater.SetDirty(offset);
+
+ /// <summary>
+ /// Forces the shaders to be rebound on the next draw.
+ /// </summary>
+ public void ForceShaderUpdate() => _stateUpdater.ForceShaderUpdate();
+
+ /// <summary>
+ /// Creates any syncs from WaitForIdle commands that are currently pending.
+ /// </summary>
+ public void CreatePendingSyncs() => _fifoClass.CreatePendingSyncs();
+
+ /// <summary>
+ /// Flushes any queued UBO updates.
+ /// </summary>
+ public void FlushUboDirty() => _cbUpdater.FlushUboDirty();
+
+ /// <summary>
+ /// Performs any deferred draws.
+ /// </summary>
+ public void PerformDeferredDraws() => _drawManager.PerformDeferredDraws();
+
+ /// <summary>
+ /// Updates the currently bound constant buffer.
+ /// </summary>
+ /// <param name="data">Data to be written to the buffer</param>
+ public void ConstantBufferUpdate(ReadOnlySpan<int> data) => _cbUpdater.Update(data);
+
+ /// <summary>
+ /// Launches the Inline-to-Memory DMA copy operation.
+ /// The 3D class state is reinterpreted as Inline-to-Memory class state for the launch.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LaunchDma(int argument) =>
+ _i2mClass.LaunchDma(ref Unsafe.As<ThreedClassState, InlineToMemoryClassState>(ref _state.State), argument);
+
+ /// <summary>
+ /// Pushes a block of data to the Inline-to-Memory engine.
+ /// </summary>
+ /// <param name="data">Data to push</param>
+ public void LoadInlineData(ReadOnlySpan<int> data) => _i2mClass.LoadInlineData(data);
+
+ /// <summary>
+ /// Pushes a word of data to the Inline-to-Memory engine.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LoadInlineData(int argument) => _i2mClass.LoadInlineData(argument);
+
+ /// <summary>
+ /// Increments a syncpoint.
+ /// </summary>
+ /// <param name="argument">Method call argument, the low 16 bits hold the syncpoint ID</param>
+ public void IncrementSyncpoint(int argument)
+ {
+ _context.AdvanceSequence();
+ _context.CreateHostSyncIfNeeded(true, true);
+
+ // Poll the query counters, the game may want an updated result.
+ _context.Renderer.UpdateCounters();
+
+ _context.Synchronization.IncrementSyncpoint((uint)argument & 0xFFFF);
+ }
+
+ /// <summary>
+ /// Invalidates the cache with the sampler descriptors from the sampler pool.
+ /// </summary>
+ /// <param name="argument">Method call argument (unused)</param>
+ private void InvalidateSamplerCacheNoWfi(int argument) => _context.AdvanceSequence();
+
+ /// <summary>
+ /// Invalidates the cache with the texture descriptors from the texture pool.
+ /// </summary>
+ /// <param name="argument">Method call argument (unused)</param>
+ private void InvalidateTextureHeaderCacheNoWfi(int argument) => _context.AdvanceSequence();
+
+ /// <summary>
+ /// Issues a texture barrier.
+ /// This waits for previous texture writes from the GPU to finish, before
+ /// performing new operations with said textures.
+ /// </summary>
+ /// <param name="argument">Method call argument (unused)</param>
+ private void TextureBarrier(int argument) => _context.Renderer.Pipeline.TextureBarrier();
+
+ /// <summary>
+ /// Sets the start offset of the blend microcode in memory.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LoadBlendUcodeStart(int argument) => _blendManager.LoadBlendUcodeStart(argument);
+
+ /// <summary>
+ /// Pushes one word of blend microcode.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LoadBlendUcodeInstruction(int argument) => _blendManager.LoadBlendUcodeInstruction(argument);
+
+ /// <summary>
+ /// Issues a tiled texture barrier.
+ /// This waits for previous texture writes from the GPU to finish, before
+ /// performing new operations with said textures.
+ /// The wait is performed per tile, so it is only valid if both the previous write
+ /// and the current access have the same access patterns.
+ /// This may be faster than the regular barrier on tile-based rasterizers.
+ /// </summary>
+ /// <param name="argument">Method call argument (unused)</param>
+ private void TextureBarrierTiled(int argument) => _context.Renderer.Pipeline.TextureBarrierTiled();
+
+ /// <summary>
+ /// Draws a texture, without needing to specify shader programs.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawTexture(int argument) => _drawManager.DrawTexture(this, argument);
+
+ /// <summary>
+ /// Performs a non-indexed draw with the specified topology, index and count.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawVertexArrayBeginEndInstanceFirst(int argument) =>
+ _drawManager.DrawVertexArrayBeginEndInstanceFirst(this, argument);
+
+ /// <summary>
+ /// Performs a non-indexed draw with the specified topology, index and count,
+ /// while incrementing the current instance.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawVertexArrayBeginEndInstanceSubsequent(int argument) =>
+ _drawManager.DrawVertexArrayBeginEndInstanceSubsequent(this, argument);
+
+ /// <summary>
+ /// Pushes four 8-bit index buffer elements.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void VbElementU8(int argument) => _drawManager.VbElementU8(argument);
+
+ /// <summary>
+ /// Pushes two 16-bit index buffer elements.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void VbElementU16(int argument) => _drawManager.VbElementU16(argument);
+
+ /// <summary>
+ /// Pushes one 32-bit index buffer element.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void VbElementU32(int argument) => _drawManager.VbElementU32(argument);
+
+ /// <summary>
+ /// Resets the value of an internal GPU counter back to zero.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ResetCounter(int argument) => _semaphoreUpdater.ResetCounter(argument);
+
+ /// <summary>
+ /// Finishes the draw call.
+ /// This draws geometry on the bound buffers based on the current GPU state.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawEnd(int argument) => _drawManager.DrawEnd(this, argument);
+
+ /// <summary>
+ /// Starts a draw.
+ /// This sets primitive type and instanced draw parameters.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawBegin(int argument) => _drawManager.DrawBegin(argument);
+
+ /// <summary>
+ /// Sets the index buffer count.
+ /// This also sets internal state that indicates that the next draw is an indexed draw.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void SetIndexBufferCount(int argument) => _drawManager.SetIndexBufferCount(argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 8-bit index buffer elements.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer8BeginEndInstanceFirst(int argument) =>
+ _drawManager.DrawIndexBuffer8BeginEndInstanceFirst(this, argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 16-bit index buffer elements.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer16BeginEndInstanceFirst(int argument) =>
+ _drawManager.DrawIndexBuffer16BeginEndInstanceFirst(this, argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 32-bit index buffer elements.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer32BeginEndInstanceFirst(int argument) =>
+ _drawManager.DrawIndexBuffer32BeginEndInstanceFirst(this, argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 8-bit index buffer elements,
+ /// while also pre-incrementing the current instance value.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer8BeginEndInstanceSubsequent(int argument) =>
+ _drawManager.DrawIndexBuffer8BeginEndInstanceSubsequent(this, argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 16-bit index buffer elements,
+ /// while also pre-incrementing the current instance value.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer16BeginEndInstanceSubsequent(int argument) =>
+ _drawManager.DrawIndexBuffer16BeginEndInstanceSubsequent(this, argument);
+
+ /// <summary>
+ /// Performs an indexed draw with 32-bit index buffer elements,
+ /// while also pre-incrementing the current instance value.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void DrawIndexBuffer32BeginEndInstanceSubsequent(int argument) =>
+ _drawManager.DrawIndexBuffer32BeginEndInstanceSubsequent(this, argument);
+
+ /// <summary>
+ /// Clears the current color and depth-stencil buffers.
+ /// Which buffers should be cleared is also specified on the argument.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void Clear(int argument) => _drawManager.Clear(this, argument);
+
+ /// <summary>
+ /// Writes a GPU counter to guest memory.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void Report(int argument) => _semaphoreUpdater.Report(argument);
+
+ /// <summary>
+ /// Performs high-level emulation of Falcon microcode function number "4".
+ /// The emulation consists of writing 1 to the first MME shadow scratch register.
+ /// </summary>
+ /// <param name="argument">Method call argument (unused)</param>
+ private void SetFalcon04(int argument) => _state.State.SetMmeShadowScratch[0] = 1;
+
+ /// <summary>
+ /// Updates the uniform buffer data with inline data.
+ /// </summary>
+ /// <param name="argument">New uniform buffer data word</param>
+ private void ConstantBufferUpdate(int argument) => _cbUpdater.Update(argument);
+
+ /// <summary>
+ /// Binds a uniform buffer for the vertex shader stage.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ConstantBufferBindVertex(int argument) => _cbUpdater.BindVertex(argument);
+
+ /// <summary>
+ /// Binds a uniform buffer for the tessellation control shader stage.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ConstantBufferBindTessControl(int argument) => _cbUpdater.BindTessControl(argument);
+
+ /// <summary>
+ /// Binds a uniform buffer for the tessellation evaluation shader stage.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ConstantBufferBindTessEvaluation(int argument) => _cbUpdater.BindTessEvaluation(argument);
+
+ /// <summary>
+ /// Binds a uniform buffer for the geometry shader stage.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ConstantBufferBindGeometry(int argument) => _cbUpdater.BindGeometry(argument);
+
+ /// <summary>
+ /// Binds a uniform buffer for the fragment shader stage.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void ConstantBufferBindFragment(int argument) => _cbUpdater.BindFragment(argument);
+
+ /// <summary>
+ /// Generic register read function that just returns 0.
+ /// </summary>
+ /// <returns>Zero</returns>
+ private static int Zero() => 0;
+
+ /// <summary>
+ /// Performs an indexed or non-indexed draw.
+ /// </summary>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="count">Index count for indexed draws, vertex count for non-indexed draws</param>
+ /// <param name="instanceCount">Instance count</param>
+ /// <param name="firstIndex">First index on the index buffer for indexed draws, ignored for non-indexed draws</param>
+ /// <param name="firstVertex">First vertex on the vertex buffer</param>
+ /// <param name="firstInstance">First instance</param>
+ /// <param name="indexed">True if the draw is indexed, false otherwise</param>
+ public void Draw(
+ PrimitiveTopology topology,
+ int count,
+ int instanceCount,
+ int firstIndex,
+ int firstVertex,
+ int firstInstance,
+ bool indexed) =>
+ _drawManager.Draw(this, topology, count, instanceCount, firstIndex, firstVertex, firstInstance, indexed);
+
+ /// <summary>
+ /// Performs an indirect draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="indirectBufferAddress">Address of the buffer with the draw parameters, such as count, first index, etc</param>
+ /// <param name="parameterBufferAddress">Address of the buffer with the draw count</param>
+ /// <param name="maxDrawCount">Maximum number of draws that can be made</param>
+ /// <param name="stride">Distance in bytes between each entry on the data pointed to by <paramref name="indirectBufferAddress"/></param>
+ /// <param name="indexCount">Maximum number of indices that the draw can consume</param>
+ /// <param name="drawType">Type of the indirect draw, which can be indexed or non-indexed, with or without a draw count</param>
+ public void DrawIndirect(
+ PrimitiveTopology topology,
+ ulong indirectBufferAddress,
+ ulong parameterBufferAddress,
+ int maxDrawCount,
+ int stride,
+ int indexCount,
+ IndirectDrawType drawType) =>
+ _drawManager.DrawIndirect(this, topology, indirectBufferAddress, parameterBufferAddress, maxDrawCount, stride, indexCount, drawType);
+
+ /// <summary>
+ /// Clears the current color and depth-stencil buffers.
+ /// Which buffers should be cleared can also be specified with the arguments.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ /// <param name="layerCount">For array and 3D textures, indicates how many layers should be cleared</param>
+ public void Clear(int argument, int layerCount) => _drawManager.Clear(this, argument, layerCount);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs
new file mode 100644
index 00000000..8f26f38f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs
@@ -0,0 +1,1048 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed
+{
+ /// <summary>
+ /// Graphics shader stage, numbered in declaration order starting from zero.
+ /// </summary>
+ enum ShaderType
+ {
+ Vertex = 0,
+ TessellationControl = 1,
+ TessellationEvaluation = 2,
+ Geometry = 3,
+ Fragment = 4
+ }
+
+ /// <summary>
+ /// Tessellation mode, stored as a single packed 32-bit value.
+ /// </summary>
+ struct TessMode
+ {
+#pragma warning disable CS0649
+ public uint Packed;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Unpacks the tessellation abstract patch type (bits 0 and 1).
+ /// </summary>
+ /// <returns>Abstract patch type</returns>
+ public TessPatchType UnpackPatchType() => (TessPatchType)(Packed & 3);
+
+ /// <summary>
+ /// Unpacks the spacing between tessellated vertices of the patch (bits 4 and 5).
+ /// </summary>
+ /// <returns>Spacing between tessellated vertices</returns>
+ public TessSpacing UnpackSpacing() => (TessSpacing)((Packed >> 4) & 3);
+
+ /// <summary>
+ /// Unpacks the primitive winding order (bit 8).
+ /// </summary>
+ /// <returns>True if clockwise, false if counter-clockwise</returns>
+ public bool UnpackCw() => ((Packed >> 8) & 1) != 0;
+ }
+
+ /// <summary>
+ /// Transform feedback buffer state.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): CS0649 ("field is never assigned to") is suppressed here, which suggests the fields are
+ /// filled by reinterpreting register memory rather than by regular assignments. If so, field order and
+ /// sizes (including the trailing padding words) are part of the layout and must not change. Confirm against the users.
+ /// </remarks>
+ struct TfBufferState
+ {
+#pragma warning disable CS0649
+ public Boolean32 Enable;
+ public GpuVa Address;
+ public int Size;
+ public int Offset;
+ public uint Padding0;
+ public uint Padding1;
+ public uint Padding2;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Transform feedback state.
+ /// </summary>
+ /// <remarks>
+ /// Four 32-bit fields, the last one being padding.
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so keep order and sizes. Confirm.
+ /// </remarks>
+ struct TfState
+ {
+#pragma warning disable CS0649
+ public int BufferIndex;
+ public int VaryingsCount;
+ public int Stride;
+ public uint Padding;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Render target color buffer state.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so field order, sizes and
+ /// the trailing padding words must be kept as they are. Confirm against the users of this struct.
+ /// </remarks>
+ struct RtColorState
+ {
+#pragma warning disable CS0649
+ public GpuVa Address;
+ // Named "WidthOrStride" because one field carries either value; which one applies is not decided here.
+ public int WidthOrStride;
+ public int Height;
+ public ColorFormat Format;
+ public MemoryLayout MemoryLayout;
+ public int Depth;
+ public int LayerSize;
+ public int BaseLayer;
+ // NOTE(review): purpose unknown; the name looks like the byte offset (0x24) of the field inside the struct. Confirm.
+ public int Unknown0x24;
+ public int Padding0;
+ public int Padding1;
+ public int Padding2;
+ public int Padding3;
+ public int Padding4;
+ public int Padding5;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Scale, translation and swizzle parameters used by the viewport transformation.
+ /// </summary>
+ struct ViewportTransform
+ {
+#pragma warning disable CS0649
+ public float ScaleX;
+ public float ScaleY;
+ public float ScaleZ;
+ public float TranslateX;
+ public float TranslateY;
+ public float TranslateZ;
+ public uint Swizzle;
+ public uint SubpixelPrecisionBias;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Unpacks viewport swizzle of the position X component (bits 0 to 2 of the swizzle word).
+ /// </summary>
+ /// <returns>Swizzle enum value</returns>
+ public ViewportSwizzle UnpackSwizzleX() => UnpackSwizzle(0);
+
+ /// <summary>
+ /// Unpacks viewport swizzle of the position Y component (bits 4 to 6 of the swizzle word).
+ /// </summary>
+ /// <returns>Swizzle enum value</returns>
+ public ViewportSwizzle UnpackSwizzleY() => UnpackSwizzle(4);
+
+ /// <summary>
+ /// Unpacks viewport swizzle of the position Z component (bits 8 to 10 of the swizzle word).
+ /// </summary>
+ /// <returns>Swizzle enum value</returns>
+ public ViewportSwizzle UnpackSwizzleZ() => UnpackSwizzle(8);
+
+ /// <summary>
+ /// Unpacks viewport swizzle of the position W component (bits 12 to 14 of the swizzle word).
+ /// </summary>
+ /// <returns>Swizzle enum value</returns>
+ public ViewportSwizzle UnpackSwizzleW() => UnpackSwizzle(12);
+
+ /// <summary>
+ /// Extracts the 3-bit swizzle selector that starts at the given bit of the swizzle word.
+ /// </summary>
+ /// <param name="shift">Index of the lowest bit of the selector</param>
+ /// <returns>Swizzle enum value</returns>
+ private ViewportSwizzle UnpackSwizzle(int shift) => (ViewportSwizzle)((Swizzle >> shift) & 7);
+ }
+
+ /// <summary>
+ /// Viewport extents for viewport clipping, also includes depth range.
+ /// </summary>
+ /// <remarks>
+ /// The 16-bit fields are interleaved as X, Width, Y, Height (not X, Y, Width, Height).
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so keep this order. Confirm.
+ /// </remarks>
+ struct ViewportExtents
+ {
+#pragma warning disable CS0649
+ public ushort X;
+ public ushort Width;
+ public ushort Y;
+ public ushort Height;
+ public float DepthNear;
+ public float DepthFar;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Draw state for non-indexed draws.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): going by the names, First is the first vertex and Count the number of vertices to draw. Confirm.
+ /// </remarks>
+ struct VertexBufferDrawState
+ {
+#pragma warning disable CS0649
+ public int First;
+ public int Count;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Color buffer clear color.
+ /// </summary>
+ /// <remarks>
+ /// One 32-bit float per component, declared in red, green, blue, alpha order.
+ /// </remarks>
+ struct ClearColors
+ {
+#pragma warning disable CS0649
+ public float Red;
+ public float Green;
+ public float Blue;
+ public float Alpha;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Depth bias (also called polygon offset) parameters.
+ /// </summary>
+ /// <remarks>
+ /// Holds one enable flag per mode named in the fields: point, line and fill.
+ /// </remarks>
+ struct DepthBiasState
+ {
+#pragma warning disable CS0649
+ public Boolean32 PointEnable;
+ public Boolean32 LineEnable;
+ public Boolean32 FillEnable;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Indicates whether the blend microcode processes the RGB and alpha components.
+ /// </summary>
+ /// <remarks>
+ /// The values combine as bits: bit 0 selects RGB, bit 1 selects alpha, and EnableRGBA has both bits set.
+ /// </remarks>
+ enum BlendUcodeEnable
+ {
+ Disabled = 0,
+ EnableRGB = 1,
+ EnableAlpha = 2,
+ EnableRGBA = 3
+ }
+
+ /// <summary>
+ /// Scissor state.
+ /// </summary>
+ /// <remarks>
+ /// The rectangle is stored as two 16-bit pairs, X1/X2 followed by Y1/Y2, and is followed by one padding word.
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so keep order and sizes. Confirm.
+ /// </remarks>
+ struct ScissorState
+ {
+#pragma warning disable CS0649
+ public Boolean32 Enable;
+ public ushort X1;
+ public ushort X2;
+ public ushort Y1;
+ public ushort Y2;
+ public uint Padding;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Stencil test masks for back tests.
+ /// </summary>
+ /// <remarks>
+ /// Note the field order: FuncRef, Mask, FuncMask. The front face equivalents in
+ /// <see cref="StencilTestState"/> are declared as FrontFuncRef, FrontFuncMask, FrontMask, so the
+ /// last two are swapped relative to this struct.
+ /// </remarks>
+ struct StencilBackMasks
+ {
+#pragma warning disable CS0649
+ public int FuncRef;
+ public int Mask;
+ public int FuncMask;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Render target depth-stencil buffer state.
+ /// </summary>
+ /// <remarks>
+ /// Unlike <see cref="RtColorState"/>, this struct declares no width, height or depth fields.
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so keep order and sizes. Confirm.
+ /// </remarks>
+ struct RtDepthStencilState
+ {
+#pragma warning disable CS0649
+ public GpuVa Address;
+ public ZetaFormat Format;
+ public MemoryLayout MemoryLayout;
+ public int LayerSize;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Screen scissor state.
+ /// </summary>
+ /// <remarks>
+ /// The 16-bit fields are interleaved as X, Width, Y, Height (not X, Y, Width, Height).
+ /// NOTE(review): presumably overlaid on register memory (CS0649 is suppressed), so keep this order. Confirm.
+ /// </remarks>
+ struct ScreenScissorState
+ {
+#pragma warning disable CS0649
+ public ushort X;
+ public ushort Width;
+ public ushort Y;
+ public ushort Height;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Vertex attribute vector and component size.
+ /// </summary>
+ /// <remarks>
+ /// Member names read as component bit width times component count (Size16x3 is three 16-bit components).
+ /// The values are not contiguous; they are the raw 6-bit codes that
+ /// <see cref="VertexAttribState.UnpackSize"/> extracts from bits 21 to 26 of the attribute word.
+ /// </remarks>
+ enum VertexAttribSize
+ {
+ Size32x4 = 1,
+ Size32x3 = 2,
+ Size16x4 = 3,
+ Size32x2 = 4,
+ Size16x3 = 5,
+ Size8x4 = 0xa,
+ Size16x2 = 0xf,
+ Size32 = 0x12,
+ Size8x3 = 0x13,
+ Size8x2 = 0x18,
+ Size16 = 0x1b,
+ Size8 = 0x1d,
+ Rgb10A2 = 0x30,
+ Rg11B10 = 0x31
+ }
+
+ /// <summary>
+ /// Vertex attribute component type.
+ /// </summary>
+ /// <remarks>
+ /// Values start at 1; no member is defined for 0. They are the raw 3-bit codes that
+ /// <see cref="VertexAttribState.UnpackType"/> extracts from bits 27 to 29 of the attribute word.
+ /// </remarks>
+ enum VertexAttribType
+ {
+ Snorm = 1,
+ Unorm = 2,
+ Sint = 3,
+ Uint = 4,
+ Uscaled = 5,
+ Sscaled = 6,
+ Float = 7
+ }
+
+ /// <summary>
+ /// Vertex buffer attribute state, stored as a single packed 32-bit word.
+ /// </summary>
+ struct VertexAttribState
+ {
+#pragma warning disable CS0649
+ public uint Attribute;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Unpacks the index of the vertex buffer this attribute belongs to (bits 0 to 4).
+ /// </summary>
+ /// <returns>Vertex buffer index</returns>
+ public int UnpackBufferIndex() => (int)(Attribute & 0x1f);
+
+ /// <summary>
+ /// Unpacks the attribute constant flag (bit 6).
+ /// </summary>
+ /// <returns>True if the attribute is constant, false otherwise</returns>
+ public bool UnpackIsConstant() => (Attribute & 0x40) != 0;
+
+ /// <summary>
+ /// Unpacks the offset, in bytes, of the attribute on the vertex buffer (14 bits starting at bit 7).
+ /// </summary>
+ /// <returns>Attribute offset in bytes</returns>
+ public int UnpackOffset() => (int)((Attribute >> 7) & 0x3fff);
+
+ /// <summary>
+ /// Unpacks the Maxwell attribute format integer.
+ /// The size and type bits (21 to 29) are returned in place, without being shifted down.
+ /// </summary>
+ /// <returns>Attribute format integer</returns>
+ public uint UnpackFormat() => Attribute & 0x3fe00000;
+
+ /// <summary>
+ /// Unpacks the Maxwell attribute size (6 bits starting at bit 21).
+ /// </summary>
+ /// <returns>Attribute size</returns>
+ public VertexAttribSize UnpackSize() => (VertexAttribSize)((Attribute >> 21) & 0x3f);
+
+ /// <summary>
+ /// Unpacks the Maxwell attribute component type (3 bits starting at bit 27).
+ /// </summary>
+ /// <returns>Attribute component type</returns>
+ public VertexAttribType UnpackType() => (VertexAttribType)((Attribute >> 27) & 7);
+ }
+
+ /// <summary>
+ /// Render target draw buffers control, stored as a single packed 32-bit word.
+ /// </summary>
+ struct RtControl
+ {
+#pragma warning disable CS0649
+ public uint Packed;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Unpacks the number of active draw buffers (low 4 bits).
+ /// </summary>
+ /// <returns>Number of active draw buffers</returns>
+ public int UnpackCount() => (int)(Packed & 0xf);
+
+ /// <summary>
+ /// Unpacks the color attachment index for a given draw buffer.
+ /// </summary>
+ /// <param name="index">Index of the draw buffer</param>
+ /// <returns>Attachment index</returns>
+ public int UnpackPermutationIndex(int index)
+ {
+ // The 3-bit entries are packed back to back, right after the 4-bit count.
+ int shift = 4 + index * 3;
+
+ return (int)((Packed >> shift) & 7);
+ }
+ }
+
+ /// <summary>
+ /// 3D, 2D or 1D texture size.
+ /// </summary>
+ /// <remarks>
+ /// All three dimensions are always present in the struct.
+ /// NOTE(review): how Height and Depth are set for 2D and 1D sizes is not visible here. Confirm with the users.
+ /// </remarks>
+ struct Size3D
+ {
+#pragma warning disable CS0649
+ public int Width;
+ public int Height;
+ public int Depth;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Stencil front test state and masks.
+ /// </summary>
+ /// <remarks>
+ /// Holds the stencil enable flag plus the front face operations, compare function, reference and masks.
+ /// The front face fields are declared as FrontFuncRef, FrontFuncMask, FrontMask, which is a different
+ /// order from the back face fields in <see cref="StencilBackMasks"/> (FuncRef, Mask, FuncMask).
+ /// </remarks>
+ struct StencilTestState
+ {
+#pragma warning disable CS0649
+ public Boolean32 Enable;
+ public StencilOp FrontSFail;
+ public StencilOp FrontDpFail;
+ public StencilOp FrontDpPass;
+ public CompareOp FrontFunc;
+ public int FrontFuncRef;
+ public int FrontFuncMask;
+ public int FrontMask;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Screen Y control register flags: NegateY is bit 0 and TriangleRastFlip is bit 4.
+ /// </summary>
+ [Flags]
+ enum YControl
+ {
+ NegateY = 1 << 0,
+ TriangleRastFlip = 1 << 4
+ }
+
+ /// <summary>
+ /// RGB color whose components are 16-bit floats, each carried in the low half of a 32-bit word.
+ /// </summary>
+ struct RgbHalf
+ {
+#pragma warning disable CS0649
+     public uint R;
+     public uint G;
+     public uint B;
+     public uint Padding;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Reinterprets the low 16 bits of a component word as a half-precision float.
+     /// </summary>
+     /// <param name="component">Raw 32-bit component word</param>
+     /// <returns>The half-precision value with the same bit pattern as the low 16 bits</returns>
+     private static Half ToHalf(uint component)
+     {
+         // The cast keeps only the low 16 bits; the upper half of the word is ignored.
+         ushort bits = (ushort)component;
+         return Unsafe.As<ushort, Half>(ref bits);
+     }
+
+     /// <summary>
+     /// Reads the red component as a 16-bit float.
+     /// </summary>
+     /// <returns>The red component value</returns>
+     public Half UnpackR() => ToHalf(R);
+
+     /// <summary>
+     /// Reads the green component as a 16-bit float.
+     /// </summary>
+     /// <returns>The green component value</returns>
+     public Half UnpackG() => ToHalf(G);
+
+     /// <summary>
+     /// Reads the blue component as a 16-bit float.
+     /// </summary>
+     /// <returns>The blue component value</returns>
+     public Half UnpackB() => ToHalf(B);
+ }
+
+ /// <summary>
+ /// Condition for conditional rendering. Members use implicit numbering (Never = 0 through NotEqual = 4), so their order is significant.
+ /// </summary>
+ enum Condition
+ {
+ Never,
+ Always,
+ ResultNonZero,
+ Equal,
+ NotEqual
+ }
+
+ /// <summary>
+ /// Texture or sampler pool state: the pool address followed by its maximum ID.
+ /// </summary>
+ struct PoolState
+ {
+#pragma warning disable CS0649
+ public GpuVa Address;
+ public int MaximumId;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Stencil back test state: two-sided flag, the three back stencil operations and the back compare function (no reference value or masks are stored here).
+ /// </summary>
+ struct StencilBackTestState
+ {
+#pragma warning disable CS0649
+ public Boolean32 TwoSided;
+ public StencilOp BackSFail;
+ public StencilOp BackDpFail;
+ public StencilOp BackDpPass;
+ public CompareOp BackFunc;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Primitive restart state: an enable flag followed by the restart index.
+ /// </summary>
+ struct PrimitiveRestartState
+ {
+#pragma warning disable CS0649
+ public Boolean32 Enable;
+ public int Index;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// GPU index buffer state: start address, end address, index type and the First value.
+ /// This is used on indexed draws.
+ /// </summary>
+ struct IndexBufferState
+ {
+#pragma warning disable CS0649
+ public GpuVa Address;
+ public GpuVa EndAddress;
+ public IndexType Type;
+ public int First;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Face culling and orientation parameters: cull enable flag, front face selection and the face to cull.
+ /// </summary>
+ struct FaceState
+ {
+#pragma warning disable CS0649
+ public Boolean32 CullEnable;
+ public FrontFace FrontFace;
+ public Face CullFace;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// View volume clip control flags: ForceDepthRangeZeroToOne is bit 0 and DepthClampDisabled is bit 11.
+ /// </summary>
+ [Flags]
+ enum ViewVolumeClipControl
+ {
+ ForceDepthRangeZeroToOne = 1 << 0,
+ DepthClampDisabled = 1 << 11
+ }
+
+ /// <summary>
+ /// Logical operation state: an enable flag followed by the selected logical operation.
+ /// </summary>
+ struct LogicalOpState
+ {
+#pragma warning disable CS0649
+ public Boolean32 Enable;
+ public LogicalOp LogicalOp;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Per-render-target color write mask.
+ /// Each channel has one enable bit, spaced four bits apart in the packed word.
+ /// </summary>
+ struct RtColorMask
+ {
+     private const uint RedBit = 1u << 0;
+     private const uint GreenBit = 1u << 4;
+     private const uint BlueBit = 1u << 8;
+     private const uint AlphaBit = 1u << 12;
+
+#pragma warning disable CS0649
+     public uint Packed;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Reads the red channel write enable (bit 0).
+     /// </summary>
+     /// <returns>True to write the new red channel color, false to keep the old value</returns>
+     public bool UnpackRed() => (Packed & RedBit) != 0;
+
+     /// <summary>
+     /// Reads the green channel write enable (bit 4).
+     /// </summary>
+     /// <returns>True to write the new green channel color, false to keep the old value</returns>
+     public bool UnpackGreen() => (Packed & GreenBit) != 0;
+
+     /// <summary>
+     /// Reads the blue channel write enable (bit 8).
+     /// </summary>
+     /// <returns>True to write the new blue channel color, false to keep the old value</returns>
+     public bool UnpackBlue() => (Packed & BlueBit) != 0;
+
+     /// <summary>
+     /// Reads the alpha channel write enable (bit 12).
+     /// </summary>
+     /// <returns>True to write the new alpha channel color, false to keep the old value</returns>
+     public bool UnpackAlpha() => (Packed & AlphaBit) != 0;
+ }
+
+ /// <summary>
+ /// State of a single vertex buffer: control word, address and divisor.
+ /// </summary>
+ struct VertexBufferState
+ {
+#pragma warning disable CS0649
+     public uint Control;
+     public GpuVa Address;
+     public int Divisor;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Reads the vertex buffer stride, that is, how many bytes each vertex occupies in memory.
+     /// </summary>
+     /// <returns>The low 12 bits of <see cref="Control"/></returns>
+     public int UnpackStride()
+     {
+         const uint StrideMask = 0xfff;
+         return (int)(Control & StrideMask);
+     }
+
+     /// <summary>
+     /// Reads the vertex buffer enable flag (bit 12 of <see cref="Control"/>).
+     /// </summary>
+     /// <returns>True if the vertex buffer is enabled, false otherwise</returns>
+     public bool UnpackEnable()
+     {
+         return ((Control >> 12) & 1u) == 1u;
+     }
+ }
+
+ /// <summary>
+ /// Color buffer blending parameters, shared by all color buffers. Unlike <see cref="BlendState"/>, an unknown word sits between AlphaSrcFactor and AlphaDstFactor.
+ /// </summary>
+ struct BlendStateCommon
+ {
+#pragma warning disable CS0649
+ public Boolean32 SeparateAlpha;
+ public BlendOp ColorOp;
+ public BlendFactor ColorSrcFactor;
+ public BlendFactor ColorDstFactor;
+ public BlendOp AlphaOp;
+ public BlendFactor AlphaSrcFactor;
+ public uint Unknown0x1354;
+ public BlendFactor AlphaDstFactor;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// Color buffer blending parameters. Same fields as <see cref="BlendStateCommon"/>, but AlphaDstFactor directly follows AlphaSrcFactor and a padding word ends the struct.
+ /// </summary>
+ struct BlendState
+ {
+#pragma warning disable CS0649
+ public Boolean32 SeparateAlpha;
+ public BlendOp ColorOp;
+ public BlendFactor ColorSrcFactor;
+ public BlendFactor ColorDstFactor;
+ public BlendOp AlphaOp;
+ public BlendFactor AlphaSrcFactor;
+ public BlendFactor AlphaDstFactor;
+ public uint Padding;
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// State of one graphics shader stage.
+ /// </summary>
+ struct ShaderState
+ {
+#pragma warning disable CS0649
+     public uint Control;
+     public uint Offset;
+     public uint Unknown0x8;
+     public int MaxRegisters;
+     public ShaderType Type;
+     public uint Unknown0x14;
+     public uint Unknown0x18;
+     public uint Unknown0x1c;
+     public uint Unknown0x20;
+     public uint Unknown0x24;
+     public uint Unknown0x28;
+     public uint Unknown0x2c;
+     public uint Unknown0x30;
+     public uint Unknown0x34;
+     public uint Unknown0x38;
+     public uint Unknown0x3c;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Reads the stage enable flag (bit 0 of <see cref="Control"/>).
+     /// The result must be ignored for vertex shaders, which are always enabled.
+     /// </summary>
+     /// <returns>True if the stage is enabled, false otherwise</returns>
+     public bool UnpackEnable()
+     {
+         const uint EnableBit = 1u;
+         return (Control & EnableBit) == EnableBit;
+     }
+ }
+
+ /// <summary>
+ /// Uniform buffer state for the uniform buffer currently being modified: its size, address and offset.
+ /// </summary>
+ struct UniformBufferState
+ {
+#pragma warning disable CS0649
+ public int Size;
+ public GpuVa Address;
+ public int Offset;
+#pragma warning restore CS0649
+ }
+
+ unsafe struct ThreedClassState : IShadowState // Register block laid out field by field; order and the ReservedXXXX padding sizes determine every field's offset, so nothing here may be reordered.
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+ public fixed uint Reserved04[63]; // Padding; the hex suffix is the byte offset where the gap starts (0x04, right after the 4-byte SetObject).
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)(Notify);
+ public uint WaitForIdle;
+ public uint LoadMmeInstructionRamPointer;
+ public uint LoadMmeInstructionRam;
+ public uint LoadMmeStartAddressRamPointer;
+ public uint LoadMmeStartAddressRam;
+ public uint SetMmeShadowRamControl;
+ public SetMmeShadowRamControlMode SetMmeShadowRamControlMode => (SetMmeShadowRamControlMode)((SetMmeShadowRamControl >> 0) & 0x3);
+ public fixed uint Reserved128[2];
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public uint PmTriggerWfi;
+ public fixed uint Reserved148[2];
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[10];
+ public uint LineLengthIn;
+ public uint LineCount;
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+ public uint OffsetOut;
+ public uint PitchOut;
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOriginBytesX;
+ public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+ public uint SetDstOriginSamplesY;
+ public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+ public uint LaunchDma; // The accessors below each decode one bit field of this word; they are not listed in bit order.
+ public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+ public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+ public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+ public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0; // Bit 1.
+ public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+ public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+ public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0; // Bit 6.
+ public uint LoadInlineData;
+ public fixed uint Reserved1B8[22];
+ public Boolean32 EarlyZForce;
+ public fixed uint Reserved214[45];
+ public uint SyncpointAction;
+ public fixed uint Reserved2CC[10];
+ public uint BlendUcodeNormalizedDst;
+ public fixed uint Reserved2F8[10];
+ public TessMode TessMode;
+ public Array4<float> TessOuterLevel;
+ public Array2<float> TessInnerLevel;
+ public fixed uint Reserved33C[16];
+ public Boolean32 RasterizeEnable;
+ public Array4<TfBufferState> TfBufferState;
+ public fixed uint Reserved400[192];
+ public Array4<TfState> TfState;
+ public fixed uint Reserved740[1];
+ public Boolean32 TfEnable;
+ public fixed uint Reserved748[46];
+ public Array8<RtColorState> RtColorState;
+ public Array16<ViewportTransform> ViewportTransform;
+ public Array16<ViewportExtents> ViewportExtents;
+ public fixed uint ReservedD00[29];
+ public VertexBufferDrawState VertexBufferDrawState;
+ public uint DepthMode;
+ public ClearColors ClearColors;
+ public float ClearDepthValue;
+ public fixed uint ReservedD94[3];
+ public uint ClearStencilValue;
+ public fixed uint ReservedDA4[2];
+ public PolygonMode PolygonModeFront;
+ public PolygonMode PolygonModeBack;
+ public Boolean32 PolygonSmoothEnable;
+ public fixed uint ReservedDB8[2];
+ public DepthBiasState DepthBiasState;
+ public int PatchVertices;
+ public BlendUcodeEnable BlendUcodeEnable;
+ public uint BlendUcodeSize;
+ public fixed uint ReservedDD8[2];
+ public uint TextureBarrier;
+ public uint WatchdogTimer;
+ public Boolean32 PrimitiveRestartDrawArrays;
+ public uint ReservedDEC;
+ public uint LoadBlendUcodeStart;
+ public uint LoadBlendUcodeInstruction;
+ public fixed uint ReservedDF8[2];
+ public Array16<ScissorState> ScissorState;
+ public fixed uint ReservedF00[21];
+ public StencilBackMasks StencilBackMasks;
+ public fixed uint ReservedF60[5];
+ public uint InvalidateTextures;
+ public fixed uint ReservedF78[1];
+ public uint TextureBarrierTiled;
+ public fixed uint ReservedF80[4];
+ public Boolean32 RtColorMaskShared;
+ public fixed uint ReservedF94[19];
+ public RtDepthStencilState RtDepthStencilState;
+ public ScreenScissorState ScreenScissorState;
+ public fixed uint ReservedFFC[33];
+ public int DrawTextureDstX;
+ public int DrawTextureDstY;
+ public int DrawTextureDstWidth;
+ public int DrawTextureDstHeight;
+ public long DrawTextureDuDx; // 64-bit field, so it covers two consecutive 32-bit slots.
+ public long DrawTextureDvDy; // 64-bit field, so it covers two consecutive 32-bit slots.
+ public int DrawTextureSamplerId;
+ public int DrawTextureTextureId;
+ public int DrawTextureSrcX;
+ public int DrawTextureSrcY;
+ public fixed uint Reserved10B0[18];
+ public uint ClearFlags;
+ public fixed uint Reserved10FC[25];
+ public Array32<VertexAttribState> VertexAttribState;
+ public fixed uint Reserved11E0[13];
+ public uint DrawVertexArrayBeginEndInstanceFirst;
+ public uint DrawVertexArrayBeginEndInstanceSubsequent;
+ public RtControl RtControl;
+ public fixed uint Reserved1220[2];
+ public Size3D RtDepthStencilSize;
+ public SamplerIndex SamplerIndex;
+ public fixed uint Reserved1238[37];
+ public Boolean32 DepthTestEnable;
+ public fixed uint Reserved12D0[4];
+ public Boolean32 AlphaToCoverageDitherEnable;
+ public Boolean32 BlendIndependent;
+ public Boolean32 DepthWriteEnable;
+ public Boolean32 AlphaTestEnable;
+ public fixed uint Reserved12F0[5];
+ public uint VbElementU8;
+ public uint Reserved1308;
+ public CompareOp DepthTestFunc;
+ public float AlphaTestRef;
+ public CompareOp AlphaTestFunc;
+ public uint Reserved1318;
+ public ColorF BlendConstant;
+ public fixed uint Reserved132C[4];
+ public BlendStateCommon BlendStateCommon;
+ public Boolean32 BlendEnableCommon;
+ public Array8<Boolean32> BlendEnable;
+ public StencilTestState StencilTestState;
+ public fixed uint Reserved13A0[3];
+ public YControl YControl;
+ public float LineWidthSmooth;
+ public float LineWidthAliased;
+ public fixed uint Reserved13B8[27];
+ public uint InvalidateSamplerCacheNoWfi;
+ public uint InvalidateTextureHeaderCacheNoWfi;
+ public fixed uint Reserved142C[2];
+ public uint FirstVertex;
+ public uint FirstInstance;
+ public fixed uint Reserved143C[17];
+ public Array8<RgbHalf> BlendUcodeConstants;
+ public fixed uint Reserved1500[4];
+ public uint ClipDistanceEnable;
+ public uint Reserved1514;
+ public float PointSize;
+ public uint Reserved151C;
+ public Boolean32 PointSpriteEnable;
+ public fixed uint Reserved1524[3];
+ public uint ResetCounter;
+ public Boolean32 MultisampleEnable;
+ public Boolean32 RtDepthStencilEnable;
+ public uint MultisampleControl;
+ public fixed uint Reserved1540[4];
+ public GpuVa RenderEnableAddress;
+ public Condition RenderEnableCondition;
+ public PoolState SamplerPoolState;
+ public uint Reserved1568;
+ public float DepthBiasFactor;
+ public Boolean32 LineSmoothEnable;
+ public PoolState TexturePoolState;
+ public fixed uint Reserved1580[5];
+ public StencilBackTestState StencilBackTestState;
+ public fixed uint Reserved15A8[5];
+ public float DepthBiasUnits;
+ public fixed uint Reserved15C0[4];
+ public TextureMsaaMode RtMsaaMode;
+ public fixed uint Reserved15D4[5];
+ public uint VbElementU32;
+ public uint Reserved15EC;
+ public uint VbElementU16;
+ public fixed uint Reserved15F4[4];
+ public uint PointCoordReplace;
+ public GpuVa ShaderBaseAddress;
+ public uint Reserved1610;
+ public uint DrawEnd;
+ public uint DrawBegin;
+ public fixed uint Reserved161C[10];
+ public PrimitiveRestartState PrimitiveRestartState;
+ public fixed uint Reserved164C[95];
+ public IndexBufferState IndexBufferState;
+ public uint IndexBufferCount;
+ public uint DrawIndexBuffer32BeginEndInstanceFirst;
+ public uint DrawIndexBuffer16BeginEndInstanceFirst;
+ public uint DrawIndexBuffer8BeginEndInstanceFirst;
+ public uint DrawIndexBuffer32BeginEndInstanceSubsequent;
+ public uint DrawIndexBuffer16BeginEndInstanceSubsequent;
+ public uint DrawIndexBuffer8BeginEndInstanceSubsequent;
+ public fixed uint Reserved17FC[32];
+ public float DepthBiasClamp;
+ public Array16<Boolean32> VertexBufferInstanced;
+ public fixed uint Reserved18C0[20];
+ public Boolean32 VertexProgramPointSize;
+ public uint Reserved1914;
+ public FaceState FaceState;
+ public fixed uint Reserved1924[2];
+ public uint ViewportTransformEnable;
+ public fixed uint Reserved1930[3];
+ public ViewVolumeClipControl ViewVolumeClipControl;
+ public fixed uint Reserved1940[2];
+ public Boolean32 PrimitiveTypeOverrideEnable;
+ public fixed uint Reserved194C[9];
+ public PrimitiveTypeOverride PrimitiveTypeOverride;
+ public fixed uint Reserved1974[20];
+ public LogicalOpState LogicOpState;
+ public uint Reserved19CC;
+ public uint Clear;
+ public fixed uint Reserved19D4[11];
+ public Array8<RtColorMask> RtColorMask;
+ public fixed uint Reserved1A20[56];
+ public GpuVa SemaphoreAddress;
+ public int SemaphorePayload;
+ public uint SemaphoreControl;
+ public fixed uint Reserved1B10[60];
+ public Array16<VertexBufferState> VertexBufferState;
+ public fixed uint Reserved1D00[64];
+ public Array8<BlendState> BlendState;
+ public Array16<GpuVa> VertexBufferEndAddress;
+ public fixed uint Reserved1F80[32];
+ public Array6<ShaderState> ShaderState;
+ public fixed uint Reserved2180[96];
+ public uint SetFalcon00;
+ public uint SetFalcon01;
+ public uint SetFalcon02;
+ public uint SetFalcon03;
+ public uint SetFalcon04;
+ public uint SetFalcon05;
+ public uint SetFalcon06;
+ public uint SetFalcon07;
+ public uint SetFalcon08;
+ public uint SetFalcon09;
+ public uint SetFalcon10;
+ public uint SetFalcon11;
+ public uint SetFalcon12;
+ public uint SetFalcon13;
+ public uint SetFalcon14;
+ public uint SetFalcon15;
+ public uint SetFalcon16;
+ public uint SetFalcon17;
+ public uint SetFalcon18;
+ public uint SetFalcon19;
+ public uint SetFalcon20;
+ public uint SetFalcon21;
+ public uint SetFalcon22;
+ public uint SetFalcon23;
+ public uint SetFalcon24;
+ public uint SetFalcon25;
+ public uint SetFalcon26;
+ public uint SetFalcon27;
+ public uint SetFalcon28;
+ public uint SetFalcon29;
+ public uint SetFalcon30;
+ public uint SetFalcon31;
+ public UniformBufferState UniformBufferState;
+ public Array16<uint> UniformBufferUpdateData;
+ public fixed uint Reserved23D0[16];
+ public uint UniformBufferBindVertex;
+ public fixed uint Reserved2414[7];
+ public uint UniformBufferBindTessControl;
+ public fixed uint Reserved2434[7];
+ public uint UniformBufferBindTessEvaluation;
+ public fixed uint Reserved2454[7];
+ public uint UniformBufferBindGeometry;
+ public fixed uint Reserved2474[7];
+ public uint UniformBufferBindFragment;
+ public fixed uint Reserved2494[93];
+ public uint TextureBufferIndex;
+ public fixed uint Reserved260C[125];
+ public Array4<Array32<uint>> TfVaryingLocations;
+ public fixed uint Reserved2A00[640];
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
new file mode 100644
index 00000000..4ce53e78
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
@@ -0,0 +1,379 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Memory;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Twod
+{
+ /// <summary>
+ /// Represents a 2D engine class.
+ /// </summary>
+ class TwodClass : IDeviceState
+ {
+ private readonly GpuChannel _channel;
+ private readonly DeviceState<TwodClassState> _state;
+
+ /// <summary>
+ /// Builds the 2D engine class for a channel and hooks the register that triggers the blit.
+ /// </summary>
+ /// <param name="channel">The channel that will make use of the engine</param>
+ public TwodClass(GpuChannel channel)
+ {
+     _channel = channel;
+
+     // Only one register has a callback: PixelsFromMemorySrcY0Int starts the copy.
+     var callbacks = new Dictionary<string, RwCallback>();
+     callbacks.Add(nameof(TwodClassState.PixelsFromMemorySrcY0Int), new RwCallback(PixelsFromMemorySrcY0Int, null));
+
+     _state = new DeviceState<TwodClassState>(callbacks);
+ }
+
+ /// <summary>
+ /// Fetches the value of a class register.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <returns>Data at the specified offset</returns>
+ public int Read(int offset)
+ {
+     return _state.Read(offset);
+ }
+
+ /// <summary>
+ /// Stores a value into a class register.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <param name="data">Data to be written</param>
+ public void Write(int offset, int data)
+ {
+     _state.Write(offset, data);
+ }
+
+ /// <summary>
+ /// Checks whether the raw data of two textures can be copied from one to the other as-is.
+ /// That requires equal bytes per pixel, height, depth, layout kind and packed memory layout,
+ /// plus an equal stride (linear textures) or an equal width (block linear textures).
+ /// </summary>
+ /// <param name="lhs">Info for the first texture</param>
+ /// <param name="rhs">Info for the second texture</param>
+ /// <param name="lhsFormat">Format of the first texture</param>
+ /// <param name="rhsFormat">Format of the second texture</param>
+ /// <returns>True if the data is compatible, false otherwise</returns>
+ private bool IsDataCompatible(TwodTexture lhs, TwodTexture rhs, FormatInfo lhsFormat, FormatInfo rhsFormat)
+ {
+     bool sameShape =
+         lhsFormat.BytesPerPixel == rhsFormat.BytesPerPixel &&
+         lhs.Height == rhs.Height &&
+         lhs.Depth == rhs.Depth &&
+         lhs.LinearLayout == rhs.LinearLayout &&
+         lhs.MemoryLayout.Packed == rhs.MemoryLayout.Packed;
+
+     if (!sameShape)
+     {
+         return false;
+     }
+
+     // Row size is described by the stride for linear textures and by the width otherwise.
+     return lhs.LinearLayout
+         ? lhs.Stride == rhs.Stride
+         : lhs.Width == rhs.Width;
+ }
+
+ /// <summary>
+ /// Checks whether a region spans the whole texture, allowing for width alignment.
+ /// The region must start at (0, 0), end at the texture height, and its end x, once aligned up,
+ /// must equal the texture width (stride in pixels for linear textures).
+ /// </summary>
+ /// <param name="texture">The texture to check</param>
+ /// <param name="formatInfo">Format of the texture; only its bytes per pixel value is used</param>
+ /// <param name="x1">Region start x</param>
+ /// <param name="y1">Region start y</param>
+ /// <param name="x2">Region end x</param>
+ /// <param name="y2">Region end y</param>
+ /// <returns>True if the region covers the full texture, false otherwise</returns>
+ private bool IsCopyRegionComplete(TwodTexture texture, FormatInfo formatInfo, int x1, int y1, int x2, int y2)
+ {
+     bool startsAtOriginAndSpansHeight = x1 == 0 && y1 == 0 && y2 == texture.Height;
+
+     if (!startsAtOriginAndSpansHeight)
+     {
+         return false;
+     }
+
+     int bytesPerPixel = formatInfo.BytesPerPixel;
+     int fullWidth;
+     int alignment;
+
+     if (texture.LinearLayout)
+     {
+         // Linear textures: the row width in pixels comes from the stride, with no extra alignment.
+         alignment = 1;
+         fullWidth = texture.Stride / bytesPerPixel;
+     }
+     else
+     {
+         // Block linear textures: the end x is rounded up to a whole GOB worth of pixels.
+         alignment = Constants.GobAlignment / bytesPerPixel;
+         fullWidth = texture.Width;
+     }
+
+     return BitUtils.AlignUp(x2, alignment) == fullWidth;
+ }
+
+ /// <summary>
+ /// Performs a full data copy between two textures, reading and writing guest memory directly (no host texture is involved).
+ /// The textures must have a matching layout, size, and bytes per pixel, because the same offsets are used on both sides.
+ /// </summary>
+ /// <param name="src">The source texture</param>
+ /// <param name="dst">The destination texture</param>
+ /// <param name="w">Copy width</param>
+ /// <param name="h">Copy height</param>
+ /// <param name="bpp">Bytes per pixel</param>
+ private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
+ {
+ var srcCalculator = new OffsetCalculator(
+ w,
+ h,
+ src.Stride,
+ src.LinearLayout,
+ src.MemoryLayout.UnpackGobBlocksInY(),
+ src.MemoryLayout.UnpackGobBlocksInZ(),
+ bpp);
+
+ (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h); // Only the size of the w x h rectangle is kept; the first tuple element is discarded.
+
+ var memoryManager = _channel.MemoryManager;
+
+ ulong srcGpuVa = src.Address.Pack();
+ ulong dstGpuVa = dst.Address.Pack();
+
+ ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);
+
+ int width; // Full row width of the texture in pixels, as opposed to the copy width w.
+ int height = src.Height;
+ if (src.LinearLayout)
+ {
+ width = src.Stride / bpp;
+ }
+ else
+ {
+ width = src.Width;
+ }
+
+ // If the copy is not equal to the width and height of the texture, we will need to copy partially.
+ // It's worth noting that it has already been established that the src and dst are the same size.
+
+ if (w == width && h == height)
+ {
+ memoryManager.Write(dstGpuVa, srcSpan); // Whole texture: a single bulk write of the source bytes.
+ }
+ else
+ {
+ using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
+ Span<byte> dstSpan = dstRegion.Memory.Span;
+
+ if (src.LinearLayout)
+ {
+ int stride = src.Stride;
+ int offset = 0;
+ int lineSize = width * bpp; // NOTE(review): this uses the texture width/height rather than w/h although srcSize was computed for w x h; the caller in this file appears to reach this method only when w == Stride / bpp and h == Height for linear textures, which would make this branch unreachable from there. Confirm before relying on it.
+
+ for (int y = 0; y < height; y++)
+ {
+ srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));
+
+ offset += stride;
+ }
+ }
+ else
+ {
+ // Copy with the block linear layout in mind.
+ // Recreate the offset calculator with bpp 1 so that x coordinates are expressed in bytes.
+
+ int stride = w * bpp; // Bytes per copied row.
+
+ srcCalculator = new OffsetCalculator(
+ stride,
+ h,
+ 0,
+ false,
+ src.MemoryLayout.UnpackGobBlocksInY(),
+ src.MemoryLayout.UnpackGobBlocksInZ(),
+ 1);
+
+ int strideTrunc = BitUtils.AlignDown(stride, 16); // Part of each row that is moved as whole 16-byte vectors; the remainder is copied byte by byte below.
+
+ ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
+ Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);
+
+ for (int y = 0; y < h; y++)
+ {
+ int x = 0;
+
+ srcCalculator.SetY(y);
+
+ for (; x < strideTrunc; x += 16)
+ {
+ int offset = srcCalculator.GetOffset(x) >> 4; // Byte offset converted to a Vector128 index (16 bytes per element).
+
+ dstVec[offset] = srcVec[offset]; // The same offset is used for source and destination.
+ }
+
+ for (; x < stride; x++)
+ {
+ int offset = srcCalculator.GetOffset(x);
+
+ dstSpan[offset] = srcSpan[offset];
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Performs the blit operation, triggered by the register write.
+ /// Source and destination regions are derived from the current register state; the copy is done either directly on guest memory or through the host textures.
+ /// </summary>
+ /// <param name="argument">Method call argument (not read by this method; the value is taken from the register state instead)</param>
+ private void PixelsFromMemorySrcY0Int(int argument)
+ {
+ var memoryManager = _channel.MemoryManager;
+
+ var dstCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetDstFormat); // Reinterprets the registers starting at SetDstFormat as a TwodTexture and copies it into a local.
+ var srcCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetSrcFormat); // Local copy as well, so the Width adjustment further down does not write back to the register state.
+
+ long srcX = ((long)_state.State.SetPixelsFromMemorySrcX0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcX0Frac; // 32.32 fixed point: Int register in the high half, Frac register in the low half.
+ long srcY = ((long)_state.State.PixelsFromMemorySrcY0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcY0Frac;
+
+ long duDx = ((long)_state.State.SetPixelsFromMemoryDuDxInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDuDxFrac;
+ long dvDy = ((long)_state.State.SetPixelsFromMemoryDvDyInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDvDyFrac;
+
+ bool originCorner = _state.State.SetPixelsFromMemorySampleModeOrigin == SetPixelsFromMemorySampleModeOrigin.Corner;
+
+ if (originCorner)
+ {
+ // If the origin is corner, it is assumed that the guest API
+ // is manually centering the origin by adding an offset to the
+ // source region X/Y coordinates.
+ // Here we attempt to remove such offset to ensure we have the correct region.
+ // The offset is calculated as FactorXY / 2.0, where FactorXY = SrcXY / DstXY,
+ // so we do the same here by dividing the fixed point value by 2, while
+ // throwing away the fractional part to avoid rounding errors.
+ srcX -= (duDx >> 33) << 32;
+ srcY -= (dvDy >> 33) << 32;
+ }
+
+ int srcX1 = (int)(srcX >> 32);
+ int srcY1 = (int)(srcY >> 32);
+
+ int srcX2 = srcX1 + (int)((duDx * _state.State.SetPixelsFromMemoryDstWidth + uint.MaxValue) >> 32); // Adding uint.MaxValue before the shift rounds the fixed point extent up to a whole pixel.
+ int srcY2 = srcY1 + (int)((dvDy * _state.State.SetPixelsFromMemoryDstHeight + uint.MaxValue) >> 32);
+
+ int dstX1 = (int)_state.State.SetPixelsFromMemoryDstX0;
+ int dstY1 = (int)_state.State.SetPixelsFromMemoryDstY0;
+
+ int dstX2 = dstX1 + (int)_state.State.SetPixelsFromMemoryDstWidth;
+ int dstY2 = dstY1 + (int)_state.State.SetPixelsFromMemoryDstHeight;
+
+ // The source and destination textures should at least be as big as the region being requested.
+ // The hints will only resize within alignment constraints, so out of bound copies won't resize in most cases.
+ var srcHint = new Size(srcX2, srcY2, 1);
+ var dstHint = new Size(dstX2, dstY2, 1);
+
+ var srcCopyTextureFormat = srcCopyTexture.Format.Convert();
+
+ int srcWidthAligned = srcCopyTexture.Stride / srcCopyTextureFormat.BytesPerPixel;
+
+ ulong offset = 0;
+
+ // For an out of bounds copy, we must ensure that the copy wraps to the next line,
+ // so for a copy from a 64x64 texture, in the region [32, 96[, there are 32 pixels that are
+ // outside the bounds of the texture. We fill the destination with the first 32 pixels
+ // of the next line on the source texture.
+ // This can be done by simply adding an offset to the texture address, so that the initial
+ // gap is skipped and the copy is inside bounds again.
+ // This is required by the proprietary guest OpenGL driver.
+ if (srcCopyTexture.LinearLayout && srcCopyTexture.Width == srcX2 && srcX2 > srcWidthAligned && srcX1 > 0)
+ {
+ offset = (ulong)(srcX1 * srcCopyTextureFormat.BytesPerPixel);
+ srcCopyTexture.Width -= srcX1;
+ srcX2 -= srcX1;
+ srcX1 = 0;
+ }
+
+ FormatInfo dstCopyTextureFormat = dstCopyTexture.Format.Convert();
+
+ bool canDirectCopy = GraphicsConfig.Fast2DCopy &&
+ srcX2 == dstX2 && srcY2 == dstY2 &&
+ IsDataCompatible(srcCopyTexture, dstCopyTexture, srcCopyTextureFormat, dstCopyTextureFormat) &&
+ IsCopyRegionComplete(srcCopyTexture, srcCopyTextureFormat, srcX1, srcY1, srcX2, srcY2) &&
+ IsCopyRegionComplete(dstCopyTexture, dstCopyTextureFormat, dstX1, dstY1, dstX2, dstY2);
+
+ var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+ memoryManager,
+ srcCopyTexture,
+ offset,
+ srcCopyTextureFormat,
+ !canDirectCopy,
+ false,
+ srcHint);
+
+ if (srcTexture == null)
+ {
+ if (canDirectCopy)
+ {
+ // Directly copy the data on CPU.
+ UnscaledFullCopy(srcCopyTexture, dstCopyTexture, srcX2, srcY2, srcCopyTextureFormat.BytesPerPixel);
+ }
+
+ return; // No source texture was returned, so there is nothing to copy on the host side.
+ }
+
+ memoryManager.Physical.TextureCache.Lift(srcTexture);
+
+ // When the source texture that was found has a depth format,
+ // we must enforce the target texture also has a depth format,
+ // as copies between depth and color formats are not allowed.
+
+ if (srcTexture.Format.IsDepthOrStencil())
+ {
+ dstCopyTextureFormat = srcTexture.Info.FormatInfo;
+ }
+ else
+ {
+ dstCopyTextureFormat = dstCopyTexture.Format.Convert(); // Same expression that initialised dstCopyTextureFormat above.
+ }
+
+ var dstTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+ memoryManager,
+ dstCopyTexture,
+ 0,
+ dstCopyTextureFormat,
+ true,
+ srcTexture.ScaleMode == TextureScaleMode.Scaled,
+ dstHint);
+
+ if (dstTexture == null)
+ {
+ return;
+ }
+
+ if (srcTexture.Info.Samples > 1 || dstTexture.Info.Samples > 1)
+ {
+ srcTexture.PropagateScale(dstTexture);
+ }
+
+ float scale = srcTexture.ScaleFactor;
+ float dstScale = dstTexture.ScaleFactor;
+
+ Extents2D srcRegion = new Extents2D(
+ (int)Math.Ceiling(scale * (srcX1 / srcTexture.Info.SamplesInX)),
+ (int)Math.Ceiling(scale * (srcY1 / srcTexture.Info.SamplesInY)),
+ (int)Math.Ceiling(scale * (srcX2 / srcTexture.Info.SamplesInX)),
+ (int)Math.Ceiling(scale * (srcY2 / srcTexture.Info.SamplesInY)));
+
+ Extents2D dstRegion = new Extents2D(
+ (int)Math.Ceiling(dstScale * (dstX1 / dstTexture.Info.SamplesInX)),
+ (int)Math.Ceiling(dstScale * (dstY1 / dstTexture.Info.SamplesInY)),
+ (int)Math.Ceiling(dstScale * (dstX2 / dstTexture.Info.SamplesInX)),
+ (int)Math.Ceiling(dstScale * (dstY2 / dstTexture.Info.SamplesInY)));
+
+ bool linearFilter = _state.State.SetPixelsFromMemorySampleModeFilter == SetPixelsFromMemorySampleModeFilter.Bilinear;
+
+ srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);
+
+ dstTexture.SignalModified();
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
new file mode 100644
index 00000000..46fddb04
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
@@ -0,0 +1,816 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Twod
+{
+ /// <summary>
+ /// Notify type, taken from the whole <c>Notify</c> register by <c>TwodClassState.NotifyType</c>.
+ /// </summary>
+ enum NotifyType
+ {
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ }
+
+ /// <summary>
+ /// Format of the destination texture, decoded from bits 0-7 of the <c>SetDstFormat</c> register.
+ /// </summary>
+ enum SetDstFormatV
+ {
+ A8r8g8b8 = 207,
+ A8rl8gl8bl8 = 208,
+ A2r10g10b10 = 223,
+ A8b8g8r8 = 213,
+ A8bl8gl8rl8 = 214,
+ A2b10g10r10 = 209,
+ X8r8g8b8 = 230,
+ X8rl8gl8bl8 = 231,
+ X8b8g8r8 = 249,
+ X8bl8gl8rl8 = 250,
+ R5g6b5 = 232,
+ A1r5g5b5 = 233,
+ X1r5g5b5 = 248,
+ Y8 = 243,
+ Y16 = 238,
+ Y32 = 255,
+ Z1r5g5b5 = 251,
+ O1r5g5b5 = 252,
+ Z8r8g8b8 = 253,
+ O8r8g8b8 = 254,
+ Y18x8 = 28,
+ Rf16 = 242,
+ Rf32 = 229,
+ Rf32Gf32 = 203,
+ Rf16Gf16Bf16Af16 = 202,
+ Rf16Gf16Bf16X16 = 206,
+ Rf32Gf32Bf32Af32 = 192,
+ Rf32Gf32Bf32X32 = 195,
+ R16G16B16A16 = 198,
+ Rn16Gn16Bn16An16 = 199,
+ Bf10gf11rf11 = 224,
+ An8bn8gn8rn8 = 215,
+ Rf16Gf16 = 222,
+ R16G16 = 218,
+ Rn16Gn16 = 219,
+ G8r8 = 234,
+ Gn8rn8 = 235,
+ Rn16 = 239,
+ Rn8 = 244,
+ A8 = 247,
+ }
+
+ /// <summary>
+ /// Memory layout of the destination texture, decoded from bit 0 of the <c>SetDstMemoryLayout</c> register.
+ /// </summary>
+ enum SetDstMemoryLayoutV
+ {
+ Blocklinear = 0,
+ Pitch = 1,
+ }
+
+ /// <summary>
+ /// Block size height, in GOBs, of the destination texture, decoded from bits 4-6 of the <c>SetDstBlockSize</c> register.
+ /// </summary>
+ enum SetDstBlockSizeHeight
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Block size depth, in GOBs, of the destination texture, decoded from bits 8-10 of the <c>SetDstBlockSize</c> register.
+ /// </summary>
+ enum SetDstBlockSizeDepth
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Format of the source texture, decoded from bits 0-7 of the <c>SetSrcFormat</c> register (same members as <c>SetDstFormatV</c>, plus <c>Ay8</c>).
+ /// </summary>
+ enum SetSrcFormatV
+ {
+ A8r8g8b8 = 207,
+ A8rl8gl8bl8 = 208,
+ A2r10g10b10 = 223,
+ A8b8g8r8 = 213,
+ A8bl8gl8rl8 = 214,
+ A2b10g10r10 = 209,
+ X8r8g8b8 = 230,
+ X8rl8gl8bl8 = 231,
+ X8b8g8r8 = 249,
+ X8bl8gl8rl8 = 250,
+ R5g6b5 = 232,
+ A1r5g5b5 = 233,
+ X1r5g5b5 = 248,
+ Y8 = 243,
+ Ay8 = 29,
+ Y16 = 238,
+ Y32 = 255,
+ Z1r5g5b5 = 251,
+ O1r5g5b5 = 252,
+ Z8r8g8b8 = 253,
+ O8r8g8b8 = 254,
+ Y18x8 = 28,
+ Rf16 = 242,
+ Rf32 = 229,
+ Rf32Gf32 = 203,
+ Rf16Gf16Bf16Af16 = 202,
+ Rf16Gf16Bf16X16 = 206,
+ Rf32Gf32Bf32Af32 = 192,
+ Rf32Gf32Bf32X32 = 195,
+ R16G16B16A16 = 198,
+ Rn16Gn16Bn16An16 = 199,
+ Bf10gf11rf11 = 224,
+ An8bn8gn8rn8 = 215,
+ Rf16Gf16 = 222,
+ R16G16 = 218,
+ Rn16Gn16 = 219,
+ G8r8 = 234,
+ Gn8rn8 = 235,
+ Rn16 = 239,
+ Rn8 = 244,
+ A8 = 247,
+ }
+
+ /// <summary>
+ /// Memory layout of the source texture, decoded from bit 0 of the <c>SetSrcMemoryLayout</c> register.
+ /// </summary>
+ enum SetSrcMemoryLayoutV
+ {
+ Blocklinear = 0,
+ Pitch = 1,
+ }
+
+ /// <summary>
+ /// Block size height, in GOBs, of the source texture, decoded from bits 4-6 of the <c>SetSrcBlockSize</c> register.
+ /// </summary>
+ enum SetSrcBlockSizeHeight
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Block size depth, in GOBs, of the source texture, decoded from bits 8-10 of the <c>SetSrcBlockSize</c> register.
+ /// </summary>
+ enum SetSrcBlockSizeDepth
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Texture data caches to invalidate, decoded from bits 0-1 of the <c>TwodInvalidateTextureDataCache</c> register.
+ /// </summary>
+ enum TwodInvalidateTextureDataCacheV
+ {
+ L1Only = 0,
+ L2Only = 1,
+ L1AndL2 = 2,
+ }
+
+ /// <summary>
+ /// Sector promotion parameters, decoded from bits 0-1 of the <c>SetPixelsFromMemorySectorPromotion</c> register.
+ /// </summary>
+ enum SetPixelsFromMemorySectorPromotionV
+ {
+ NoPromotion = 0,
+ PromoteTo2V = 1,
+ PromoteTo2H = 2,
+ PromoteTo4 = 3,
+ }
+
+ /// <summary>
+ /// Number of processing clusters, decoded from bit 0 of the <c>SetNumProcessingClusters</c> register.
+ /// </summary>
+ enum SetNumProcessingClustersV
+ {
+ All = 0,
+ One = 1,
+ }
+
+ /// <summary>
+ /// Color key format, decoded from bits 0-2 of the <c>SetColorKeyFormat</c> register.
+ /// </summary>
+ enum SetColorKeyFormatV
+ {
+ A16r5g6b5 = 0,
+ A1r5g5b5 = 1,
+ A8r8g8b8 = 2,
+ A2r10g10b10 = 3,
+ Y8 = 4,
+ Y16 = 5,
+ Y32 = 6,
+ }
+
+ /// <summary>
+ /// Color blit operation, decoded from bits 0-2 of the <c>SetOperation</c> register.
+ /// </summary>
+ enum SetOperationV
+ {
+ SrccopyAnd = 0,
+ RopAnd = 1,
+ BlendAnd = 2,
+ Srccopy = 3,
+ Rop = 4,
+ SrccopyPremult = 5,
+ BlendPremult = 6,
+ }
+
+ /// <summary>
+ /// Texture pattern selection, decoded from bits 0-1 of the <c>SetPatternSelect</c> register.
+ /// </summary>
+ enum SetPatternSelectV
+ {
+ Monochrome8x8 = 0,
+ Monochrome64x1 = 1,
+ Monochrome1x64 = 2,
+ Color = 3,
+ }
+
+ /// <summary>
+ /// Render enable override mode, decoded from bits 0-1 of the <c>SetRenderEnableOverride</c> register.
+ /// </summary>
+ enum SetRenderEnableOverrideMode
+ {
+ UseRenderEnable = 0,
+ AlwaysRender = 1,
+ NeverRender = 2,
+ }
+
+ /// <summary>
+ /// Pixels from memory horizontal direction, decoded from bits 0-1 of the <c>SetPixelsFromMemoryDirection</c> register.
+ /// </summary>
+ enum SetPixelsFromMemoryDirectionHorizontal
+ {
+ HwDecides = 0,
+ LeftToRight = 1,
+ RightToLeft = 2,
+ }
+
+ /// <summary>
+ /// Pixels from memory vertical direction, decoded from bits 4-5 of the <c>SetPixelsFromMemoryDirection</c> register.
+ /// </summary>
+ enum SetPixelsFromMemoryDirectionVertical
+ {
+ HwDecides = 0,
+ TopToBottom = 1,
+ BottomToTop = 2,
+ }
+
+ /// <summary>
+ /// Color format of the monochrome pattern, decoded from bits 0-2 of the <c>SetMonochromePatternColorFormat</c> register.
+ /// </summary>
+ enum SetMonochromePatternColorFormatV
+ {
+ A8x8r5g6b5 = 0,
+ A1r5g5b5 = 1,
+ A8r8g8b8 = 2,
+ A8y8 = 3,
+ A8x8y16 = 4,
+ Y32 = 5,
+ ByteExpand = 6,
+ }
+
+ /// <summary>
+ /// Format of the monochrome pattern, decoded from bit 0 of the <c>SetMonochromePatternFormat</c> register.
+ /// </summary>
+ enum SetMonochromePatternFormatV
+ {
+ Cga6M1 = 0,
+ LeM1 = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction operation, decoded from bits 0-2 of the <c>MmeDmaReduction</c> register.
+ /// </summary>
+ enum MmeDmaReductionReductionOp
+ {
+ RedAdd = 0,
+ RedMin = 1,
+ RedMax = 2,
+ RedInc = 3,
+ RedDec = 4,
+ RedAnd = 5,
+ RedOr = 6,
+ RedXor = 7,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction format, decoded from bits 4-5 of the <c>MmeDmaReduction</c> register (a 2-bit field, so raw values 2 and 3 have no named member).
+ /// </summary>
+ enum MmeDmaReductionReductionFormat
+ {
+ Unsigned = 0,
+ Signed = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction size, decoded from bit 8 of the <c>MmeDmaReduction</c> register.
+ /// </summary>
+ enum MmeDmaReductionReductionSize
+ {
+ FourBytes = 0,
+ EightBytes = 1,
+ }
+
+ /// <summary>
+ /// Data FIFO size, decoded from bits 0-2 of the <c>SetMmeDataFifoConfig</c> register.
+ /// </summary>
+ enum SetMmeDataFifoConfigFifoSize
+ {
+ Size0kb = 0,
+ Size4kb = 1,
+ Size8kb = 2,
+ Size12kb = 3,
+ Size16kb = 4,
+ }
+
+ /// <summary>
+ /// Render solid primitive mode, decoded from bits 0-2 of the <c>RenderSolidPrimMode</c> register.
+ /// </summary>
+ enum RenderSolidPrimModeV
+ {
+ Points = 0,
+ Lines = 1,
+ Polyline = 2,
+ Triangles = 3,
+ Rects = 4,
+ }
+
+ /// <summary>
+ /// Render solid primitive color format, decoded from bits 0-7 of the <c>SetRenderSolidPrimColorFormat</c> register.
+ /// </summary>
+ enum SetRenderSolidPrimColorFormatV
+ {
+ Rf32Gf32Bf32Af32 = 192,
+ Rf16Gf16Bf16Af16 = 202,
+ Rf32Gf32 = 203,
+ A8r8g8b8 = 207,
+ A2r10g10b10 = 223,
+ A8b8g8r8 = 213,
+ A2b10g10r10 = 209,
+ X8r8g8b8 = 230,
+ X8b8g8r8 = 249,
+ R5g6b5 = 232,
+ A1r5g5b5 = 233,
+ X1r5g5b5 = 248,
+ Y8 = 243,
+ Y16 = 238,
+ Y32 = 255,
+ Z1r5g5b5 = 251,
+ O1r5g5b5 = 252,
+ Z8r8g8b8 = 253,
+ O8r8g8b8 = 254,
+ }
+
+ /// <summary>
+ /// Pixels from CPU data type, decoded from bit 0 of the <c>SetPixelsFromCpuDataType</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuDataTypeV
+ {
+ Color = 0,
+ Index = 1,
+ }
+
+ /// <summary>
+ /// Pixels from CPU color format, decoded from bits 0-7 of the <c>SetPixelsFromCpuColorFormat</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuColorFormatV
+ {
+ A8r8g8b8 = 207,
+ A2r10g10b10 = 223,
+ A8b8g8r8 = 213,
+ A2b10g10r10 = 209,
+ X8r8g8b8 = 230,
+ X8b8g8r8 = 249,
+ R5g6b5 = 232,
+ A1r5g5b5 = 233,
+ X1r5g5b5 = 248,
+ Y8 = 243,
+ Y16 = 238,
+ Y32 = 255,
+ Z1r5g5b5 = 251,
+ O1r5g5b5 = 252,
+ Z8r8g8b8 = 253,
+ O8r8g8b8 = 254,
+ }
+
+ /// <summary>
+ /// Pixels from CPU palette index format, decoded from bits 0-1 of the <c>SetPixelsFromCpuIndexFormat</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuIndexFormatV
+ {
+ I1 = 0,
+ I4 = 1,
+ I8 = 2,
+ }
+
+ /// <summary>
+ /// Pixels from CPU monochrome format, decoded from bit 0 of the <c>SetPixelsFromCpuMonoFormat</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuMonoFormatV
+ {
+ Cga6M1 = 0,
+ LeM1 = 1,
+ }
+
+ /// <summary>
+ /// Pixels from CPU wrap mode, decoded from bits 0-1 of the <c>SetPixelsFromCpuWrap</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuWrapV
+ {
+ WrapPixel = 0,
+ WrapByte = 1,
+ WrapDword = 2,
+ }
+
+ /// <summary>
+ /// Pixels from CPU monochrome opacity, decoded from bit 0 of the <c>SetPixelsFromCpuMonoOpacity</c> register.
+ /// </summary>
+ enum SetPixelsFromCpuMonoOpacityV
+ {
+ Transparent = 0,
+ Opaque = 1,
+ }
+
+ /// <summary>
+ /// Pixels from memory block shape, decoded from bits 0-2 of the <c>SetPixelsFromMemoryBlockShape</c> register.
+ /// </summary>
+ enum SetPixelsFromMemoryBlockShapeV
+ {
+ Auto = 0,
+ Shape8x8 = 1,
+ Shape16x4 = 2,
+ }
+
+ /// <summary>
+ /// Pixels from memory origin, decoded from bit 0 of the <c>SetPixelsFromMemorySampleMode</c> register.
+ /// </summary>
+ enum SetPixelsFromMemorySampleModeOrigin
+ {
+ Center = 0,
+ Corner = 1,
+ }
+
+ /// <summary>
+ /// Pixels from memory filter mode, decoded from bit 4 of the <c>SetPixelsFromMemorySampleMode</c> register.
+ /// </summary>
+ enum SetPixelsFromMemorySampleModeFilter
+ {
+ Point = 0,
+ Bilinear = 1,
+ }
+
+ /// <summary>
+ /// Render solid primitive point coordinates; element type of the 64-entry <c>TwodClassState.RenderSolidPrimPoint</c> array.
+ /// </summary>
+ struct RenderSolidPrimPoint
+ {
+#pragma warning disable CS0649
+ public uint SetX; // First word of the pair. NOTE(review): the Set prefix appears only on X, presumably following the generated register names - confirm.
+ public uint Y; // Second word of the pair.
+#pragma warning restore CS0649
+ }
+
+ /// <summary>
+ /// 2D class state: one <c>uint</c> field per register, each followed by read-only accessors that decode its bit fields. Declaration order fixes every offset, so fields must not be reordered.
+ /// </summary>
+ unsafe struct TwodClassState : IShadowState
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+ public fixed uint Reserved04[63]; // Padding: 63 words starting at byte offset 0x04, which puts NoOperation at 0x100. The ReservedXX suffixes appear to be hex byte offsets.
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0x1FFFFFF);
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)(Notify);
+ public uint WaitForIdle;
+ public uint LoadMmeInstructionRamPointer;
+ public uint LoadMmeInstructionRam;
+ public uint LoadMmeStartAddressRamPointer;
+ public uint LoadMmeStartAddressRam;
+ public uint SetMmeShadowRamControl;
+ public SetMmeShadowRamControlMode SetMmeShadowRamControlMode => (SetMmeShadowRamControlMode)((SetMmeShadowRamControl >> 0) & 0x3);
+ public fixed uint Reserved128[2];
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public fixed uint Reserved144[3];
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[37];
+ public uint SetMmeSwitchState;
+ public bool SetMmeSwitchStateValid => (SetMmeSwitchState & 0x1) != 0;
+ public int SetMmeSwitchStateSaveMacro => (int)((SetMmeSwitchState >> 4) & 0xFF);
+ public int SetMmeSwitchStateRestoreMacro => (int)((SetMmeSwitchState >> 12) & 0xFF);
+ public fixed uint Reserved1F0[4];
+ public uint SetDstFormat;
+ public SetDstFormatV SetDstFormatV => (SetDstFormatV)((SetDstFormat >> 0) & 0xFF);
+ public uint SetDstMemoryLayout;
+ public SetDstMemoryLayoutV SetDstMemoryLayoutV => (SetDstMemoryLayoutV)((SetDstMemoryLayout >> 0) & 0x1);
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0x7);
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0x7);
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstPitch;
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstOffsetUpper;
+ public int SetDstOffsetUpperV => (int)((SetDstOffsetUpper >> 0) & 0xFF);
+ public uint SetDstOffsetLower;
+ public uint FlushAndInvalidateRopMiniCache;
+ public bool FlushAndInvalidateRopMiniCacheV => (FlushAndInvalidateRopMiniCache & 0x1) != 0;
+ public uint SetSpareNoop06;
+ public uint SetSrcFormat;
+ public SetSrcFormatV SetSrcFormatV => (SetSrcFormatV)((SetSrcFormat >> 0) & 0xFF);
+ public uint SetSrcMemoryLayout;
+ public SetSrcMemoryLayoutV SetSrcMemoryLayoutV => (SetSrcMemoryLayoutV)((SetSrcMemoryLayout >> 0) & 0x1);
+ public uint SetSrcBlockSize;
+ public SetSrcBlockSizeHeight SetSrcBlockSizeHeight => (SetSrcBlockSizeHeight)((SetSrcBlockSize >> 4) & 0x7);
+ public SetSrcBlockSizeDepth SetSrcBlockSizeDepth => (SetSrcBlockSizeDepth)((SetSrcBlockSize >> 8) & 0x7);
+ public uint SetSrcDepth;
+ public uint TwodInvalidateTextureDataCache;
+ public TwodInvalidateTextureDataCacheV TwodInvalidateTextureDataCacheV => (TwodInvalidateTextureDataCacheV)((TwodInvalidateTextureDataCache >> 0) & 0x3);
+ public uint SetSrcPitch;
+ public uint SetSrcWidth;
+ public uint SetSrcHeight;
+ public uint SetSrcOffsetUpper;
+ public int SetSrcOffsetUpperV => (int)((SetSrcOffsetUpper >> 0) & 0xFF);
+ public uint SetSrcOffsetLower;
+ public uint SetPixelsFromMemorySectorPromotion;
+ public SetPixelsFromMemorySectorPromotionV SetPixelsFromMemorySectorPromotionV => (SetPixelsFromMemorySectorPromotionV)((SetPixelsFromMemorySectorPromotion >> 0) & 0x3);
+ public uint SetSpareNoop12;
+ public uint SetNumProcessingClusters;
+ public SetNumProcessingClustersV SetNumProcessingClustersV => (SetNumProcessingClustersV)((SetNumProcessingClusters >> 0) & 0x1);
+ public uint SetRenderEnableA;
+ public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+ public uint SetRenderEnableB;
+ public uint SetRenderEnableC;
+ public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+ public uint SetSpareNoop08;
+ public uint SetSpareNoop01;
+ public uint SetSpareNoop11;
+ public uint SetSpareNoop07;
+ public uint SetClipX0;
+ public uint SetClipY0;
+ public uint SetClipWidth;
+ public uint SetClipHeight;
+ public uint SetClipEnable;
+ public bool SetClipEnableV => (SetClipEnable & 0x1) != 0;
+ public uint SetColorKeyFormat;
+ public SetColorKeyFormatV SetColorKeyFormatV => (SetColorKeyFormatV)((SetColorKeyFormat >> 0) & 0x7);
+ public uint SetColorKey;
+ public uint SetColorKeyEnable;
+ public bool SetColorKeyEnableV => (SetColorKeyEnable & 0x1) != 0;
+ public uint SetRop;
+ public int SetRopV => (int)((SetRop >> 0) & 0xFF);
+ public uint SetBeta1;
+ public uint SetBeta4;
+ public int SetBeta4B => (int)((SetBeta4 >> 0) & 0xFF);
+ public int SetBeta4G => (int)((SetBeta4 >> 8) & 0xFF);
+ public int SetBeta4R => (int)((SetBeta4 >> 16) & 0xFF);
+ public int SetBeta4A => (int)((SetBeta4 >> 24) & 0xFF);
+ public uint SetOperation;
+ public SetOperationV SetOperationV => (SetOperationV)((SetOperation >> 0) & 0x7);
+ public uint SetPatternOffset;
+ public int SetPatternOffsetX => (int)((SetPatternOffset >> 0) & 0x3F);
+ public int SetPatternOffsetY => (int)((SetPatternOffset >> 8) & 0x3F);
+ public uint SetPatternSelect;
+ public SetPatternSelectV SetPatternSelectV => (SetPatternSelectV)((SetPatternSelect >> 0) & 0x3);
+ public uint SetDstColorRenderToZetaSurface;
+ public bool SetDstColorRenderToZetaSurfaceV => (SetDstColorRenderToZetaSurface & 0x1) != 0;
+ public uint SetSpareNoop04;
+ public uint SetSpareNoop15;
+ public uint SetSpareNoop13;
+ public uint SetSpareNoop03;
+ public uint SetSpareNoop14;
+ public uint SetSpareNoop02;
+ public uint SetCompression;
+ public bool SetCompressionEnable => (SetCompression & 0x1) != 0;
+ public uint SetSpareNoop09;
+ public uint SetRenderEnableOverride;
+ public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3);
+ public uint SetPixelsFromMemoryDirection;
+ public SetPixelsFromMemoryDirectionHorizontal SetPixelsFromMemoryDirectionHorizontal => (SetPixelsFromMemoryDirectionHorizontal)((SetPixelsFromMemoryDirection >> 0) & 0x3);
+ public SetPixelsFromMemoryDirectionVertical SetPixelsFromMemoryDirectionVertical => (SetPixelsFromMemoryDirectionVertical)((SetPixelsFromMemoryDirection >> 4) & 0x3);
+ public uint SetSpareNoop10;
+ public uint SetMonochromePatternColorFormat;
+ public SetMonochromePatternColorFormatV SetMonochromePatternColorFormatV => (SetMonochromePatternColorFormatV)((SetMonochromePatternColorFormat >> 0) & 0x7);
+ public uint SetMonochromePatternFormat;
+ public SetMonochromePatternFormatV SetMonochromePatternFormatV => (SetMonochromePatternFormatV)((SetMonochromePatternFormat >> 0) & 0x1);
+ public uint SetMonochromePatternColor0;
+ public uint SetMonochromePatternColor1;
+ public uint SetMonochromePattern0;
+ public uint SetMonochromePattern1;
+ public Array64<uint> ColorPatternX8r8g8b8;
+ public int ColorPatternX8r8g8b8B0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 0) & 0xFF);
+ public int ColorPatternX8r8g8b8G0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 8) & 0xFF);
+ public int ColorPatternX8r8g8b8R0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 16) & 0xFF);
+ public int ColorPatternX8r8g8b8Ignore0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 24) & 0xFF);
+ public Array32<uint> ColorPatternR5g6b5; // Each word holds two 16-bit entries: the *0 accessors read the low half, the *1 accessors the high half.
+ public int ColorPatternR5g6b5B0(int i) => (int)((ColorPatternR5g6b5[i] >> 0) & 0x1F);
+ public int ColorPatternR5g6b5G0(int i) => (int)((ColorPatternR5g6b5[i] >> 5) & 0x3F);
+ public int ColorPatternR5g6b5R0(int i) => (int)((ColorPatternR5g6b5[i] >> 11) & 0x1F);
+ public int ColorPatternR5g6b5B1(int i) => (int)((ColorPatternR5g6b5[i] >> 16) & 0x1F);
+ public int ColorPatternR5g6b5G1(int i) => (int)((ColorPatternR5g6b5[i] >> 21) & 0x3F);
+ public int ColorPatternR5g6b5R1(int i) => (int)((ColorPatternR5g6b5[i] >> 27) & 0x1F);
+ public Array32<uint> ColorPatternX1r5g5b5;
+ public int ColorPatternX1r5g5b5B0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 0) & 0x1F);
+ public int ColorPatternX1r5g5b5G0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 5) & 0x1F);
+ public int ColorPatternX1r5g5b5R0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 10) & 0x1F);
+ public bool ColorPatternX1r5g5b5Ignore0(int i) => (ColorPatternX1r5g5b5[i] & 0x8000) != 0;
+ public int ColorPatternX1r5g5b5B1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 16) & 0x1F);
+ public int ColorPatternX1r5g5b5G1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 21) & 0x1F);
+ public int ColorPatternX1r5g5b5R1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 26) & 0x1F);
+ public bool ColorPatternX1r5g5b5Ignore1(int i) => (ColorPatternX1r5g5b5[i] & 0x80000000) != 0;
+ public Array16<uint> ColorPatternY8;
+ public int ColorPatternY8Y0(int i) => (int)((ColorPatternY8[i] >> 0) & 0xFF);
+ public int ColorPatternY8Y1(int i) => (int)((ColorPatternY8[i] >> 8) & 0xFF);
+ public int ColorPatternY8Y2(int i) => (int)((ColorPatternY8[i] >> 16) & 0xFF);
+ public int ColorPatternY8Y3(int i) => (int)((ColorPatternY8[i] >> 24) & 0xFF);
+ public uint SetRenderSolidPrimColor0;
+ public uint SetRenderSolidPrimColor1;
+ public uint SetRenderSolidPrimColor2;
+ public uint SetRenderSolidPrimColor3;
+ public uint SetMmeMemAddressA;
+ public int SetMmeMemAddressAUpper => (int)((SetMmeMemAddressA >> 0) & 0x1FFFFFF);
+ public uint SetMmeMemAddressB;
+ public uint SetMmeDataRamAddress;
+ public uint MmeDmaRead;
+ public uint MmeDmaReadFifoed;
+ public uint MmeDmaWrite;
+ public uint MmeDmaReduction;
+ public MmeDmaReductionReductionOp MmeDmaReductionReductionOp => (MmeDmaReductionReductionOp)((MmeDmaReduction >> 0) & 0x7);
+ public MmeDmaReductionReductionFormat MmeDmaReductionReductionFormat => (MmeDmaReductionReductionFormat)((MmeDmaReduction >> 4) & 0x3); // 2-bit mask, while the enum names only 0 and 1.
+ public MmeDmaReductionReductionSize MmeDmaReductionReductionSize => (MmeDmaReductionReductionSize)((MmeDmaReduction >> 8) & 0x1);
+ public uint MmeDmaSysmembar;
+ public bool MmeDmaSysmembarV => (MmeDmaSysmembar & 0x1) != 0;
+ public uint MmeDmaSync;
+ public uint SetMmeDataFifoConfig;
+ public SetMmeDataFifoConfigFifoSize SetMmeDataFifoConfigFifoSize => (SetMmeDataFifoConfigFifoSize)((SetMmeDataFifoConfig >> 0) & 0x7);
+ public fixed uint Reserved578[2];
+ public uint RenderSolidPrimMode;
+ public RenderSolidPrimModeV RenderSolidPrimModeV => (RenderSolidPrimModeV)((RenderSolidPrimMode >> 0) & 0x7);
+ public uint SetRenderSolidPrimColorFormat;
+ public SetRenderSolidPrimColorFormatV SetRenderSolidPrimColorFormatV => (SetRenderSolidPrimColorFormatV)((SetRenderSolidPrimColorFormat >> 0) & 0xFF);
+ public uint SetRenderSolidPrimColor;
+ public uint SetRenderSolidLineTieBreakBits;
+ public bool SetRenderSolidLineTieBreakBitsXmajXincYinc => (SetRenderSolidLineTieBreakBits & 0x1) != 0;
+ public bool SetRenderSolidLineTieBreakBitsXmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x10) != 0;
+ public bool SetRenderSolidLineTieBreakBitsYmajXincYinc => (SetRenderSolidLineTieBreakBits & 0x100) != 0;
+ public bool SetRenderSolidLineTieBreakBitsYmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x1000) != 0;
+ public fixed uint Reserved590[20];
+ public uint RenderSolidPrimPointXY;
+ public int RenderSolidPrimPointXYX => (int)((RenderSolidPrimPointXY >> 0) & 0xFFFF);
+ public int RenderSolidPrimPointXYY => (int)((RenderSolidPrimPointXY >> 16) & 0xFFFF);
+ public fixed uint Reserved5E4[7];
+ public Array64<RenderSolidPrimPoint> RenderSolidPrimPoint;
+ public uint SetPixelsFromCpuDataType;
+ public SetPixelsFromCpuDataTypeV SetPixelsFromCpuDataTypeV => (SetPixelsFromCpuDataTypeV)((SetPixelsFromCpuDataType >> 0) & 0x1);
+ public uint SetPixelsFromCpuColorFormat;
+ public SetPixelsFromCpuColorFormatV SetPixelsFromCpuColorFormatV => (SetPixelsFromCpuColorFormatV)((SetPixelsFromCpuColorFormat >> 0) & 0xFF);
+ public uint SetPixelsFromCpuIndexFormat;
+ public SetPixelsFromCpuIndexFormatV SetPixelsFromCpuIndexFormatV => (SetPixelsFromCpuIndexFormatV)((SetPixelsFromCpuIndexFormat >> 0) & 0x3);
+ public uint SetPixelsFromCpuMonoFormat;
+ public SetPixelsFromCpuMonoFormatV SetPixelsFromCpuMonoFormatV => (SetPixelsFromCpuMonoFormatV)((SetPixelsFromCpuMonoFormat >> 0) & 0x1);
+ public uint SetPixelsFromCpuWrap;
+ public SetPixelsFromCpuWrapV SetPixelsFromCpuWrapV => (SetPixelsFromCpuWrapV)((SetPixelsFromCpuWrap >> 0) & 0x3);
+ public uint SetPixelsFromCpuColor0;
+ public uint SetPixelsFromCpuColor1;
+ public uint SetPixelsFromCpuMonoOpacity;
+ public SetPixelsFromCpuMonoOpacityV SetPixelsFromCpuMonoOpacityV => (SetPixelsFromCpuMonoOpacityV)((SetPixelsFromCpuMonoOpacity >> 0) & 0x1);
+ public fixed uint Reserved820[6];
+ public uint SetPixelsFromCpuSrcWidth;
+ public uint SetPixelsFromCpuSrcHeight;
+ public uint SetPixelsFromCpuDxDuFrac;
+ public uint SetPixelsFromCpuDxDuInt;
+ public uint SetPixelsFromCpuDyDvFrac;
+ public uint SetPixelsFromCpuDyDvInt;
+ public uint SetPixelsFromCpuDstX0Frac;
+ public uint SetPixelsFromCpuDstX0Int;
+ public uint SetPixelsFromCpuDstY0Frac;
+ public uint SetPixelsFromCpuDstY0Int;
+ public uint PixelsFromCpuData;
+ public fixed uint Reserved864[3];
+ public uint SetBigEndianControl;
+ public bool SetBigEndianControlX32Swap1 => (SetBigEndianControl & 0x1) != 0;
+ public bool SetBigEndianControlX32Swap4 => (SetBigEndianControl & 0x2) != 0;
+ public bool SetBigEndianControlX32Swap8 => (SetBigEndianControl & 0x4) != 0;
+ public bool SetBigEndianControlX32Swap16 => (SetBigEndianControl & 0x8) != 0;
+ public bool SetBigEndianControlX16Swap1 => (SetBigEndianControl & 0x10) != 0;
+ public bool SetBigEndianControlX16Swap4 => (SetBigEndianControl & 0x20) != 0;
+ public bool SetBigEndianControlX16Swap8 => (SetBigEndianControl & 0x40) != 0;
+ public bool SetBigEndianControlX16Swap16 => (SetBigEndianControl & 0x80) != 0;
+ public bool SetBigEndianControlX8Swap1 => (SetBigEndianControl & 0x100) != 0;
+ public bool SetBigEndianControlX8Swap4 => (SetBigEndianControl & 0x200) != 0;
+ public bool SetBigEndianControlX8Swap8 => (SetBigEndianControl & 0x400) != 0;
+ public bool SetBigEndianControlX8Swap16 => (SetBigEndianControl & 0x800) != 0;
+ public bool SetBigEndianControlI1X8Cga6Swap1 => (SetBigEndianControl & 0x1000) != 0;
+ public bool SetBigEndianControlI1X8Cga6Swap4 => (SetBigEndianControl & 0x2000) != 0;
+ public bool SetBigEndianControlI1X8Cga6Swap8 => (SetBigEndianControl & 0x4000) != 0;
+ public bool SetBigEndianControlI1X8Cga6Swap16 => (SetBigEndianControl & 0x8000) != 0;
+ public bool SetBigEndianControlI1X8LeSwap1 => (SetBigEndianControl & 0x10000) != 0;
+ public bool SetBigEndianControlI1X8LeSwap4 => (SetBigEndianControl & 0x20000) != 0;
+ public bool SetBigEndianControlI1X8LeSwap8 => (SetBigEndianControl & 0x40000) != 0;
+ public bool SetBigEndianControlI1X8LeSwap16 => (SetBigEndianControl & 0x80000) != 0;
+ public bool SetBigEndianControlI4Swap1 => (SetBigEndianControl & 0x100000) != 0;
+ public bool SetBigEndianControlI4Swap4 => (SetBigEndianControl & 0x200000) != 0;
+ public bool SetBigEndianControlI4Swap8 => (SetBigEndianControl & 0x400000) != 0;
+ public bool SetBigEndianControlI4Swap16 => (SetBigEndianControl & 0x800000) != 0;
+ public bool SetBigEndianControlI8Swap1 => (SetBigEndianControl & 0x1000000) != 0;
+ public bool SetBigEndianControlI8Swap4 => (SetBigEndianControl & 0x2000000) != 0;
+ public bool SetBigEndianControlI8Swap8 => (SetBigEndianControl & 0x4000000) != 0;
+ public bool SetBigEndianControlI8Swap16 => (SetBigEndianControl & 0x8000000) != 0;
+ public bool SetBigEndianControlOverride => (SetBigEndianControl & 0x10000000) != 0;
+ public fixed uint Reserved874[3];
+ public uint SetPixelsFromMemoryBlockShape;
+ public SetPixelsFromMemoryBlockShapeV SetPixelsFromMemoryBlockShapeV => (SetPixelsFromMemoryBlockShapeV)((SetPixelsFromMemoryBlockShape >> 0) & 0x7);
+ public uint SetPixelsFromMemoryCorralSize;
+ public int SetPixelsFromMemoryCorralSizeV => (int)((SetPixelsFromMemoryCorralSize >> 0) & 0x3FF);
+ public uint SetPixelsFromMemorySafeOverlap;
+ public bool SetPixelsFromMemorySafeOverlapV => (SetPixelsFromMemorySafeOverlap & 0x1) != 0;
+ public uint SetPixelsFromMemorySampleMode;
+ public SetPixelsFromMemorySampleModeOrigin SetPixelsFromMemorySampleModeOrigin => (SetPixelsFromMemorySampleModeOrigin)((SetPixelsFromMemorySampleMode >> 0) & 0x1);
+ public SetPixelsFromMemorySampleModeFilter SetPixelsFromMemorySampleModeFilter => (SetPixelsFromMemorySampleModeFilter)((SetPixelsFromMemorySampleMode >> 4) & 0x1);
+ public fixed uint Reserved890[8];
+ public uint SetPixelsFromMemoryDstX0;
+ public uint SetPixelsFromMemoryDstY0;
+ public uint SetPixelsFromMemoryDstWidth;
+ public uint SetPixelsFromMemoryDstHeight;
+ public uint SetPixelsFromMemoryDuDxFrac;
+ public uint SetPixelsFromMemoryDuDxInt;
+ public uint SetPixelsFromMemoryDvDyFrac;
+ public uint SetPixelsFromMemoryDvDyInt;
+ public uint SetPixelsFromMemorySrcX0Frac;
+ public uint SetPixelsFromMemorySrcX0Int;
+ public uint SetPixelsFromMemorySrcY0Frac;
+ public uint PixelsFromMemorySrcY0Int; // NOTE(review): the only field of this run without the Set prefix - presumably intentional and taken from the generated definitions; confirm before renaming.
+ public uint SetFalcon00;
+ public uint SetFalcon01;
+ public uint SetFalcon02;
+ public uint SetFalcon03;
+ public uint SetFalcon04;
+ public uint SetFalcon05;
+ public uint SetFalcon06;
+ public uint SetFalcon07;
+ public uint SetFalcon08;
+ public uint SetFalcon09;
+ public uint SetFalcon10;
+ public uint SetFalcon11;
+ public uint SetFalcon12;
+ public uint SetFalcon13;
+ public uint SetFalcon14;
+ public uint SetFalcon15;
+ public uint SetFalcon16;
+ public uint SetFalcon17;
+ public uint SetFalcon18;
+ public uint SetFalcon19;
+ public uint SetFalcon20;
+ public uint SetFalcon21;
+ public uint SetFalcon22;
+ public uint SetFalcon23;
+ public uint SetFalcon24;
+ public uint SetFalcon25;
+ public uint SetFalcon26;
+ public uint SetFalcon27;
+ public uint SetFalcon28;
+ public uint SetFalcon29;
+ public uint SetFalcon30;
+ public uint SetFalcon31;
+ public fixed uint Reserved960[291];
+ public uint MmeDmaWriteMethodBarrier;
+ public bool MmeDmaWriteMethodBarrierV => (MmeDmaWriteMethodBarrier & 0x1) != 0;
+ public fixed uint ReservedDF0[2436];
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs
new file mode 100644
index 00000000..c28da094
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs
@@ -0,0 +1,22 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Twod
+{
+ /// <summary>
+ /// Texture to texture (with optional resizing) copy parameters. Ten 32-bit words; the field order appears to mirror the SetDstFormat..SetDstOffsetLower register run of TwodClassState - TODO confirm.
+ /// </summary>
+ struct TwodTexture
+ {
+#pragma warning disable CS0649
+ public ColorFormat Format;
+ public Boolean32 LinearLayout; // Only bit 0 is significant (see Boolean32).
+ public MemoryLayout MemoryLayout; // Packed block size word, decoded by the MemoryLayout.Unpack* methods.
+ public int Depth;
+ public int Layer;
+ public int Stride; // Presumably the row pitch in bytes (occupies the SetDstPitch slot) - confirm.
+ public int Width;
+ public int Height;
+ public GpuVa Address; // Split address: upper word first, then lower word.
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs
new file mode 100644
index 00000000..c982347a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Boolean that occupies a full 32-bit word in memory; only the least significant bit carries the value.
+ /// </summary>
+ struct Boolean32
+ {
+#pragma warning disable CS0649
+ private uint _value;
+#pragma warning restore CS0649
+
+ /// <summary>
+ /// Converts to <see cref="bool"/>: true when bit 0 of the stored word is set, whatever the other 31 bits hold.
+ /// </summary>
+ public static implicit operator bool(Boolean32 value) => (value._value & 1u) == 1u;
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs
new file mode 100644
index 00000000..889b5c8b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs
@@ -0,0 +1,165 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Color texture format. The values coincide with the same-layout entries of the 2D engine format enums (for example 0xcf is 207, <c>SetDstFormatV.A8r8g8b8</c>).
+ /// </summary>
+ enum ColorFormat
+ {
+ R32G32B32A32Float = 0xc0,
+ R32G32B32A32Sint = 0xc1,
+ R32G32B32A32Uint = 0xc2,
+ R32G32B32X32Float = 0xc3,
+ R32G32B32X32Sint = 0xc4,
+ R32G32B32X32Uint = 0xc5,
+ R16G16B16X16Unorm = 0xc6,
+ R16G16B16X16Snorm = 0xc7,
+ R16G16B16X16Sint = 0xc8,
+ R16G16B16X16Uint = 0xc9,
+ R16G16B16A16Float = 0xca,
+ R32G32Float = 0xcb,
+ R32G32Sint = 0xcc,
+ R32G32Uint = 0xcd,
+ R16G16B16X16Float = 0xce,
+ B8G8R8A8Unorm = 0xcf,
+ B8G8R8A8Srgb = 0xd0,
+ R10G10B10A2Unorm = 0xd1,
+ R10G10B10A2Uint = 0xd2,
+ R8G8B8A8Unorm = 0xd5,
+ R8G8B8A8Srgb = 0xd6,
+ R8G8B8X8Snorm = 0xd7,
+ R8G8B8X8Sint = 0xd8,
+ R8G8B8X8Uint = 0xd9,
+ R16G16Unorm = 0xda,
+ R16G16Snorm = 0xdb,
+ R16G16Sint = 0xdc,
+ R16G16Uint = 0xdd,
+ R16G16Float = 0xde,
+ R11G11B10Float = 0xe0,
+ R32Sint = 0xe3,
+ R32Uint = 0xe4,
+ R32Float = 0xe5,
+ B8G8R8X8Unorm = 0xe6,
+ B8G8R8X8Srgb = 0xe7,
+ B5G6R5Unorm = 0xe8,
+ B5G5R5A1Unorm = 0xe9,
+ R8G8Unorm = 0xea,
+ R8G8Snorm = 0xeb,
+ R8G8Sint = 0xec,
+ R8G8Uint = 0xed,
+ R16Unorm = 0xee,
+ R16Snorm = 0xef,
+ R16Sint = 0xf0,
+ R16Uint = 0xf1,
+ R16Float = 0xf2,
+ R8Unorm = 0xf3,
+ R8Snorm = 0xf4,
+ R8Sint = 0xf5,
+ R8Uint = 0xf6,
+ B5G5R5X1Unorm = 0xf8,
+ R8G8B8X8Unorm = 0xf9,
+ R8G8B8X8Srgb = 0xfa
+ }
+
+ static class ColorFormatConverter
+ {
+ /// <summary>
+ /// Converts the color texture format to a host compatible format. The "X" formats map to the host format with an alpha channel of the same layout.
+ /// </summary>
+ /// <param name="format">Color format</param>
+ /// <returns>Host format information for the color format, or <c>FormatInfo.Default</c> when the value is not listed below</returns>
+ public static FormatInfo Convert(this ColorFormat format)
+ {
+ return format switch
+ {
+ ColorFormat.R32G32B32A32Float => new FormatInfo(Format.R32G32B32A32Float, 1, 1, 16, 4),
+ ColorFormat.R32G32B32A32Sint => new FormatInfo(Format.R32G32B32A32Sint, 1, 1, 16, 4),
+ ColorFormat.R32G32B32A32Uint => new FormatInfo(Format.R32G32B32A32Uint, 1, 1, 16, 4),
+ ColorFormat.R32G32B32X32Float => new FormatInfo(Format.R32G32B32A32Float, 1, 1, 16, 4),
+ ColorFormat.R32G32B32X32Sint => new FormatInfo(Format.R32G32B32A32Sint, 1, 1, 16, 4),
+ ColorFormat.R32G32B32X32Uint => new FormatInfo(Format.R32G32B32A32Uint, 1, 1, 16, 4),
+ ColorFormat.R16G16B16X16Unorm => new FormatInfo(Format.R16G16B16A16Unorm, 1, 1, 8, 4),
+ ColorFormat.R16G16B16X16Snorm => new FormatInfo(Format.R16G16B16A16Snorm, 1, 1, 8, 4),
+ ColorFormat.R16G16B16X16Sint => new FormatInfo(Format.R16G16B16A16Sint, 1, 1, 8, 4),
+ ColorFormat.R16G16B16X16Uint => new FormatInfo(Format.R16G16B16A16Uint, 1, 1, 8, 4),
+ ColorFormat.R16G16B16A16Float => new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4),
+ ColorFormat.R32G32Float => new FormatInfo(Format.R32G32Float, 1, 1, 8, 2),
+ ColorFormat.R32G32Sint => new FormatInfo(Format.R32G32Sint, 1, 1, 8, 2),
+ ColorFormat.R32G32Uint => new FormatInfo(Format.R32G32Uint, 1, 1, 8, 2),
+ ColorFormat.R16G16B16X16Float => new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4),
+ ColorFormat.B8G8R8A8Unorm => new FormatInfo(Format.B8G8R8A8Unorm, 1, 1, 4, 4),
+ ColorFormat.B8G8R8A8Srgb => new FormatInfo(Format.B8G8R8A8Srgb, 1, 1, 4, 4),
+ ColorFormat.R10G10B10A2Unorm => new FormatInfo(Format.R10G10B10A2Unorm, 1, 1, 4, 4),
+ ColorFormat.R10G10B10A2Uint => new FormatInfo(Format.R10G10B10A2Uint, 1, 1, 4, 4),
+ ColorFormat.R8G8B8A8Unorm => new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4),
+ ColorFormat.R8G8B8A8Srgb => new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4),
+ ColorFormat.R8G8B8X8Snorm => new FormatInfo(Format.R8G8B8A8Snorm, 1, 1, 4, 4),
+ ColorFormat.R8G8B8X8Sint => new FormatInfo(Format.R8G8B8A8Sint, 1, 1, 4, 4),
+ ColorFormat.R8G8B8X8Uint => new FormatInfo(Format.R8G8B8A8Uint, 1, 1, 4, 4),
+ ColorFormat.R16G16Unorm => new FormatInfo(Format.R16G16Unorm, 1, 1, 4, 2),
+ ColorFormat.R16G16Snorm => new FormatInfo(Format.R16G16Snorm, 1, 1, 4, 2),
+ ColorFormat.R16G16Sint => new FormatInfo(Format.R16G16Sint, 1, 1, 4, 2),
+ ColorFormat.R16G16Uint => new FormatInfo(Format.R16G16Uint, 1, 1, 4, 2),
+ ColorFormat.R16G16Float => new FormatInfo(Format.R16G16Float, 1, 1, 4, 2),
+ ColorFormat.R11G11B10Float => new FormatInfo(Format.R11G11B10Float, 1, 1, 4, 3),
+ ColorFormat.R32Sint => new FormatInfo(Format.R32Sint, 1, 1, 4, 1),
+ ColorFormat.R32Uint => new FormatInfo(Format.R32Uint, 1, 1, 4, 1),
+ ColorFormat.R32Float => new FormatInfo(Format.R32Float, 1, 1, 4, 1),
+ ColorFormat.B8G8R8X8Unorm => new FormatInfo(Format.B8G8R8A8Unorm, 1, 1, 4, 4),
+ ColorFormat.B8G8R8X8Srgb => new FormatInfo(Format.B8G8R8A8Srgb, 1, 1, 4, 4),
+ ColorFormat.B5G6R5Unorm => new FormatInfo(Format.B5G6R5Unorm, 1, 1, 2, 3),
+ ColorFormat.B5G5R5A1Unorm => new FormatInfo(Format.B5G5R5A1Unorm, 1, 1, 2, 4),
+ ColorFormat.R8G8Unorm => new FormatInfo(Format.R8G8Unorm, 1, 1, 2, 2),
+ ColorFormat.R8G8Snorm => new FormatInfo(Format.R8G8Snorm, 1, 1, 2, 2),
+ ColorFormat.R8G8Sint => new FormatInfo(Format.R8G8Sint, 1, 1, 2, 2),
+ ColorFormat.R8G8Uint => new FormatInfo(Format.R8G8Uint, 1, 1, 2, 2),
+ ColorFormat.R16Unorm => new FormatInfo(Format.R16Unorm, 1, 1, 2, 1),
+ ColorFormat.R16Snorm => new FormatInfo(Format.R16Snorm, 1, 1, 2, 1),
+ ColorFormat.R16Sint => new FormatInfo(Format.R16Sint, 1, 1, 2, 1),
+ ColorFormat.R16Uint => new FormatInfo(Format.R16Uint, 1, 1, 2, 1),
+ ColorFormat.R16Float => new FormatInfo(Format.R16Float, 1, 1, 2, 1),
+ ColorFormat.R8Unorm => new FormatInfo(Format.R8Unorm, 1, 1, 1, 1),
+ ColorFormat.R8Snorm => new FormatInfo(Format.R8Snorm, 1, 1, 1, 1),
+ ColorFormat.R8Sint => new FormatInfo(Format.R8Sint, 1, 1, 1, 1),
+ ColorFormat.R8Uint => new FormatInfo(Format.R8Uint, 1, 1, 1, 1),
+ ColorFormat.B5G5R5X1Unorm => new FormatInfo(Format.B5G5R5A1Unorm, 1, 1, 2, 4),
+ ColorFormat.R8G8B8X8Unorm => new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4),
+ ColorFormat.R8G8B8X8Srgb => new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4),
+ _ => FormatInfo.Default
+ };
+ }
+
+ /// <summary>
+ /// Tells whether a color format is one of the "X" variants, that is, a format without an alpha component.
+ /// </summary>
+ /// <param name="format">Color format to test</param>
+ /// <returns>True for the RGBX/BGRX formats listed below (no alpha), false for every other value</returns>
+ public static bool NoAlpha(this ColorFormat format)
+ {
+ switch (format)
+ {
+ case ColorFormat.B5G5R5X1Unorm:
+ case ColorFormat.B8G8R8X8Unorm:
+ case ColorFormat.B8G8R8X8Srgb:
+ case ColorFormat.R8G8B8X8Unorm:
+ case ColorFormat.R8G8B8X8Srgb:
+ case ColorFormat.R8G8B8X8Snorm:
+ case ColorFormat.R8G8B8X8Sint:
+ case ColorFormat.R8G8B8X8Uint:
+ case ColorFormat.R16G16B16X16Unorm:
+ case ColorFormat.R16G16B16X16Snorm:
+ case ColorFormat.R16G16B16X16Sint:
+ case ColorFormat.R16G16B16X16Uint:
+ case ColorFormat.R16G16B16X16Float:
+ case ColorFormat.R32G32B32X32Float:
+ case ColorFormat.R32G32B32X32Sint:
+ case ColorFormat.R32G32B32X32Uint:
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs
new file mode 100644
index 00000000..839faac9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs
@@ -0,0 +1,22 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// GPU virtual address split into two 32-bit words.
+ /// </summary>
+ struct GpuVa
+ {
+ #pragma warning disable CS0649
+     public uint High;
+     public uint Low;
+ #pragma warning restore CS0649
+
+     /// <summary>
+     /// Joins both words into one 64-bit address: <see cref="High"/> fills bits 32 to 63
+     /// and <see cref="Low"/> fills bits 0 to 31.
+     /// </summary>
+     /// <returns>The combined 64-bit address value</returns>
+     public ulong Pack() => ((ulong)High << 32) | Low;
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs
new file mode 100644
index 00000000..6da96bd4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs
@@ -0,0 +1,37 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Memory layout parameters, for block linear textures.
+ /// All values are decoded from the single <see cref="Packed"/> word.
+ /// </summary>
+ struct MemoryLayout
+ {
+ #pragma warning disable CS0649
+     public uint Packed;
+ #pragma warning restore CS0649
+
+     /// <summary>
+     /// Reads the 4-bit field of <see cref="Packed"/> that starts at the given bit position.
+     /// </summary>
+     /// <param name="shift">Position of the lowest bit of the field</param>
+     /// <returns>The field value, in the range 0 to 15</returns>
+     private int UnpackNibble(int shift) => (int)((Packed >> shift) & 0xf);
+
+     /// <summary>
+     /// Unpacks the GOB blocks in X value. Bits 0 to 3 hold it as a power of two exponent.
+     /// </summary>
+     /// <returns>One shifted left by the stored exponent</returns>
+     public int UnpackGobBlocksInX() => 1 << UnpackNibble(0);
+
+     /// <summary>
+     /// Unpacks the GOB blocks in Y value. Bits 4 to 7 hold it as a power of two exponent.
+     /// </summary>
+     /// <returns>One shifted left by the stored exponent</returns>
+     public int UnpackGobBlocksInY() => 1 << UnpackNibble(4);
+
+     /// <summary>
+     /// Unpacks the GOB blocks in Z value. Bits 8 to 11 hold it as a power of two exponent.
+     /// </summary>
+     /// <returns>One shifted left by the stored exponent</returns>
+     public int UnpackGobBlocksInZ() => 1 << UnpackNibble(8);
+
+     /// <summary>
+     /// Unpacks the linear flag, stored in bit 12 (mask 0x1000).
+     /// </summary>
+     /// <returns>True if the bit is set, false otherwise</returns>
+     public bool UnpackIsLinear() => (Packed & 0x1000) != 0;
+
+     /// <summary>
+     /// Unpacks the 3D target flag, stored in bit 16 (mask 0x10000).
+     /// </summary>
+     /// <returns>True if the bit is set, false otherwise</returns>
+     public bool UnpackIsTarget3D() => (Packed & 0x10000) != 0;
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs
new file mode 100644
index 00000000..dae63124
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs
@@ -0,0 +1,99 @@
+using Ryujinx.Graphics.GAL;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Draw primitive type.
+ /// Each member has a counterpart of the same name in the host topology enum,
+ /// see <see cref="PrimitiveTypeConverter"/>.
+ /// </summary>
+ enum PrimitiveType
+ {
+     Points = 0,
+     Lines = 1,
+     LineLoop = 2,
+     LineStrip = 3,
+     Triangles = 4,
+     TriangleStrip = 5,
+     TriangleFan = 6,
+     Quads = 7,
+     QuadStrip = 8,
+     Polygon = 9,
+     LinesAdjacency = 10,
+     LineStripAdjacency = 11,
+     TrianglesAdjacency = 12,
+     TriangleStripAdjacency = 13,
+     Patches = 14
+ }
+
+ /// <summary>
+ /// Alternative primitive type that might override <see cref="PrimitiveType"/>.
+ /// It has no LineLoop, Quads, QuadStrip or Polygon members and uses its own numbering.
+ /// </summary>
+ enum PrimitiveTypeOverride
+ {
+     Points = 0x1,
+     Lines = 0x2,
+     LineStrip = 0x3,
+     Triangles = 0x4,
+     TriangleStrip = 0x5,
+     // NOTE(review): this value is far outside the range used by the other members.
+     // It is kept exactly as found; confirm it is intentional.
+     TriangleFan = 0x1015,
+     LinesAdjacency = 0xa,
+     LineStripAdjacency = 0xb,
+     TrianglesAdjacency = 0xc,
+     TriangleStripAdjacency = 0xd,
+     Patches = 0xe
+ }
+
+ /// <summary>
+ /// Extension methods that translate guest primitive type enums into the host topology enum.
+ /// </summary>
+ static class PrimitiveTypeConverter
+ {
+     /// <summary>
+     /// Converts the primitive type into something that can be used with the host API.
+     /// Members map to the host value of the same name.
+     /// </summary>
+     /// <param name="type">The primitive type to convert</param>
+     /// <returns>A host compatible enum value, or triangles for a value that is not a named member</returns>
+     public static PrimitiveTopology Convert(this PrimitiveType type) => type switch
+     {
+         PrimitiveType.Points => PrimitiveTopology.Points,
+         PrimitiveType.Lines => PrimitiveTopology.Lines,
+         PrimitiveType.LineLoop => PrimitiveTopology.LineLoop,
+         PrimitiveType.LineStrip => PrimitiveTopology.LineStrip,
+         PrimitiveType.Triangles => PrimitiveTopology.Triangles,
+         PrimitiveType.TriangleStrip => PrimitiveTopology.TriangleStrip,
+         PrimitiveType.TriangleFan => PrimitiveTopology.TriangleFan,
+         PrimitiveType.Quads => PrimitiveTopology.Quads,
+         PrimitiveType.QuadStrip => PrimitiveTopology.QuadStrip,
+         PrimitiveType.Polygon => PrimitiveTopology.Polygon,
+         PrimitiveType.LinesAdjacency => PrimitiveTopology.LinesAdjacency,
+         PrimitiveType.LineStripAdjacency => PrimitiveTopology.LineStripAdjacency,
+         PrimitiveType.TrianglesAdjacency => PrimitiveTopology.TrianglesAdjacency,
+         PrimitiveType.TriangleStripAdjacency => PrimitiveTopology.TriangleStripAdjacency,
+         PrimitiveType.Patches => PrimitiveTopology.Patches,
+         // Values outside the enum are drawn as triangles.
+         _ => PrimitiveTopology.Triangles
+     };
+
+     /// <summary>
+     /// Converts the override primitive type into something that can be used with the host API.
+     /// Members map to the host value of the same name.
+     /// </summary>
+     /// <param name="type">The override primitive type to convert</param>
+     /// <returns>A host compatible enum value, or triangles for a value that is not a named member</returns>
+     public static PrimitiveTopology Convert(this PrimitiveTypeOverride type) => type switch
+     {
+         PrimitiveTypeOverride.Points => PrimitiveTopology.Points,
+         PrimitiveTypeOverride.Lines => PrimitiveTopology.Lines,
+         PrimitiveTypeOverride.LineStrip => PrimitiveTopology.LineStrip,
+         PrimitiveTypeOverride.Triangles => PrimitiveTopology.Triangles,
+         PrimitiveTypeOverride.TriangleStrip => PrimitiveTopology.TriangleStrip,
+         PrimitiveTypeOverride.TriangleFan => PrimitiveTopology.TriangleFan,
+         PrimitiveTypeOverride.LinesAdjacency => PrimitiveTopology.LinesAdjacency,
+         PrimitiveTypeOverride.LineStripAdjacency => PrimitiveTopology.LineStripAdjacency,
+         PrimitiveTypeOverride.TrianglesAdjacency => PrimitiveTopology.TrianglesAdjacency,
+         PrimitiveTypeOverride.TriangleStripAdjacency => PrimitiveTopology.TriangleStripAdjacency,
+         PrimitiveTypeOverride.Patches => PrimitiveTopology.Patches,
+         // Values outside the enum are drawn as triangles.
+         _ => PrimitiveTopology.Triangles
+     };
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs
new file mode 100644
index 00000000..839a4d0a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Sampler pool indexing mode.
+ /// </summary>
+ enum SamplerIndex
+ {
+ // The numeric values are assigned explicitly.
+ // NOTE(review): presumably they mirror the raw value of a GPU state field; confirm against the code that reads this enum.
+ Independently = 0,
+ ViaHeaderIndex = 1
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs
new file mode 100644
index 00000000..c457dbf9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Storage buffer address and size information.
+ /// The address is stored as two 32-bit words, low word first.
+ /// </summary>
+ struct SbDescriptor
+ {
+ #pragma warning disable CS0649
+     public uint AddressLow;
+     public uint AddressHigh;
+     public int Size;
+     public int Padding;
+ #pragma warning restore CS0649
+
+     /// <summary>
+     /// Joins the two address words into one 64-bit address: <see cref="AddressHigh"/> fills
+     /// bits 32 to 63 and <see cref="AddressLow"/> fills bits 0 to 31.
+     /// </summary>
+     /// <returns>The combined 64-bit address value</returns>
+     public ulong PackAddress() => ((ulong)AddressHigh << 32) | AddressLow;
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs
new file mode 100644
index 00000000..1de1621f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs
@@ -0,0 +1,42 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Types
+{
+ /// <summary>
+ /// Depth-stencil texture format.
+ /// </summary>
+ /// <remarks>
+ /// The values are assigned explicitly and are not contiguous.
+ /// <see cref="ZetaFormatConverter"/> maps each member to a host format description
+ /// and returns a default description for any other value.
+ /// </remarks>
+ enum ZetaFormat
+ {
+ D32Float = 0xa,
+ D16Unorm = 0x13,
+ D24UnormS8Uint = 0x14,
+ D24Unorm = 0x15,
+ S8UintD24Unorm = 0x16,
+ S8Uint = 0x17,
+ D32FloatS8Uint = 0x19
+ }
+
+ /// <summary>
+ /// Extension methods that translate <see cref="ZetaFormat"/> values into host format descriptions.
+ /// </summary>
+ static class ZetaFormatConverter
+ {
+     /// <summary>
+     /// Converts the depth-stencil texture format to a host compatible format.
+     /// </summary>
+     /// <param name="format">Depth-stencil format</param>
+     /// <returns>
+     /// Host compatible format information, or <see cref="FormatInfo.Default"/> when the value
+     /// is not one of the named <see cref="ZetaFormat"/> members
+     /// </returns>
+     public static FormatInfo Convert(this ZetaFormat format) => format switch
+     {
+         ZetaFormat.D32Float => new FormatInfo(Format.D32Float, 1, 1, 4, 1),
+         ZetaFormat.D16Unorm => new FormatInfo(Format.D16Unorm, 1, 1, 2, 1),
+         ZetaFormat.D24UnormS8Uint => new FormatInfo(Format.D24UnormS8Uint, 1, 1, 4, 2),
+         // D24Unorm uses the host D24UnormS8Uint format, with a different last argument than the entry above.
+         ZetaFormat.D24Unorm => new FormatInfo(Format.D24UnormS8Uint, 1, 1, 4, 1),
+         ZetaFormat.S8UintD24Unorm => new FormatInfo(Format.S8UintD24Unorm, 1, 1, 4, 2),
+         ZetaFormat.S8Uint => new FormatInfo(Format.S8Uint, 1, 1, 1, 1),
+         ZetaFormat.D32FloatS8Uint => new FormatInfo(Format.D32FloatS8Uint, 1, 1, 8, 2),
+         _ => FormatInfo.Default
+     };
+ }
+}