From cee712105850ac3385cd0091a923438167433f9f Mon Sep 17 00:00:00 2001
From: TSR Berry <20988865+TSRBerry@users.noreply.github.com>
Date: Sat, 8 Apr 2023 01:22:00 +0200
Subject: Move solution and projects to src
---
.../Engine/Compute/ComputeClass.cs | 219 +
.../Engine/Compute/ComputeClassState.cs | 435 ++
.../Engine/Compute/ComputeQmd.cs | 275 ++
.../Engine/ConditionalRenderEnabled.cs | 12 +
.../Engine/DeviceStateWithShadow.cs | 96 +
src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs | 635 +++
.../Engine/Dma/DmaClassState.cs | 271 ++
src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs | 20 +
.../Engine/GPFifo/CompressedMethod.cs | 41 +
src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs | 55 +
.../Engine/GPFifo/GPFifoClass.cs | 248 ++
.../Engine/GPFifo/GPFifoClassState.cs | 233 ++
.../Engine/GPFifo/GPFifoDevice.cs | 262 ++
.../Engine/GPFifo/GPFifoProcessor.cs | 331 ++
.../Engine/InlineToMemory/InlineToMemoryClass.cs | 273 ++
.../InlineToMemory/InlineToMemoryClassState.cs | 181 +
.../Engine/MME/AluOperation.cs | 15 +
.../Engine/MME/AluRegOperation.cs | 18 +
.../Engine/MME/AssignmentOperation.cs | 17 +
src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs | 52 +
src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs | 101 +
src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs | 341 ++
.../Engine/MME/MacroHLEFunctionName.cs | 16 +
.../Engine/MME/MacroHLETable.cs | 113 +
.../Engine/MME/MacroInterpreter.cs | 400 ++
src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs | 39 +
.../Engine/MME/MacroJitCompiler.cs | 517 +++
.../Engine/MME/MacroJitContext.cs | 55 +
.../Engine/MmeShadowScratch.cs | 18 +
.../Engine/SetMmeShadowRamControlMode.cs | 13 +
src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs | 111 +
.../Threed/Blender/AdvancedBlendFunctions.cs | 4226 ++++++++++++++++++++
.../Engine/Threed/Blender/AdvancedBlendManager.cs | 115 +
.../Threed/Blender/AdvancedBlendPreGenTable.cs | 273 ++
.../Engine/Threed/Blender/AdvancedBlendUcode.cs | 126 +
.../Engine/Threed/Blender/UcodeAssembler.cs | 305 ++
.../Engine/Threed/ConditionalRendering.cs | 130 +
.../Engine/Threed/ConstantBufferUpdater.cs | 183 +
.../Engine/Threed/DrawManager.cs | 856 ++++
.../Engine/Threed/DrawState.cs | 65 +
.../Engine/Threed/IbStreamer.cs | 194 +
.../Engine/Threed/IndirectDrawType.cs | 38 +
.../Engine/Threed/RenderTargetUpdateFlags.cs | 41 +
.../Engine/Threed/SemaphoreUpdater.cs | 190 +
.../Engine/Threed/SpecializationStateUpdater.cs | 346 ++
.../Engine/Threed/StateUpdateTracker.cs | 177 +
.../Engine/Threed/StateUpdater.cs | 1448 +++++++
.../Engine/Threed/ThreedClass.cs | 620 +++
.../Engine/Threed/ThreedClassState.cs | 1048 +++++
src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs | 379 ++
.../Engine/Twod/TwodClassState.cs | 816 ++++
.../Engine/Twod/TwodTexture.cs | 22 +
src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs | 17 +
.../Engine/Types/ColorFormat.cs | 165 +
src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs | 22 +
.../Engine/Types/MemoryLayout.cs | 37 +
.../Engine/Types/PrimitiveType.cs | 99 +
.../Engine/Types/SamplerIndex.cs | 11 +
.../Engine/Types/SbDescriptor.cs | 20 +
.../Engine/Types/ZetaFormat.cs | 42 +
60 files changed, 17424 insertions(+)
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs
create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs
(limited to 'src/Ryujinx.Graphics.Gpu/Engine')
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
new file mode 100644
index 00000000..2ac738fd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -0,0 +1,219 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Represents a compute engine class.
+    /// </summary>
+    class ComputeClass : IDeviceState
+    {
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly ThreedClass _3dEngine;
+        private readonly DeviceState<ComputeClassState> _state;
+
+        private readonly InlineToMemoryClass _i2mClass;
+
+        /// <summary>
+        /// Creates a new instance of the compute engine class.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="threedEngine">3D engine</param>
+        public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
+        {
+            _context = context;
+            _channel = channel;
+            _3dEngine = threedEngine;
+            _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
+            {
+                { nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
+                { nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
+                { nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) }
+            });
+
+            _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
+        }
+
+        /// <summary>
+        /// Reads data from the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes data to the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public void Write(int offset, int data) => _state.Write(offset, data);
+
+        /// <summary>
+        /// Launches the Inline-to-Memory DMA copy operation, passing the compute register state reinterpreted as Inline-to-Memory state.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LaunchDma(int argument)
+        {
+            _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
+        }
+
+        /// <summary>
+        /// Pushes a block of data to the Inline-to-Memory engine.
+        /// </summary>
+        /// <param name="data">Data to push</param>
+        public void LoadInlineData(ReadOnlySpan<int> data)
+        {
+            _i2mClass.LoadInlineData(data);
+        }
+
+        /// <summary>
+        /// Pushes a word of data to the Inline-to-Memory engine.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LoadInlineData(int argument)
+        {
+            _i2mClass.LoadInlineData(argument);
+        }
+
+        /// <summary>
+        /// Performs the compute dispatch operation.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void SendSignalingPcasB(int argument)
+        {
+            var memoryManager = _channel.MemoryManager;
+
+            // Since we're going to change the state, make sure any pending instanced draws are done.
+            _3dEngine.PerformDeferredDraws();
+
+            // Make sure all pending uniform buffer data is written to memory.
+            _3dEngine.FlushUboDirty();
+
+            uint qmdAddress = _state.State.SendPcasA;
+
+            var qmd = _channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);
+
+            ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
+
+            shaderGpuVa += (uint)qmd.ProgramOffset;
+
+            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
+
+            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
+            {
+                if (!qmd.ConstantBufferValid(index))
+                {
+                    continue;
+                }
+
+                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+                ulong size = (ulong)qmd.ConstantBufferSize(index);
+
+                _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
+            }
+
+            ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
+            ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
+
+            GpuChannelPoolState poolState = new GpuChannelPoolState(
+                texturePoolGpuVa,
+                _state.State.SetTexHeaderPoolCMaximumIndex,
+                _state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+            GpuChannelComputeState computeState = new GpuChannelComputeState(
+                qmd.CtaThreadDimension0,
+                qmd.CtaThreadDimension1,
+                qmd.CtaThreadDimension2,
+                localMemorySize,
+                sharedMemorySize,
+                _channel.BufferManager.HasUnalignedStorageBuffers);
+
+            CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+            _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+
+            _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
+            _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
+            _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+            ShaderProgramInfo info = cs.Shaders[0].Info;
+
+            bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
+
+            for (int index = 0; index < info.SBuffers.Count; index++)
+            {
+                BufferDescriptor sb = info.SBuffers[index];
+
+                ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int sbDescOffset = 0x310 + sb.Slot * 0x10;
+
+                sbDescAddress += (ulong)sbDescOffset;
+
+                SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
+
+                _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+            }
+
+            if (_channel.BufferManager.HasUnalignedStorageBuffers != hasUnaligned)
+            {
+                // Alignment assumptions changed: rebuild the compute state (it was built with the old flag value) before refetching the shader.
+                computeState = new GpuChannelComputeState(qmd.CtaThreadDimension0, qmd.CtaThreadDimension1, qmd.CtaThreadDimension2,
+                    localMemorySize, sharedMemorySize, _channel.BufferManager.HasUnalignedStorageBuffers);
+                cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+                _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+                info = cs.Shaders[0].Info;
+            }
+
+            for (int index = 0; index < info.CBuffers.Count; index++)
+            {
+                BufferDescriptor cb = info.CBuffers[index];
+
+                // NVN uses the "hardware" constant buffer for anything that is less than 8,
+                // and those are already bound above.
+                // Anything greater than or equal to 8 uses the emulated constant buffers.
+                // They are emulated using global memory loads.
+                if (cb.Slot < 8)
+                {
+                    continue;
+                }
+
+                ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10;
+
+                cbDescAddress += (ulong)cbDescOffset;
+
+                SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
+
+                _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
+            }
+
+            _channel.BufferManager.SetComputeBufferBindings(cs.Bindings);
+
+            _channel.TextureManager.SetComputeBindings(cs.Bindings);
+
+            // Should never return false for mismatching spec state, since the shader was fetched above.
+            _channel.TextureManager.CommitComputeBindings(cs.SpecializationState);
+
+            _channel.BufferManager.CommitComputeBindings();
+
+            _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
+
+            _3dEngine.ForceShaderUpdate();
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
new file mode 100644
index 00000000..5d81de5d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
@@ -0,0 +1,435 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+ /// <summary>
+ /// Notify type.
+ /// </summary>
+ enum NotifyType
+ {
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ }
+
+ /// <summary>
+ /// CWD control SM selection.
+ /// </summary>
+ enum SetCwdControlSmSelection
+ {
+ LoadBalanced = 0,
+ RoundRobin = 1,
+ }
+
+ /// <summary>
+ /// Cache lines to invalidate.
+ /// </summary>
+ enum InvalidateCacheLines
+ {
+ All = 0,
+ One = 1,
+ }
+
+ /// <summary>
+ /// GWC SCG type.
+ /// </summary>
+ enum SetGwcScgTypeScgType
+ {
+ GraphicsCompute0 = 0,
+ Compute1 = 1,
+ }
+
+ /// <summary>
+ /// Render enable override mode.
+ /// </summary>
+ enum SetRenderEnableOverrideMode
+ {
+ UseRenderEnable = 0,
+ AlwaysRender = 1,
+ NeverRender = 2,
+ }
+
+ /// <summary>
+ /// Semaphore report operation.
+ /// </summary>
+ enum SetReportSemaphoreDOperation
+ {
+ Release = 0,
+ Trap = 3,
+ }
+
+ /// <summary>
+ /// Semaphore report structure size.
+ /// </summary>
+ enum SetReportSemaphoreDStructureSize
+ {
+ FourWords = 0,
+ OneWord = 1,
+ }
+
+ /// <summary>
+ /// Semaphore report reduction operation.
+ /// </summary>
+ enum SetReportSemaphoreDReductionOp
+ {
+ RedAdd = 0,
+ RedMin = 1,
+ RedMax = 2,
+ RedInc = 3,
+ RedDec = 4,
+ RedAnd = 5,
+ RedOr = 6,
+ RedXor = 7,
+ }
+
+ /// <summary>
+ /// Semaphore report reduction format.
+ /// </summary>
+ enum SetReportSemaphoreDReductionFormat
+ {
+ Unsigned32 = 0,
+ Signed32 = 1,
+ }
+
+ ///
+ /// Compute class state.
+ ///
+ unsafe struct ComputeClassState
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+ public fixed uint Reserved04[63];
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)(Notify);
+ public uint WaitForIdle;
+ public fixed uint Reserved114[7];
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public uint PmTriggerWfi;
+ public fixed uint Reserved148[2];
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[10];
+ public uint LineLengthIn;
+ public uint LineCount;
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+ public uint OffsetOut;
+ public uint PitchOut;
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOriginBytesX;
+ public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+ public uint SetDstOriginSamplesY;
+ public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+ public uint LaunchDma;
+ public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+ public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+ public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+ public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+ public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+ public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+ public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+ public uint LoadInlineData;
+ public fixed uint Reserved1B8[9];
+ public uint SetI2mSemaphoreA;
+ public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+ public uint SetI2mSemaphoreB;
+ public uint SetI2mSemaphoreC;
+ public fixed uint Reserved1E8[2];
+ public uint SetI2mSpareNoop00;
+ public uint SetI2mSpareNoop01;
+ public uint SetI2mSpareNoop02;
+ public uint SetI2mSpareNoop03;
+ public uint SetValidSpanOverflowAreaA;
+ public int SetValidSpanOverflowAreaAAddressUpper => (int)((SetValidSpanOverflowAreaA >> 0) & 0xFF);
+ public uint SetValidSpanOverflowAreaB;
+ public uint SetValidSpanOverflowAreaC;
+ public uint SetCoalesceWaitingPeriodUnit;
+ public uint PerfmonTransfer;
+ public uint SetShaderSharedMemoryWindow;
+ public uint SetSelectMaxwellTextureHeaders;
+ public bool SetSelectMaxwellTextureHeadersV => (SetSelectMaxwellTextureHeaders & 0x1) != 0;
+ public uint InvalidateShaderCaches;
+ public bool InvalidateShaderCachesInstruction => (InvalidateShaderCaches & 0x1) != 0;
+ public bool InvalidateShaderCachesData => (InvalidateShaderCaches & 0x10) != 0;
+ public bool InvalidateShaderCachesConstant => (InvalidateShaderCaches & 0x1000) != 0;
+ public bool InvalidateShaderCachesLocks => (InvalidateShaderCaches & 0x2) != 0;
+ public bool InvalidateShaderCachesFlushData => (InvalidateShaderCaches & 0x4) != 0;
+ public uint SetReservedSwMethod00;
+ public uint SetReservedSwMethod01;
+ public uint SetReservedSwMethod02;
+ public uint SetReservedSwMethod03;
+ public uint SetReservedSwMethod04;
+ public uint SetReservedSwMethod05;
+ public uint SetReservedSwMethod06;
+ public uint SetReservedSwMethod07;
+ public uint SetCwdControl;
+ public SetCwdControlSmSelection SetCwdControlSmSelection => (SetCwdControlSmSelection)((SetCwdControl >> 0) & 0x1);
+ public uint InvalidateTextureHeaderCacheNoWfi;
+ public InvalidateCacheLines InvalidateTextureHeaderCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureHeaderCacheNoWfi >> 0) & 0x1);
+ public int InvalidateTextureHeaderCacheNoWfiTag => (int)((InvalidateTextureHeaderCacheNoWfi >> 4) & 0x3FFFFF);
+ public uint SetCwdRefCounter;
+ public int SetCwdRefCounterSelect => (int)((SetCwdRefCounter >> 0) & 0x3F);
+ public int SetCwdRefCounterValue => (int)((SetCwdRefCounter >> 8) & 0xFFFF);
+ public uint SetReservedSwMethod08;
+ public uint SetReservedSwMethod09;
+ public uint SetReservedSwMethod10;
+ public uint SetReservedSwMethod11;
+ public uint SetReservedSwMethod12;
+ public uint SetReservedSwMethod13;
+ public uint SetReservedSwMethod14;
+ public uint SetReservedSwMethod15;
+ public uint SetGwcScgType;
+ public SetGwcScgTypeScgType SetGwcScgTypeScgType => (SetGwcScgTypeScgType)((SetGwcScgType >> 0) & 0x1);
+ public uint SetScgControl;
+ public int SetScgControlCompute1MaxSmCount => (int)((SetScgControl >> 0) & 0x1FF);
+ public uint InvalidateConstantBufferCacheA;
+ public int InvalidateConstantBufferCacheAAddressUpper => (int)((InvalidateConstantBufferCacheA >> 0) & 0xFF);
+ public uint InvalidateConstantBufferCacheB;
+ public uint InvalidateConstantBufferCacheC;
+ public int InvalidateConstantBufferCacheCByteCount => (int)((InvalidateConstantBufferCacheC >> 0) & 0x1FFFF);
+ public bool InvalidateConstantBufferCacheCThruL2 => (InvalidateConstantBufferCacheC & 0x80000000) != 0;
+ public uint SetComputeClassVersion;
+ public int SetComputeClassVersionCurrent => (int)((SetComputeClassVersion >> 0) & 0xFFFF);
+ public int SetComputeClassVersionOldestSupported => (int)((SetComputeClassVersion >> 16) & 0xFFFF);
+ public uint CheckComputeClassVersion;
+ public int CheckComputeClassVersionCurrent => (int)((CheckComputeClassVersion >> 0) & 0xFFFF);
+ public int CheckComputeClassVersionOldestSupported => (int)((CheckComputeClassVersion >> 16) & 0xFFFF);
+ public uint SetQmdVersion;
+ public int SetQmdVersionCurrent => (int)((SetQmdVersion >> 0) & 0xFFFF);
+ public int SetQmdVersionOldestSupported => (int)((SetQmdVersion >> 16) & 0xFFFF);
+ public uint SetWfiConfig;
+ public bool SetWfiConfigEnableScgTypeWfi => (SetWfiConfig & 0x1) != 0;
+ public uint CheckQmdVersion;
+ public int CheckQmdVersionCurrent => (int)((CheckQmdVersion >> 0) & 0xFFFF);
+ public int CheckQmdVersionOldestSupported => (int)((CheckQmdVersion >> 16) & 0xFFFF);
+ public uint WaitForIdleScgType;
+ public uint InvalidateSkedCaches;
+ public bool InvalidateSkedCachesV => (InvalidateSkedCaches & 0x1) != 0;
+ public uint SetScgRenderEnableControl;
+ public bool SetScgRenderEnableControlCompute1UsesRenderEnable => (SetScgRenderEnableControl & 0x1) != 0;
+ public fixed uint Reserved2A0[4];
+ public uint SetCwdSlotCount;
+ public int SetCwdSlotCountV => (int)((SetCwdSlotCount >> 0) & 0xFF);
+ public uint SendPcasA;
+ public uint SendPcasB;
+ public int SendPcasBFrom => (int)((SendPcasB >> 0) & 0xFFFFFF);
+ public int SendPcasBDelta => (int)((SendPcasB >> 24) & 0xFF);
+ public uint SendSignalingPcasB;
+ public bool SendSignalingPcasBInvalidate => (SendSignalingPcasB & 0x1) != 0;
+ public bool SendSignalingPcasBSchedule => (SendSignalingPcasB & 0x2) != 0;
+ public fixed uint Reserved2C0[9];
+ public uint SetShaderLocalMemoryNonThrottledA;
+ public int SetShaderLocalMemoryNonThrottledASizeUpper => (int)((SetShaderLocalMemoryNonThrottledA >> 0) & 0xFF);
+ public uint SetShaderLocalMemoryNonThrottledB;
+ public uint SetShaderLocalMemoryNonThrottledC;
+ public int SetShaderLocalMemoryNonThrottledCMaxSmCount => (int)((SetShaderLocalMemoryNonThrottledC >> 0) & 0x1FF);
+ public uint SetShaderLocalMemoryThrottledA;
+ public int SetShaderLocalMemoryThrottledASizeUpper => (int)((SetShaderLocalMemoryThrottledA >> 0) & 0xFF);
+ public uint SetShaderLocalMemoryThrottledB;
+ public uint SetShaderLocalMemoryThrottledC;
+ public int SetShaderLocalMemoryThrottledCMaxSmCount => (int)((SetShaderLocalMemoryThrottledC >> 0) & 0x1FF);
+ public fixed uint Reserved2FC[5];
+ public uint SetSpaVersion;
+ public int SetSpaVersionMinor => (int)((SetSpaVersion >> 0) & 0xFF);
+ public int SetSpaVersionMajor => (int)((SetSpaVersion >> 8) & 0xFF);
+ public fixed uint Reserved314[123];
+ public uint SetFalcon00;
+ public uint SetFalcon01;
+ public uint SetFalcon02;
+ public uint SetFalcon03;
+ public uint SetFalcon04;
+ public uint SetFalcon05;
+ public uint SetFalcon06;
+ public uint SetFalcon07;
+ public uint SetFalcon08;
+ public uint SetFalcon09;
+ public uint SetFalcon10;
+ public uint SetFalcon11;
+ public uint SetFalcon12;
+ public uint SetFalcon13;
+ public uint SetFalcon14;
+ public uint SetFalcon15;
+ public uint SetFalcon16;
+ public uint SetFalcon17;
+ public uint SetFalcon18;
+ public uint SetFalcon19;
+ public uint SetFalcon20;
+ public uint SetFalcon21;
+ public uint SetFalcon22;
+ public uint SetFalcon23;
+ public uint SetFalcon24;
+ public uint SetFalcon25;
+ public uint SetFalcon26;
+ public uint SetFalcon27;
+ public uint SetFalcon28;
+ public uint SetFalcon29;
+ public uint SetFalcon30;
+ public uint SetFalcon31;
+ public fixed uint Reserved580[127];
+ public uint SetShaderLocalMemoryWindow;
+ public fixed uint Reserved780[4];
+ public uint SetShaderLocalMemoryA;
+ public int SetShaderLocalMemoryAAddressUpper => (int)((SetShaderLocalMemoryA >> 0) & 0xFF);
+ public uint SetShaderLocalMemoryB;
+ public fixed uint Reserved798[383];
+ public uint SetShaderCacheControl;
+ public bool SetShaderCacheControlIcachePrefetchEnable => (SetShaderCacheControl & 0x1) != 0;
+ public fixed uint ReservedD98[19];
+ public uint SetSmTimeoutInterval;
+ public int SetSmTimeoutIntervalCounterBit => (int)((SetSmTimeoutInterval >> 0) & 0x3F);
+ public fixed uint ReservedDE8[87];
+ public uint SetSpareNoop12;
+ public uint SetSpareNoop13;
+ public uint SetSpareNoop14;
+ public uint SetSpareNoop15;
+ public fixed uint ReservedF54[59];
+ public uint SetSpareNoop00;
+ public uint SetSpareNoop01;
+ public uint SetSpareNoop02;
+ public uint SetSpareNoop03;
+ public uint SetSpareNoop04;
+ public uint SetSpareNoop05;
+ public uint SetSpareNoop06;
+ public uint SetSpareNoop07;
+ public uint SetSpareNoop08;
+ public uint SetSpareNoop09;
+ public uint SetSpareNoop10;
+ public uint SetSpareNoop11;
+ public fixed uint Reserved1070[103];
+ public uint InvalidateSamplerCacheAll;
+ public bool InvalidateSamplerCacheAllV => (InvalidateSamplerCacheAll & 0x1) != 0;
+ public uint InvalidateTextureHeaderCacheAll;
+ public bool InvalidateTextureHeaderCacheAllV => (InvalidateTextureHeaderCacheAll & 0x1) != 0;
+ public fixed uint Reserved1214[29];
+ public uint InvalidateTextureDataCacheNoWfi;
+ public InvalidateCacheLines InvalidateTextureDataCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureDataCacheNoWfi >> 0) & 0x1);
+ public int InvalidateTextureDataCacheNoWfiTag => (int)((InvalidateTextureDataCacheNoWfi >> 4) & 0x3FFFFF);
+ public fixed uint Reserved128C[7];
+ public uint ActivatePerfSettingsForComputeContext;
+ public bool ActivatePerfSettingsForComputeContextAll => (ActivatePerfSettingsForComputeContext & 0x1) != 0;
+ public fixed uint Reserved12AC[33];
+ public uint InvalidateSamplerCache;
+ public InvalidateCacheLines InvalidateSamplerCacheLines => (InvalidateCacheLines)((InvalidateSamplerCache >> 0) & 0x1);
+ public int InvalidateSamplerCacheTag => (int)((InvalidateSamplerCache >> 4) & 0x3FFFFF);
+ public uint InvalidateTextureHeaderCache;
+ public InvalidateCacheLines InvalidateTextureHeaderCacheLines => (InvalidateCacheLines)((InvalidateTextureHeaderCache >> 0) & 0x1);
+ public int InvalidateTextureHeaderCacheTag => (int)((InvalidateTextureHeaderCache >> 4) & 0x3FFFFF);
+ public uint InvalidateTextureDataCache;
+ public InvalidateCacheLines InvalidateTextureDataCacheLines => (InvalidateCacheLines)((InvalidateTextureDataCache >> 0) & 0x1);
+ public int InvalidateTextureDataCacheTag => (int)((InvalidateTextureDataCache >> 4) & 0x3FFFFF);
+ public fixed uint Reserved133C[58];
+ public uint InvalidateSamplerCacheNoWfi;
+ public InvalidateCacheLines InvalidateSamplerCacheNoWfiLines => (InvalidateCacheLines)((InvalidateSamplerCacheNoWfi >> 0) & 0x1);
+ public int InvalidateSamplerCacheNoWfiTag => (int)((InvalidateSamplerCacheNoWfi >> 4) & 0x3FFFFF);
+ public fixed uint Reserved1428[64];
+ public uint SetShaderExceptions;
+ public bool SetShaderExceptionsEnable => (SetShaderExceptions & 0x1) != 0;
+ public fixed uint Reserved152C[9];
+ public uint SetRenderEnableA;
+ public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+ public uint SetRenderEnableB;
+ public uint SetRenderEnableC;
+ public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+ public uint SetTexSamplerPoolA;
+ public int SetTexSamplerPoolAOffsetUpper => (int)((SetTexSamplerPoolA >> 0) & 0xFF);
+ public uint SetTexSamplerPoolB;
+ public uint SetTexSamplerPoolC;
+ public int SetTexSamplerPoolCMaximumIndex => (int)((SetTexSamplerPoolC >> 0) & 0xFFFFF);
+ public fixed uint Reserved1568[3];
+ public uint SetTexHeaderPoolA;
+ public int SetTexHeaderPoolAOffsetUpper => (int)((SetTexHeaderPoolA >> 0) & 0xFF);
+ public uint SetTexHeaderPoolB;
+ public uint SetTexHeaderPoolC;
+ public int SetTexHeaderPoolCMaximumIndex => (int)((SetTexHeaderPoolC >> 0) & 0x3FFFFF);
+ public fixed uint Reserved1580[34];
+ public uint SetProgramRegionA;
+ public int SetProgramRegionAAddressUpper => (int)((SetProgramRegionA >> 0) & 0xFF);
+ public uint SetProgramRegionB;
+ public fixed uint Reserved1610[34];
+ public uint InvalidateShaderCachesNoWfi;
+ public bool InvalidateShaderCachesNoWfiInstruction => (InvalidateShaderCachesNoWfi & 0x1) != 0;
+ public bool InvalidateShaderCachesNoWfiGlobalData => (InvalidateShaderCachesNoWfi & 0x10) != 0;
+ public bool InvalidateShaderCachesNoWfiConstant => (InvalidateShaderCachesNoWfi & 0x1000) != 0;
+ public fixed uint Reserved169C[170];
+ public uint SetRenderEnableOverride;
+ public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3);
+ public fixed uint Reserved1948[57];
+ public uint PipeNop;
+ public uint SetSpare00;
+ public uint SetSpare01;
+ public uint SetSpare02;
+ public uint SetSpare03;
+ public fixed uint Reserved1A40[48];
+ public uint SetReportSemaphoreA;
+ public int SetReportSemaphoreAOffsetUpper => (int)((SetReportSemaphoreA >> 0) & 0xFF);
+ public uint SetReportSemaphoreB;
+ public uint SetReportSemaphoreC;
+ public uint SetReportSemaphoreD;
+ public SetReportSemaphoreDOperation SetReportSemaphoreDOperation => (SetReportSemaphoreDOperation)((SetReportSemaphoreD >> 0) & 0x3);
+ public bool SetReportSemaphoreDAwakenEnable => (SetReportSemaphoreD & 0x100000) != 0;
+ public SetReportSemaphoreDStructureSize SetReportSemaphoreDStructureSize => (SetReportSemaphoreDStructureSize)((SetReportSemaphoreD >> 28) & 0x1);
+ public bool SetReportSemaphoreDFlushDisable => (SetReportSemaphoreD & 0x4) != 0;
+ public bool SetReportSemaphoreDReductionEnable => (SetReportSemaphoreD & 0x8) != 0;
+ public SetReportSemaphoreDReductionOp SetReportSemaphoreDReductionOp => (SetReportSemaphoreDReductionOp)((SetReportSemaphoreD >> 9) & 0x7);
+ public SetReportSemaphoreDReductionFormat SetReportSemaphoreDReductionFormat => (SetReportSemaphoreDReductionFormat)((SetReportSemaphoreD >> 17) & 0x3);
+ public fixed uint Reserved1B10[702];
+ public uint SetBindlessTexture;
+ public int SetBindlessTextureConstantBufferSlotSelect => (int)((SetBindlessTexture >> 0) & 0x7);
+ public uint SetTrapHandler;
+ public fixed uint Reserved2610[843];
+ public Array8 SetShaderPerformanceCounterValueUpper;
+ public Array8 SetShaderPerformanceCounterValue;
+ public Array8 SetShaderPerformanceCounterEvent;
+ public int SetShaderPerformanceCounterEventEvent(int i) => (int)((SetShaderPerformanceCounterEvent[i] >> 0) & 0xFF);
+ public Array8 SetShaderPerformanceCounterControlA;
+ public int SetShaderPerformanceCounterControlAEvent0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 0) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 2) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 5) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 7) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 10) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 12) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 15) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 17) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 20) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 22) & 0x7);
+ public int SetShaderPerformanceCounterControlAEvent5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 25) & 0x3);
+ public int SetShaderPerformanceCounterControlABitSelect5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 27) & 0x7);
+ public int SetShaderPerformanceCounterControlASpare(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 30) & 0x3);
+ public Array8 SetShaderPerformanceCounterControlB;
+ public bool SetShaderPerformanceCounterControlBEdge(int i) => (SetShaderPerformanceCounterControlB[i] & 0x1) != 0;
+ public int SetShaderPerformanceCounterControlBMode(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 1) & 0x3);
+ public bool SetShaderPerformanceCounterControlBWindowed(int i) => (SetShaderPerformanceCounterControlB[i] & 0x8) != 0;
+ public int SetShaderPerformanceCounterControlBFunc(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 4) & 0xFFFF);
+ public uint SetShaderPerformanceCounterTrapControl;
+ public int SetShaderPerformanceCounterTrapControlMask => (int)((SetShaderPerformanceCounterTrapControl >> 0) & 0xFF);
+ public uint StartShaderPerformanceCounter;
+ public int StartShaderPerformanceCounterCounterMask => (int)((StartShaderPerformanceCounter >> 0) & 0xFF);
+ public uint StopShaderPerformanceCounter;
+ public int StopShaderPerformanceCounterCounterMask => (int)((StopShaderPerformanceCounter >> 0) & 0xFF);
+ public fixed uint Reserved33E8[6];
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
new file mode 100644
index 00000000..1b20e41c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+ /// <summary>
+ /// Type of the dependent Queue Meta Data (read by ComputeQmd from bit 206).
+ /// </summary>
+ enum DependentQmdType
+ {
+ Queue,
+ Grid
+ }
+
+ /// <summary>
+ /// Type of the release memory barrier (read by ComputeQmd from bit 366).
+ /// </summary>
+ enum ReleaseMembarType
+ {
+ FeNone,
+ FeSysmembar
+ }
+
+ /// <summary>
+ /// Type of the CWD memory barrier (read by ComputeQmd from bits 368-369).
+ /// </summary>
+ enum CwdMembarType
+ {
+ L1None,
+ L1Sysmembar,
+ L1Membar
+ }
+
+ /// <summary>
+ /// NaN behavior of 32-bit float operations on the shader (read by ComputeQmd from bit 376).
+ /// </summary>
+ enum Fp32NanBehavior
+ {
+ Legacy,
+ Fp64Compatible
+ }
+
+ /// <summary>
+ /// NaN behavior of 32-bit float to integer conversion on the shader (read by ComputeQmd from bit 377).
+ /// </summary>
+ enum Fp32F2iNanBehavior
+ {
+ PassZero,
+ PassIndefinite
+ }
+
+ /// <summary>
+ /// Limit of calls (read by ComputeQmd from bit 378).
+ /// </summary>
+ enum ApiVisibleCallLimit
+ {
+ _32,
+ NoCheck
+ }
+
+ /// <summary>
+ /// Shared memory bank mapping mode (read by ComputeQmd from bit 379).
+ /// </summary>
+ enum SharedMemoryBankMapping
+ {
+ FourBytesPerBank,
+ EightBytesPerBank
+ }
+
+ /// <summary>
+ /// Denormal behavior of 32-bit float narrowing instructions (read by ComputeQmd from bit 383).
+ /// </summary>
+ enum Fp32NarrowInstruction
+ {
+ KeepDenorms,
+ FlushDenorms
+ }
+
+ /// <summary>
+ /// Configuration of the L1 cache (read by ComputeQmd from bits 669-671).
+ /// </summary>
+ enum L1Configuration
+ {
+ DirectlyAddressableMemorySize16kb,
+ DirectlyAddressableMemorySize32kb,
+ DirectlyAddressableMemorySize48kb
+ }
+
+ /// <summary>
+ /// Reduction operation (read by ComputeQmd from the 3-bit fields at bits 788-790 and 884-886).
+ /// </summary>
+ enum ReductionOp
+ {
+ RedAdd,
+ RedMin,
+ RedMax,
+ RedInc,
+ RedDec,
+ RedAnd,
+ RedOr,
+ RedXor
+ }
+
+ /// <summary>
+ /// Reduction format (read by ComputeQmd from the 2-bit fields at bits 792-793 and 888-889).
+ /// </summary>
+ enum ReductionFormat
+ {
+ Unsigned32,
+ Signed32
+ }
+
+ /// <summary>
+ /// Size of a structure in words (read by ComputeQmd from bits 799 and 895).
+ /// </summary>
+ enum StructureSize
+ {
+ FourWords,
+ OneWord
+ }
+
+ /// <summary>
+ /// Compute Queue Meta Data.
+ /// </summary>
+ unsafe struct ComputeQmd
+ {
+ private fixed int _words[64]; // Raw QMD storage: 64 x 32-bit words = 2048 bits, indexed by the accessors below.
+
+ public int OuterPut => BitRange(30, 0);
+ public bool OuterOverflow => Bit(31);
+ public int OuterGet => BitRange(62, 32);
+ public bool OuterStickyOverflow => Bit(63);
+ public int InnerGet => BitRange(94, 64);
+ public bool InnerOverflow => Bit(95);
+ public int InnerPut => BitRange(126, 96);
+ public bool InnerStickyOverflow => Bit(127);
+ public int QmdReservedAA => BitRange(159, 128);
+ public int DependentQmdPointer => BitRange(191, 160);
+ public int QmdGroupId => BitRange(197, 192);
+ public bool SmGlobalCachingEnable => Bit(198);
+ public bool RunCtaInOneSmPartition => Bit(199);
+ public bool IsQueue => Bit(200);
+ public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
+ public bool SemaphoreReleaseEnable0 => Bit(202);
+ public bool SemaphoreReleaseEnable1 => Bit(203);
+ public bool RequireSchedulingPcas => Bit(204);
+ public bool DependentQmdScheduleEnable => Bit(205);
+ public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
+ public bool DependentQmdFieldCopy => Bit(207);
+ public int QmdReservedB => BitRange(223, 208);
+ public int CircularQueueSize => BitRange(248, 224);
+ public bool QmdReservedC => Bit(249);
+ public bool InvalidateTextureHeaderCache => Bit(250);
+ public bool InvalidateTextureSamplerCache => Bit(251);
+ public bool InvalidateTextureDataCache => Bit(252);
+ public bool InvalidateShaderDataCache => Bit(253);
+ public bool InvalidateInstructionCache => Bit(254);
+ public bool InvalidateShaderConstantCache => Bit(255);
+ public int ProgramOffset => BitRange(287, 256);
+ public int CircularQueueAddrLower => BitRange(319, 288);
+ public int CircularQueueAddrUpper => BitRange(327, 320);
+ public int QmdReservedD => BitRange(335, 328);
+ public int CircularQueueEntrySize => BitRange(351, 336);
+ public int CwdReferenceCountId => BitRange(357, 352);
+ public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
+ public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
+ public bool CwdReferenceCountIncrEnable => Bit(367);
+ public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
+ public bool SequentiallyRunCtas => Bit(370);
+ public bool CwdReferenceCountDecrEnable => Bit(371);
+ public bool Throttled => Bit(372);
+ public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
+ public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
+ public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
+ public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
+ public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
+ public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
+ public int CtaRasterWidth => BitRange(415, 384);
+ public int CtaRasterHeight => BitRange(431, 416);
+ public int CtaRasterDepth => BitRange(447, 432);
+ public int CtaRasterWidthResume => BitRange(479, 448);
+ public int CtaRasterHeightResume => BitRange(495, 480);
+ public int CtaRasterDepthResume => BitRange(511, 496);
+ public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
+ public int CoalesceWaitingPeriod => BitRange(529, 522);
+ public int SharedMemorySize => BitRange(561, 544);
+ public int QmdReservedG => BitRange(575, 562);
+ public int QmdVersion => BitRange(579, 576);
+ public int QmdMajorVersion => BitRange(583, 580);
+ public int QmdReservedH => BitRange(591, 584);
+ public int CtaThreadDimension0 => BitRange(607, 592);
+ public int CtaThreadDimension1 => BitRange(623, 608);
+ public int CtaThreadDimension2 => BitRange(639, 624);
+ public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
+ public int QmdReservedI => BitRange(668, 648);
+ public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
+ public int SmDisableMaskLower => BitRange(703, 672);
+ public int SmDisableMaskUpper => BitRange(735, 704);
+ public int Release0AddressLower => BitRange(767, 736);
+ public int Release0AddressUpper => BitRange(775, 768);
+ public int QmdReservedJ => BitRange(783, 776);
+ public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
+ public bool QmdReservedK => Bit(791);
+ public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
+ public bool Release0ReductionEnable => Bit(794);
+ public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
+ public int Release0Payload => BitRange(831, 800);
+ public int Release1AddressLower => BitRange(863, 832);
+ public int Release1AddressUpper => BitRange(871, 864);
+ public int QmdReservedL => BitRange(879, 872);
+ public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
+ public bool QmdReservedM => Bit(887);
+ public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
+ public bool Release1ReductionEnable => Bit(890);
+ public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
+ public int Release1Payload => BitRange(927, 896);
+ public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
+ public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
+ public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
+ public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
+ public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
+ public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
+ public int QmdReservedN => BitRange(1466, 1464);
+ public int BarrierCount => BitRange(1471, 1467);
+ public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
+ public int RegisterCount => BitRange(1503, 1496);
+ public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
+ public int SassVersion => BitRange(1535, 1528);
+ public int HwOnlyInnerGet => BitRange(1566, 1536);
+ public bool HwOnlyRequireSchedulingPcas => Bit(1567);
+ public int HwOnlyInnerPut => BitRange(1598, 1568);
+ public bool HwOnlyScgType => Bit(1599);
+ public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
+ public bool QmdReservedQ => Bit(1630);
+ public bool HwOnlySpanListHeadIndexValid => Bit(1631);
+ public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
+ public int QmdSpareE => BitRange(1695, 1664);
+ public int QmdSpareF => BitRange(1727, 1696);
+ public int QmdSpareG => BitRange(1759, 1728);
+ public int QmdSpareH => BitRange(1791, 1760);
+ public int QmdSpareI => BitRange(1823, 1792);
+ public int QmdSpareJ => BitRange(1855, 1824);
+ public int QmdSpareK => BitRange(1887, 1856);
+ public int QmdSpareL => BitRange(1919, 1888);
+ public int QmdSpareM => BitRange(1951, 1920);
+ public int QmdSpareN => BitRange(1983, 1952);
+ public int DebugIdUpper => BitRange(2015, 1984);
+ public int DebugIdLower => BitRange(2047, 2016);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private bool Bit(int bit) // Tests a single bit of the QMD; throws ArgumentOutOfRangeException unless 0 <= bit < 2048.
+ {
+ if ((uint)bit >= 64 * 32) // The unsigned compare rejects negative indices as well as indices past the last word.
+ {
+ throw new ArgumentOutOfRangeException(nameof(bit));
+ }
+
+ return (_words[bit >> 5] & (1 << (bit & 31))) != 0; // bit >> 5 selects the word, bit & 31 the bit inside it.
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private int BitRange(int upper, int lower) // Extracts bits lower..upper (inclusive); only "lower" is range checked.
+ {
+ if ((uint)lower >= 64 * 32)
+ {
+ throw new ArgumentOutOfRangeException(nameof(lower));
+ }
+
+ int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1))); // Mask with (upper - lower + 1) low bits set.
+
+ return (_words[lower >> 5] >> (lower & 31)) & mask; // Only the word containing "lower" is read, so a range must not cross a 32-bit word boundary.
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs
new file mode 100644
index 00000000..5581b5cc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Conditional rendering enable.
+ /// </summary>
+ enum ConditionalRenderEnabled
+ {
+ False,
+ True,
+ Host
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs
new file mode 100644
index 00000000..74a9aa04
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs
@@ -0,0 +1,96 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// State interface with a shadow memory control register.
+ /// </summary>
+ interface IShadowState
+ {
+ /// <summary>
+ /// MME shadow ram control mode.
+ /// </summary>
+ SetMmeShadowRamControlMode SetMmeShadowRamControlMode { get; }
+ }
+
+ /// <summary>
+ /// Represents a device's state, with an additional shadow state.
+ /// </summary>
+ /// <typeparam name="TState">Type of the state</typeparam>
+ class DeviceStateWithShadow<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicFields)] TState> : IDeviceState where TState : unmanaged, IShadowState
+ {
+ private readonly DeviceState _state;
+ private readonly DeviceState _shadowState;
+
+ /// <summary>
+ /// Current device state.
+ /// </summary>
+ public ref TState State => ref _state.State;
+
+ /// <summary>
+ /// Creates a new instance of the device state, with shadow state.
+ /// </summary>
+ /// <param name="callbacks">Optional callbacks that will be called if a register specified by name is read or written</param>
+ /// <param name="debugLogCallback">Optional callback to be used for debug log messages</param>
+ public DeviceStateWithShadow(IReadOnlyDictionary callbacks = null, Action debugLogCallback = null)
+ {
+ _state = new DeviceState(callbacks, debugLogCallback);
+ _shadowState = new DeviceState(); // The shadow copy is created without callbacks or debug logging.
+ }
+
+ /// <summary>
+ /// Reads a value from a register.
+ /// </summary>
+ /// <param name="offset">Register offset in bytes</param>
+ /// <returns>Value stored on the register</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int Read(int offset)
+ {
+ return _state.Read(offset);
+ }
+
+ /// <summary>
+ /// Writes a value to a register.
+ /// </summary>
+ /// <param name="offset">Register offset in bytes</param>
+ /// <param name="value">Value to be written</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Write(int offset, int value)
+ {
+ WriteWithRedundancyCheck(offset, value, out _);
+ }
+
+ /// <summary>
+ /// Writes a value to a register, returning a value indicating if <paramref name="value"/>
+ /// is different from the current value on the register.
+ /// </summary>
+ /// <param name="offset">Register offset in bytes</param>
+ /// <param name="value">Value to be written</param>
+ /// <param name="changed">True if the value was changed, false otherwise</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WriteWithRedundancyCheck(int offset, int value, out bool changed)
+ {
+ var shadowRamControl = _state.State.SetMmeShadowRamControlMode;
+ if (shadowRamControl == SetMmeShadowRamControlMode.MethodPassthrough || offset < 0x200) // Offsets below 0x200 are always written directly, whatever the shadow mode is.
+ {
+ _state.WriteWithRedundancyCheck(offset, value, out changed);
+ }
+ else if (shadowRamControl == SetMmeShadowRamControlMode.MethodTrack ||
+ shadowRamControl == SetMmeShadowRamControlMode.MethodTrackWithFilter)
+ {
+ _shadowState.Write(offset, value); // Track modes record the value in the shadow state and also apply it.
+ _state.WriteWithRedundancyCheck(offset, value, out changed);
+ }
+ else /* if (shadowRamControl == SetMmeShadowRamControlMode.MethodReplay) */
+ {
+ Debug.Assert(shadowRamControl == SetMmeShadowRamControlMode.MethodReplay);
+ _state.WriteWithRedundancyCheck(offset, _shadowState.Read(offset), out changed); // Replay ignores "value" and writes the shadowed value instead.
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
new file mode 100644
index 00000000..fd93cd8b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
@@ -0,0 +1,635 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ ///
+ /// Represents a DMA copy engine class.
+ ///
+ class DmaClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly ThreedClass _3dEngine;
+ private readonly DeviceState _state;
+
+ /// <summary>
+ /// Copy flags passed on DMA launch.
+ /// </summary>
+ [Flags]
+ private enum CopyFlags
+ {
+ SrcLinear = 1 << 7,
+ DstLinear = 1 << 8,
+ MultiLineEnable = 1 << 9,
+ RemapEnable = 1 << 10
+ }
+
+ /// <summary>
+ /// Texture parameters for copy.
+ /// </summary>
+ private struct TextureParams
+ {
+ /// <summary>
+ /// Copy region X coordinate.
+ /// </summary>
+ public readonly int RegionX;
+
+ /// <summary>
+ /// Copy region Y coordinate.
+ /// </summary>
+ public readonly int RegionY;
+
+ /// <summary>
+ /// Offset from the base pointer of the data in memory.
+ /// </summary>
+ public readonly int BaseOffset;
+
+ /// <summary>
+ /// Bytes per pixel.
+ /// </summary>
+ public readonly int Bpp;
+
+ /// <summary>
+ /// Whether the texture is linear. If false, the texture is block linear.
+ /// </summary>
+ public readonly bool Linear;
+
+ /// <summary>
+ /// Pixel offset from XYZ coordinates calculator.
+ /// </summary>
+ public readonly OffsetCalculator Calculator;
+
+ /// <summary>
+ /// Creates texture parameters.
+ /// </summary>
+ /// <param name="regionX">Copy region X coordinate</param>
+ /// <param name="regionY">Copy region Y coordinate</param>
+ /// <param name="baseOffset">Offset from the base pointer of the data in memory</param>
+ /// <param name="bpp">Bytes per pixel</param>
+ /// <param name="linear">Whether the texture is linear. If false, the texture is block linear</param>
+ /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param>
+ public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
+ {
+ RegionX = regionX;
+ RegionY = regionY;
+ BaseOffset = baseOffset;
+ Bpp = bpp;
+ Linear = linear;
+ Calculator = calculator;
+ }
+ }
+
+ [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)]
+ private struct UInt24 // 3-byte value stored as three sequential bytes, with no padding (Size = 3, Pack = 1).
+ {
+ public byte Byte0;
+ public byte Byte1;
+ public byte Byte2;
+ }
+
+ /// <summary>
+ /// Creates a new instance of the DMA copy engine class.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="threedEngine">3D engine</param>
+ public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
+ {
+ _context = context;
+ _channel = channel;
+ _3dEngine = threedEngine;
+ _state = new DeviceState(new Dictionary
+ {
+ { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) } // LaunchDma is registered as the callback for the LaunchDma register.
+ });
+ }
+
+ /// <summary>
+ /// Reads data from the class registers.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <returns>Data at the specified offset</returns>
+ public int Read(int offset) => _state.Read(offset);
+
+ /// <summary>
+ /// Writes data to the class registers.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <param name="data">Data to be written</param>
+ public void Write(int offset, int data) => _state.Write(offset, data);
+
+ /// <summary>
+ /// Determine if a buffer-to-texture region covers the entirety of a texture.
+ /// </summary>
+ /// <param name="tex">Texture to compare</param>
+ /// <param name="linear">True if the texture is linear, false if block linear</param>
+ /// <param name="bpp">Texture bytes per pixel</param>
+ /// <param name="stride">Texture stride</param>
+ /// <param name="xCount">Number of pixels to be copied</param>
+ /// <param name="yCount">Number of lines to be copied</param>
+ /// <returns>True if the copy region covers the whole texture, false otherwise</returns>
+ private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
+ {
+ if (linear)
+ {
+ // If the stride is negative, the texture has to be flipped, so
+ // the fast copy is not trivial, use the slow path (a zero stride is rejected too).
+ if (stride <= 0)
+ {
+ return false;
+ }
+
+ int alignWidth = Constants.StrideAlignment / bpp;
+ return stride / bpp == BitUtils.AlignUp(xCount, alignWidth); // Linear: only the width (stride in pixels) is compared; yCount is not checked.
+ }
+ else
+ {
+ int alignWidth = Constants.GobAlignment / bpp;
+ return tex.RegionX == 0 &&
+ tex.RegionY == 0 &&
+ tex.Width == BitUtils.AlignUp(xCount, alignWidth) &&
+ tex.Height == yCount;
+ }
+ }
+
+ /// <summary>
+ /// Releases a semaphore for a given LaunchDma method call.
+ /// </summary>
+ /// <param name="argument">The LaunchDma call argument</param>
+ private void ReleaseSemaphore(int argument)
+ {
+ LaunchDmaSemaphoreType type = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3); // Semaphore type is taken from bits 3-4 of the argument.
+ if (type != LaunchDmaSemaphoreType.None)
+ {
+ ulong address = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
+ if (type == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
+ {
+ _channel.MemoryManager.Write(address, _state.State.SetSemaphorePayload);
+ }
+ else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */
+ {
+ _channel.MemoryManager.Write(address + 8, _context.GetTimestamp()); // The timestamp goes 8 bytes after the payload.
+ _channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Performs a buffer to buffer, or buffer to texture copy.
+ /// </summary>
+ /// <param name="argument">The LaunchDma call argument</param>
+ private void DmaCopy(int argument)
+ {
+ var memoryManager = _channel.MemoryManager;
+
+ CopyFlags copyFlags = (CopyFlags)argument;
+
+ bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
+ bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
+ bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
+ bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);
+
+ uint size = _state.State.LineLengthIn;
+
+ if (size == 0)
+ {
+ return;
+ }
+
+ ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
+ ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;
+
+ int xCount = (int)_state.State.LineLengthIn;
+ int yCount = (int)_state.State.LineCount;
+
+ _3dEngine.CreatePendingSyncs();
+ _3dEngine.FlushUboDirty();
+
+ if (copy2D)
+ {
+ // Buffer to texture copy.
+ int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
+ int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
+ int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+ int srcBpp = remap ? srcComponents * componentSize : 1;
+ int dstBpp = remap ? dstComponents * componentSize : 1;
+
+ var dst = Unsafe.As(ref _state.State.SetDstBlockSize);
+ var src = Unsafe.As(ref _state.State.SetSrcBlockSize);
+
+ int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;
+
+ if (!srcLinear)
+ {
+ srcRegionX = src.RegionX;
+ srcRegionY = src.RegionY;
+ }
+
+ if (!dstLinear)
+ {
+ dstRegionX = dst.RegionX;
+ dstRegionY = dst.RegionY;
+ }
+
+ int srcStride = (int)_state.State.PitchIn;
+ int dstStride = (int)_state.State.PitchOut;
+
+ var srcCalculator = new OffsetCalculator(
+ src.Width,
+ src.Height,
+ srcStride,
+ srcLinear,
+ src.MemoryLayout.UnpackGobBlocksInY(),
+ src.MemoryLayout.UnpackGobBlocksInZ(),
+ srcBpp);
+
+ var dstCalculator = new OffsetCalculator(
+ dst.Width,
+ dst.Height,
+ dstStride,
+ dstLinear,
+ dst.MemoryLayout.UnpackGobBlocksInY(),
+ dst.MemoryLayout.UnpackGobBlocksInZ(),
+ dstBpp);
+
+ (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
+ (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);
+
+ if (srcLinear && srcStride < 0)
+ {
+ srcBaseOffset += srcStride * (yCount - 1);
+ }
+
+ if (dstLinear && dstStride < 0)
+ {
+ dstBaseOffset += dstStride * (yCount - 1);
+ }
+
+ ReadOnlySpan srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);
+
+ bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
+ bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
+
+ if (completeSource && completeDest)
+ {
+ var target = memoryManager.Physical.TextureCache.FindTexture(
+ memoryManager,
+ dstGpuVa,
+ dstBpp,
+ dstStride,
+ dst.Height,
+ xCount,
+ yCount,
+ dstLinear,
+ dst.MemoryLayout.UnpackGobBlocksInY(),
+ dst.MemoryLayout.UnpackGobBlocksInZ());
+
+ if (target != null)
+ {
+ byte[] data;
+ if (srcLinear)
+ {
+ data = LayoutConverter.ConvertLinearStridedToLinear(
+ target.Info.Width,
+ target.Info.Height,
+ 1,
+ 1,
+ xCount * srcBpp,
+ srcStride,
+ target.Info.FormatInfo.BytesPerPixel,
+ srcSpan);
+ }
+ else
+ {
+ data = LayoutConverter.ConvertBlockLinearToLinear(
+ src.Width,
+ src.Height,
+ src.Depth,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ srcBpp,
+ src.MemoryLayout.UnpackGobBlocksInY(),
+ src.MemoryLayout.UnpackGobBlocksInZ(),
+ 1,
+ new SizeInfo((int)target.Size),
+ srcSpan);
+ }
+
+ target.SynchronizeMemory();
+ target.SetData(data);
+ target.SignalModified();
+ return;
+ }
+ else if (srcCalculator.LayoutMatches(dstCalculator))
+ {
+ // No layout conversion has to be performed, just copy the data entirely.
+ memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
+ return;
+ }
+ }
+
+ // OPT: This allocates a (potentially) huge temporary array and then copies an existing
+ // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
+ // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
+ Span dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
+
+ TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
+ TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);
+
+ // If remapping is disabled, we always copy the components directly, in order.
+ // If it's enabled, but the mapping is just XYZW, we also copy them in order.
+ bool isIdentityRemap = !remap ||
+ (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
+ (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
+ (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
+ (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));
+
+ if (isIdentityRemap)
+ {
+ // The order of the components doesn't change, so we can just copy directly
+ // (with layout conversion if necessary).
+
+ switch (srcBpp)
+ {
+ case 1: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 2: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 4: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 8: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 12: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 16: Copy>(dstSpan, srcSpan, dstParams, srcParams); break;
+ default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
+ }
+ }
+ else
+ {
+ // The order or value of the components might change.
+
+ switch (componentSize)
+ {
+ case 1: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 2: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 3: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 4: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ default: throw new NotSupportedException($"Unable to copy {componentSize} component size.");
+ }
+ }
+
+ memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
+ }
+ else
+ {
+ if (remap &&
+ _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
+ _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
+ _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
+ _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
+ _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
+ _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
+ _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
+ {
+ // Fast path for clears when remap is enabled.
+ memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
+ }
+ else
+ {
+ // TODO: Implement remap functionality.
+ // Buffer to buffer copy.
+
+ bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
+ bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();
+
+ if (!srcIsPitchKind && dstIsPitchKind)
+ {
+ CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
+ }
+ else if (srcIsPitchKind && !dstIsPitchKind)
+ {
+ CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
+ }
+ else
+ {
+ memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies data from one texture to another, while performing layout conversion if necessary.
+ /// </summary>
+ /// <typeparam name="T">Pixel type</typeparam>
+ /// <param name="dstSpan">Destination texture memory region</param>
+ /// <param name="srcSpan">Source texture memory region</param>
+ /// <param name="dst">Destination texture parameters</param>
+ /// <param name="src">Source texture parameters</param>
+ private unsafe void Copy(Span dstSpan, ReadOnlySpan srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+ {
+ int xCount = (int)_state.State.LineLengthIn;
+ int yCount = (int)_state.State.LineCount;
+
+ if (src.Linear && dst.Linear && src.Bpp == dst.Bpp)
+ {
+ // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
+ // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
+ for (int y = 0; y < yCount; y++)
+ {
+ src.Calculator.SetY(src.RegionY + y);
+ dst.Calculator.SetY(dst.RegionY + y);
+ int srcOffset = src.Calculator.GetOffset(src.RegionX);
+ int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
+ srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp) // Offsets are made relative to the start of each span.
+ .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
+ }
+ }
+ else
+ {
+ fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+ {
+ byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+ byte* srcBase = srcPtr - src.BaseOffset;
+
+ for (int y = 0; y < yCount; y++)
+ {
+ src.Calculator.SetY(src.RegionY + y);
+ dst.Calculator.SetY(dst.RegionY + y);
+
+ for (int x = 0; x < xCount; x++)
+ {
+ int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
+ int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
+
+ *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset); // One element of type T is copied per pixel, with no bounds check.
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
+ /// </summary>
+ /// <typeparam name="T">Pixel type</typeparam>
+ /// <param name="dstSpan">Destination texture memory region</param>
+ /// <param name="dst">Destination texture parameters</param>
+ /// <param name="fillValue">Constant pixel value to be set</param>
+ private unsafe void Fill(Span dstSpan, TextureParams dst, T fillValue) where T : unmanaged
+ {
+ int xCount = (int)_state.State.LineLengthIn;
+ int yCount = (int)_state.State.LineCount;
+
+ fixed (byte* dstPtr = dstSpan)
+ {
+ byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+
+ for (int y = 0; y < yCount; y++)
+ {
+ dst.Calculator.SetY(dst.RegionY + y);
+
+ for (int x = 0; x < xCount; x++)
+ {
+ int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
+
+ *(T*)(dstBase + dstOffset) = fillValue; // One element of type T is written per pixel, with no bounds check.
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
+ /// </summary>
+ /// <remarks>
+ /// Each destination component is handled by a separate pass over the whole region: the spans are advanced
+ /// by whole components so that the pass reads and writes the selected component of every pixel.
+ /// Selectors without a case below (such as NoWrite) leave the destination component untouched.
+ /// </remarks>
+ /// <typeparam name="T">Pixel type</typeparam>
+ /// <param name="dstSpan">Destination texture memory region</param>
+ /// <param name="srcSpan">Source texture memory region</param>
+ /// <param name="dst">Destination texture parameters</param>
+ /// <param name="src">Source texture parameters</param>
+ private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+ {
+     // The register stores the component count minus one.
+     int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+     int componentSize = Unsafe.SizeOf<T>();
+
+     for (int i = 0; i < dstComponents; i++)
+     {
+         SetRemapComponentsDst componentsDst = i switch
+         {
+             0 => _state.State.SetRemapComponentsDstX,
+             1 => _state.State.SetRemapComponentsDstY,
+             2 => _state.State.SetRemapComponentsDstZ,
+             _ => _state.State.SetRemapComponentsDstW
+         };
+
+         // Byte offset of the destination component written by this pass.
+         // The slice itself is only taken inside the cases that actually write.
+         int dstComponentOffset = componentSize * i;
+
+         switch (componentsDst)
+         {
+             case SetRemapComponentsDst.SrcX:
+                 Copy<T>(dstSpan.Slice(dstComponentOffset), srcSpan, dst, src);
+                 break;
+             case SetRemapComponentsDst.SrcY:
+                 Copy<T>(dstSpan.Slice(dstComponentOffset), srcSpan.Slice(componentSize), dst, src);
+                 break;
+             case SetRemapComponentsDst.SrcZ:
+                 Copy<T>(dstSpan.Slice(dstComponentOffset), srcSpan.Slice(componentSize * 2), dst, src);
+                 break;
+             case SetRemapComponentsDst.SrcW:
+                 Copy<T>(dstSpan.Slice(dstComponentOffset), srcSpan.Slice(componentSize * 3), dst, src);
+                 break;
+             case SetRemapComponentsDst.ConstA:
+                 // The 32-bit constant register is reinterpreted as a T.
+                 Fill<T>(dstSpan.Slice(dstComponentOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
+                 break;
+             case SetRemapComponentsDst.ConstB:
+                 Fill<T>(dstSpan.Slice(dstComponentOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
+                 break;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
+ /// </summary>
+ /// <remarks>
+ /// When both addresses and the size are multiples of 16 the copy moves 16 bytes at a time,
+ /// otherwise it falls back to a byte by byte copy.
+ /// </remarks>
+ /// <param name="memoryManager">GPU memory manager</param>
+ /// <param name="srcGpuVa">Source GPU virtual address</param>
+ /// <param name="dstGpuVa">Destination GPU virtual address</param>
+ /// <param name="size">Size in bytes of the copy</param>
+ private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
+ {
+     if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
+     {
+         // The address conversion leaves the low 4 bits untouched, so an aligned 16 byte chunk stays contiguous.
+         for (ulong offset = 0; offset < size; offset += 16)
+         {
+             Vector128<byte> data = memoryManager.Read<Vector128<byte>>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
+             memoryManager.Write(dstGpuVa + offset, data);
+         }
+     }
+     else
+     {
+         for (ulong offset = 0; offset < size; offset++)
+         {
+             byte data = memoryManager.Read<byte>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
+             memoryManager.Write(dstGpuVa + offset, data);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
+ /// </summary>
+ /// <remarks>
+ /// When both addresses and the size are multiples of 16 the copy moves 16 bytes at a time,
+ /// otherwise it falls back to a byte by byte copy.
+ /// </remarks>
+ /// <param name="memoryManager">GPU memory manager</param>
+ /// <param name="srcGpuVa">Source GPU virtual address</param>
+ /// <param name="dstGpuVa">Destination GPU virtual address</param>
+ /// <param name="size">Size in bytes of the copy</param>
+ private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
+ {
+     if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
+     {
+         // The address conversion leaves the low 4 bits untouched, so an aligned 16 byte chunk stays contiguous.
+         for (ulong offset = 0; offset < size; offset += 16)
+         {
+             Vector128<byte> data = memoryManager.Read<Vector128<byte>>(srcGpuVa + offset, true);
+             memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
+         }
+     }
+     else
+     {
+         for (ulong offset = 0; offset < size; offset++)
+         {
+             byte data = memoryManager.Read<byte>(srcGpuVa + offset, true);
+             memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Calculates the GOB block linear address from a linear address.
+ /// </summary>
+ /// <remarks>
+ /// Only address bits 4 to 8 are permuted, every other bit is passed through unchanged:
+ /// y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
+ /// </remarks>
+ /// <param name="address">Linear address</param>
+ /// <returns>Block linear address</returns>
+ private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
+ {
+     ulong passthrough = address & ~0x1f0UL;
+
+     ulong y0 = (address & 0x40) >> 2;      // bit 6 -> bit 4
+     ulong x4 = (address & 0x10) << 1;      // bit 4 -> bit 5
+     ulong y1y2 = (address & 0x180) >> 1;   // bits 7..8 -> bits 6..7
+     ulong x5 = (address & 0x20) << 3;      // bit 5 -> bit 8
+
+     return passthrough | y0 | x4 | y1y2 | x5;
+ }
+
+ /// <summary>
+ /// Handler for the DMA launch method: runs the copy first, then hands the same argument
+ /// to the semaphore release step (buffer to buffer or buffer to texture copy, optionally followed by a semaphore release).
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LaunchDma(int argument)
+ {
+     // Order matters: the semaphore must only be released once the copy has been performed.
+     DmaCopy(argument);
+     ReleaseSemaphore(argument);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
new file mode 100644
index 00000000..7de4d5f0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
@@ -0,0 +1,271 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ /// <summary>
+ /// Physical mode target.
+ /// </summary>
+ enum SetPhysModeTarget
+ {
+ LocalFb = 0,
+ CoherentSysmem = 1,
+ NoncoherentSysmem = 2,
+ }
+
+ /// <summary>
+ /// DMA data transfer type.
+ /// </summary>
+ enum LaunchDmaDataTransferType
+ {
+ None = 0,
+ Pipelined = 1,
+ NonPipelined = 2,
+ }
+
+ /// <summary>
+ /// DMA semaphore type.
+ /// </summary>
+ enum LaunchDmaSemaphoreType
+ {
+ None = 0,
+ ReleaseOneWordSemaphore = 1,
+ ReleaseFourWordSemaphore = 2,
+ }
+
+ /// <summary>
+ /// DMA interrupt type.
+ /// </summary>
+ enum LaunchDmaInterruptType
+ {
+ None = 0,
+ Blocking = 1,
+ NonBlocking = 2,
+ }
+
+ /// <summary>
+ /// DMA memory layout (used for both the source and the destination of the copy).
+ /// </summary>
+ enum LaunchDmaMemoryLayout
+ {
+ Blocklinear = 0,
+ Pitch = 1,
+ }
+
+ /// <summary>
+ /// DMA type.
+ /// </summary>
+ enum LaunchDmaType
+ {
+ Virtual = 0,
+ Physical = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction operation.
+ /// </summary>
+ enum LaunchDmaSemaphoreReduction
+ {
+ Imin = 0,
+ Imax = 1,
+ Ixor = 2,
+ Iand = 3,
+ Ior = 4,
+ Iadd = 5,
+ Inc = 6,
+ Dec = 7,
+ Fadd = 10,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction signedness.
+ /// </summary>
+ enum LaunchDmaSemaphoreReductionSign
+ {
+ Signed = 0,
+ Unsigned = 1,
+ }
+
+ /// <summary>
+ /// DMA L2 cache bypass.
+ /// </summary>
+ enum LaunchDmaBypassL2
+ {
+ UsePteSetting = 0,
+ ForceVolatile = 1,
+ }
+
+ /// <summary>
+ /// DMA component remapping selector: what a destination component receives (a source component, a constant, or nothing).
+ /// </summary>
+ enum SetRemapComponentsDst
+ {
+ SrcX = 0,
+ SrcY = 1,
+ SrcZ = 2,
+ SrcW = 3,
+ ConstA = 4,
+ ConstB = 5,
+ NoWrite = 6,
+ }
+
+ /// <summary>
+ /// DMA component remapping component size.
+ /// </summary>
+ enum SetRemapComponentsComponentSize
+ {
+ One = 0,
+ Two = 1,
+ Three = 2,
+ Four = 3,
+ }
+
+ /// <summary>
+ /// DMA component remapping number of components (the stored value is the count minus one).
+ /// </summary>
+ enum SetRemapComponentsNumComponents
+ {
+ One = 0,
+ Two = 1,
+ Three = 2,
+ Four = 3,
+ }
+
+ /// <summary>
+ /// Block width in GOBs (used by both the source and the destination block size registers).
+ /// </summary>
+ enum SetBlockSizeWidth
+ {
+ QuarterGob = 14,
+ OneGob = 0,
+ }
+
+ /// <summary>
+ /// Block height in GOBs (used by both the source and the destination block size registers).
+ /// </summary>
+ enum SetBlockSizeHeight
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Block depth in GOBs (used by both the source and the destination block size registers).
+ /// </summary>
+ enum SetBlockSizeDepth
+ {
+ OneGob = 0,
+ TwoGobs = 1,
+ FourGobs = 2,
+ EightGobs = 3,
+ SixteenGobs = 4,
+ ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Height of a single GOB in lines.
+ /// </summary>
+ enum SetBlockSizeGobHeight
+ {
+ GobHeightTesla4 = 0,
+ GobHeightFermi8 = 1,
+ }
+
+ /// <summary>
+ /// DMA copy class state. Fields are declared in register order, one 32-bit word each; the ReservedXXX members pad the gaps (XXX is the hexadecimal byte offset), so fields must not be reordered.
+ /// </summary>
+ unsafe struct DmaClassState
+ {
+#pragma warning disable CS0649
+ public fixed uint Reserved00[64];
+ public uint Nop;
+ public fixed uint Reserved104[15];
+ public uint PmTrigger;
+ public fixed uint Reserved144[63];
+ public uint SetSemaphoreA;
+ public int SetSemaphoreAUpper => (int)((SetSemaphoreA >> 0) & 0xFF);
+ public uint SetSemaphoreB;
+ public uint SetSemaphorePayload;
+ public fixed uint Reserved24C[2];
+ public uint SetRenderEnableA;
+ public int SetRenderEnableAUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+ public uint SetRenderEnableB;
+ public uint SetRenderEnableC;
+ public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+ public uint SetSrcPhysMode;
+ public SetPhysModeTarget SetSrcPhysModeTarget => (SetPhysModeTarget)((SetSrcPhysMode >> 0) & 0x3);
+ public uint SetDstPhysMode;
+ public SetPhysModeTarget SetDstPhysModeTarget => (SetPhysModeTarget)((SetDstPhysMode >> 0) & 0x3);
+ public fixed uint Reserved268[38];
+ public uint LaunchDma;
+ public LaunchDmaDataTransferType LaunchDmaDataTransferType => (LaunchDmaDataTransferType)((LaunchDma >> 0) & 0x3);
+ public bool LaunchDmaFlushEnable => (LaunchDma & 0x4) != 0;
+ public LaunchDmaSemaphoreType LaunchDmaSemaphoreType => (LaunchDmaSemaphoreType)((LaunchDma >> 3) & 0x3);
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 5) & 0x3);
+ public LaunchDmaMemoryLayout LaunchDmaSrcMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 7) & 0x1);
+ public LaunchDmaMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 8) & 0x1);
+ public bool LaunchDmaMultiLineEnable => (LaunchDma & 0x200) != 0;
+ public bool LaunchDmaRemapEnable => (LaunchDma & 0x400) != 0;
+ public bool LaunchDmaForceRmwdisable => (LaunchDma & 0x800) != 0;
+ public LaunchDmaType LaunchDmaSrcType => (LaunchDmaType)((LaunchDma >> 12) & 0x1);
+ public LaunchDmaType LaunchDmaDstType => (LaunchDmaType)((LaunchDma >> 13) & 0x1);
+ public LaunchDmaSemaphoreReduction LaunchDmaSemaphoreReduction => (LaunchDmaSemaphoreReduction)((LaunchDma >> 14) & 0xF);
+ public LaunchDmaSemaphoreReductionSign LaunchDmaSemaphoreReductionSign => (LaunchDmaSemaphoreReductionSign)((LaunchDma >> 18) & 0x1);
+ public bool LaunchDmaSemaphoreReductionEnable => (LaunchDma & 0x80000) != 0;
+ public LaunchDmaBypassL2 LaunchDmaBypassL2 => (LaunchDmaBypassL2)((LaunchDma >> 20) & 0x1);
+ public fixed uint Reserved304[63];
+ public uint OffsetInUpper;
+ public int OffsetInUpperUpper => (int)((OffsetInUpper >> 0) & 0xFF);
+ public uint OffsetInLower;
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperUpper => (int)((OffsetOutUpper >> 0) & 0xFF);
+ public uint OffsetOutLower;
+ public uint PitchIn;
+ public uint PitchOut;
+ public uint LineLengthIn;
+ public uint LineCount;
+ public fixed uint Reserved420[184];
+ public uint SetRemapConstA;
+ public uint SetRemapConstB;
+ public uint SetRemapComponents;
+ public SetRemapComponentsDst SetRemapComponentsDstX => (SetRemapComponentsDst)((SetRemapComponents >> 0) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstY => (SetRemapComponentsDst)((SetRemapComponents >> 4) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstZ => (SetRemapComponentsDst)((SetRemapComponents >> 8) & 0x7);
+ public SetRemapComponentsDst SetRemapComponentsDstW => (SetRemapComponentsDst)((SetRemapComponents >> 12) & 0x7);
+ public SetRemapComponentsComponentSize SetRemapComponentsComponentSize => (SetRemapComponentsComponentSize)((SetRemapComponents >> 16) & 0x3);
+ public SetRemapComponentsNumComponents SetRemapComponentsNumSrcComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 20) & 0x3);
+ public SetRemapComponentsNumComponents SetRemapComponentsNumDstComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 24) & 0x3);
+ public uint SetDstBlockSize;
+ public SetBlockSizeWidth SetDstBlockSizeWidth => (SetBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+ public SetBlockSizeHeight SetDstBlockSizeHeight => (SetBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+ public SetBlockSizeDepth SetDstBlockSizeDepth => (SetBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+ public SetBlockSizeGobHeight SetDstBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetDstBlockSize >> 12) & 0xF);
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOrigin;
+ public int SetDstOriginX => (int)((SetDstOrigin >> 0) & 0xFFFF);
+ public int SetDstOriginY => (int)((SetDstOrigin >> 16) & 0xFFFF);
+ public uint Reserved724;
+ public uint SetSrcBlockSize;
+ public SetBlockSizeWidth SetSrcBlockSizeWidth => (SetBlockSizeWidth)((SetSrcBlockSize >> 0) & 0xF);
+ public SetBlockSizeHeight SetSrcBlockSizeHeight => (SetBlockSizeHeight)((SetSrcBlockSize >> 4) & 0xF);
+ public SetBlockSizeDepth SetSrcBlockSizeDepth => (SetBlockSizeDepth)((SetSrcBlockSize >> 8) & 0xF);
+ public SetBlockSizeGobHeight SetSrcBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetSrcBlockSize >> 12) & 0xF);
+ public uint SetSrcWidth;
+ public uint SetSrcHeight;
+ public uint SetSrcDepth;
+ public uint SetSrcLayer;
+ public uint SetSrcOrigin;
+ public int SetSrcOriginX => (int)((SetSrcOrigin >> 0) & 0xFFFF);
+ public int SetSrcOriginY => (int)((SetSrcOrigin >> 16) & 0xFFFF);
+ public fixed uint Reserved740[629];
+ public uint PmTriggerEnd;
+ public fixed uint Reserved1118[2490];
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
new file mode 100644
index 00000000..6873ff40
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
@@ -0,0 +1,20 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+ /// <summary>
+ /// Buffer to texture copy parameters. Field order and sizes define the structure layout, so fields must not be reordered.
+ /// </summary>
+ struct DmaTexture
+ {
+#pragma warning disable CS0649
+ public MemoryLayout MemoryLayout;
+ public int Width;
+ public int Height;
+ public int Depth;
+ public int RegionZ;
+ public ushort RegionX;
+ public ushort RegionY;
+#pragma warning restore CS0649
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
new file mode 100644
index 00000000..458dc8f6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
@@ -0,0 +1,41 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ enum TertOp
+ {
+ Grp0IncMethod = 0,
+ Grp0SetSubDevMask = 1,
+ Grp0StoreSubDevMask = 2,
+ Grp0UseSubDevMask = 3,
+ Grp2NonIncMethod = 0 // Same numeric value as Grp0IncMethod; presumably told apart by the SecOp group - confirm.
+ }
+
+ enum SecOp
+ {
+ Grp0UseTert = 0,
+ IncMethod = 1,
+ Grp2UseTert = 2,
+ NonIncMethod = 3,
+ ImmdDataMethod = 4,
+ OneInc = 5,
+ Reserved6 = 6,
+ EndPbSegment = 7
+ }
+
+ struct CompressedMethod
+ {
+#pragma warning disable CS0649
+ public uint Method; // Raw 32-bit word; every property below decodes a bit range of it (ranges overlap).
+#pragma warning restore CS0649
+ public int MethodAddressOld => (int)((Method >> 2) & 0x7FF); // Bits 2-12.
+ public int MethodAddress => (int)((Method >> 0) & 0xFFF); // Bits 0-11.
+ public int SubdeviceMask => (int)((Method >> 4) & 0xFFF); // Bits 4-15.
+ public int MethodSubchannel => (int)((Method >> 13) & 0x7); // Bits 13-15.
+ public TertOp TertOp => (TertOp)((Method >> 16) & 0x3); // Bits 16-17.
+ public int MethodCountOld => (int)((Method >> 18) & 0x7FF); // Bits 18-28.
+ public int MethodCount => (int)((Method >> 16) & 0x1FFF); // Bits 16-28.
+ public int ImmdData => (int)((Method >> 16) & 0x1FFF); // Bits 16-28, same range as MethodCount.
+ public SecOp SecOp => (SecOp)((Method >> 29) & 0x7); // Bits 29-31.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
new file mode 100644
index 00000000..b1b236e7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
@@ -0,0 +1,55 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ enum Entry0Fetch
+ {
+ Unconditional = 0,
+ Conditional = 1,
+ }
+
+ enum Entry1Priv
+ {
+ User = 0,
+ Kernel = 1,
+ }
+
+ enum Entry1Level
+ {
+ Main = 0,
+ Subroutine = 1,
+ }
+
+ enum Entry1Sync
+ {
+ Proceed = 0,
+ Wait = 1,
+ }
+
+ enum Entry1Opcode
+ {
+ Nop = 0,
+ Illegal = 1,
+ Crc = 2,
+ PbCrc = 3,
+ }
+
+ struct GPEntry
+ {
+#pragma warning disable CS0649
+ public uint Entry0; // First raw 32-bit word of the entry.
+#pragma warning restore CS0649
+ public Entry0Fetch Entry0Fetch => (Entry0Fetch)((Entry0 >> 0) & 0x1); // Bit 0.
+ public int Entry0Get => (int)((Entry0 >> 2) & 0x3FFFFFFF); // Bits 2-31.
+ public int Entry0Operand => (int)(Entry0); // Whole word.
+#pragma warning disable CS0649
+ public uint Entry1; // Second raw 32-bit word of the entry.
+#pragma warning restore CS0649
+ public int Entry1GetHi => (int)((Entry1 >> 0) & 0xFF); // Bits 0-7.
+ public Entry1Priv Entry1Priv => (Entry1Priv)((Entry1 >> 8) & 0x1); // Bit 8.
+ public Entry1Level Entry1Level => (Entry1Level)((Entry1 >> 9) & 0x1); // Bit 9.
+ public int Entry1Length => (int)((Entry1 >> 10) & 0x1FFFFF); // Bits 10-30.
+ public Entry1Sync Entry1Sync => (Entry1Sync)((Entry1 >> 31) & 0x1); // Bit 31.
+ public Entry1Opcode Entry1Opcode => (Entry1Opcode)((Entry1 >> 0) & 0xFF); // Bits 0-7, same range as Entry1GetHi.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
new file mode 100644
index 00000000..e80d98a1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
@@ -0,0 +1,248 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.MME;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO class.
+ /// </summary>
+ class GPFifoClass : IDeviceState
+ {
+     private readonly GpuContext _context;
+     private readonly GPFifoProcessor _parent;
+     private readonly DeviceState<GPFifoClassState> _state;
+
+     private int _previousSubChannel;
+     private bool _createSyncPending;
+
+     private const int MacrosCount = 0x80;
+
+     // Note: The size of the macro memory is unknown, we just make
+     // a guess here and use 256kb as the size (65536 32-bit words). Increase if needed.
+     private const int MacroCodeSize = 256 * 256;
+
+     private readonly Macro[] _macros;
+     private readonly int[] _macroCode;
+
+     /// <summary>
+     /// Creates a new instance of the GPU General Purpose FIFO class.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="parent">Parent GPU General Purpose FIFO processor</param>
+     public GPFifoClass(GpuContext context, GPFifoProcessor parent)
+     {
+         _context = context;
+         _parent = parent;
+
+         // Registers listed here invoke the given handler when written; no read callbacks are installed.
+         _state = new DeviceState<GPFifoClassState>(new Dictionary<string, RwCallback>
+         {
+             { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) },
+             { nameof(GPFifoClassState.Syncpointb), new RwCallback(Syncpointb, null) },
+             { nameof(GPFifoClassState.WaitForIdle), new RwCallback(WaitForIdle, null) },
+             { nameof(GPFifoClassState.SetReference), new RwCallback(SetReference, null) },
+             { nameof(GPFifoClassState.LoadMmeInstructionRam), new RwCallback(LoadMmeInstructionRam, null) },
+             { nameof(GPFifoClassState.LoadMmeStartAddressRam), new RwCallback(LoadMmeStartAddressRam, null) },
+             { nameof(GPFifoClassState.SetMmeShadowRamControl), new RwCallback(SetMmeShadowRamControl, null) }
+         });
+
+         _macros = new Macro[MacrosCount];
+         _macroCode = new int[MacroCodeSize];
+     }
+
+     /// <summary>
+     /// Create any syncs from WaitForIdle command that are currently pending.
+     /// </summary>
+     public void CreatePendingSyncs()
+     {
+         if (_createSyncPending)
+         {
+             _createSyncPending = false;
+             _context.CreateHostSyncIfNeeded(false, false);
+         }
+     }
+
+     /// <summary>
+     /// Reads data from the class registers.
+     /// </summary>
+     /// <param name="offset">Register byte offset</param>
+     /// <returns>Data at the specified offset</returns>
+     public int Read(int offset) => _state.Read(offset);
+
+     /// <summary>
+     /// Writes data to the class registers.
+     /// </summary>
+     /// <param name="offset">Register byte offset</param>
+     /// <param name="data">Data to be written</param>
+     public void Write(int offset, int data) => _state.Write(offset, data);
+
+     /// <summary>
+     /// Writes a GPU counter to guest memory.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void Semaphored(int argument)
+     {
+         // The lower register holds address bits 2-31, the upper register holds address bits 32-39.
+         ulong address = ((ulong)_state.State.SemaphorebOffsetLower << 2) |
+                         ((ulong)_state.State.SemaphoreaOffsetUpper << 32);
+
+         int value = _state.State.SemaphorecPayload;
+
+         SemaphoredOperation operation = _state.State.SemaphoredOperation;
+
+         if (_state.State.SemaphoredReleaseSize == SemaphoredReleaseSize.SixteenBytes)
+         {
+             // Sixteen byte layout: payload at +0, zero at +4, timestamp at +8.
+             _parent.MemoryManager.Write(address + 4, 0);
+             _parent.MemoryManager.Write(address + 8, _context.GetTimestamp());
+         }
+
+         // TODO: Acquire operations (Wait), interrupts for invalid combinations.
+         if (operation == SemaphoredOperation.Release)
+         {
+             _parent.MemoryManager.Write(address, value);
+         }
+         else if (operation == SemaphoredOperation.Reduction)
+         {
+             bool signed = _state.State.SemaphoredFormat == SemaphoredFormat.Signed;
+
+             int mem = _parent.MemoryManager.Read<int>(address);
+
+             switch (_state.State.SemaphoredReduction)
+             {
+                 case SemaphoredReduction.Min:
+                     value = signed ? Math.Min(mem, value) : (int)Math.Min((uint)mem, (uint)value);
+                     break;
+                 case SemaphoredReduction.Max:
+                     value = signed ? Math.Max(mem, value) : (int)Math.Max((uint)mem, (uint)value);
+                     break;
+                 case SemaphoredReduction.Xor:
+                     value ^= mem;
+                     break;
+                 case SemaphoredReduction.And:
+                     value &= mem;
+                     break;
+                 case SemaphoredReduction.Or:
+                     value |= mem;
+                     break;
+                 case SemaphoredReduction.Add:
+                     value += mem;
+                     break;
+                 case SemaphoredReduction.Inc:
+                     // Wrapping increment: goes back to zero once the payload is reached.
+                     value = (uint)mem < (uint)value ? mem + 1 : 0;
+                     break;
+                 case SemaphoredReduction.Dec:
+                     // Wrapping decrement: reloads the payload when memory is zero or above the payload.
+                     value = (uint)mem > 0 && (uint)mem <= (uint)value ? mem - 1 : value;
+                     break;
+             }
+
+             _parent.MemoryManager.Write(address, value);
+         }
+     }
+
+     /// <summary>
+     /// Apply a fence operation on a syncpoint.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void Syncpointb(int argument)
+     {
+         SyncpointbOperation operation = _state.State.SyncpointbOperation;
+
+         uint syncpointId = (uint)_state.State.SyncpointbSyncptIndex;
+
+         if (operation == SyncpointbOperation.Wait)
+         {
+             uint threshold = (uint)_state.State.SyncpointaPayload;
+
+             _context.Synchronization.WaitOnSyncpoint(syncpointId, threshold, Timeout.InfiniteTimeSpan);
+         }
+         else if (operation == SyncpointbOperation.Incr)
+         {
+             _context.CreateHostSyncIfNeeded(true, true);
+             _context.Synchronization.IncrementSyncpoint(syncpointId);
+         }
+
+         _context.AdvanceSequence();
+     }
+
+     /// <summary>
+     /// Waits for the GPU to be idle.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void WaitForIdle(int argument)
+     {
+         _parent.PerformDeferredDraws();
+         _context.Renderer.Pipeline.Barrier();
+
+         // The host sync itself is created later, by CreatePendingSyncs.
+         _createSyncPending = true;
+     }
+
+     /// <summary>
+     /// Used as an indirect data barrier on NVN. When used, access to previously written data must be coherent.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void SetReference(int argument)
+     {
+         _context.Renderer.Pipeline.CommandBufferBarrier();
+
+         _context.CreateHostSyncIfNeeded(false, true);
+     }
+
+     /// <summary>
+     /// Sends macro code/data to the MME.
+     /// </summary>
+     /// <remarks>
+     /// The word is stored at the current instruction RAM pointer, which is then advanced by one.
+     /// NOTE(review): the pointer is guest controlled and is not range checked against the code array here.
+     /// </remarks>
+     /// <param name="argument">Method call argument</param>
+     public void LoadMmeInstructionRam(int argument)
+     {
+         _macroCode[_state.State.LoadMmeInstructionRamPointer++] = argument;
+     }
+
+     /// <summary>
+     /// Binds a macro index to a position for the MME
+     /// </summary>
+     /// <remarks>
+     /// The macro is stored at the current start address RAM pointer, which is then advanced by one.
+     /// NOTE(review): the pointer is guest controlled and is not range checked against the macro table here.
+     /// </remarks>
+     /// <param name="argument">Method call argument</param>
+     public void LoadMmeStartAddressRam(int argument)
+     {
+         _macros[_state.State.LoadMmeStartAddressRamPointer++] = new Macro(argument);
+     }
+
+     /// <summary>
+     /// Changes the shadow RAM control.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void SetMmeShadowRamControl(int argument)
+     {
+         _parent.SetShadowRamControl(argument);
+     }
+
+     /// <summary>
+     /// Pushes an argument to a macro.
+     /// </summary>
+     /// <param name="index">Index of the macro</param>
+     /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+     /// <param name="argument">Argument to be pushed to the macro</param>
+     public void MmePushArgument(int index, ulong gpuVa, int argument)
+     {
+         _macros[index].PushArgument(gpuVa, argument);
+     }
+
+     /// <summary>
+     /// Prepares a macro for execution.
+     /// </summary>
+     /// <param name="index">Index of the macro</param>
+     /// <param name="argument">Initial argument passed to the macro</param>
+     public void MmeStart(int index, int argument)
+     {
+         _macros[index].StartExecution(_context, _parent, _macroCode, argument);
+     }
+
+     /// <summary>
+     /// Executes a macro.
+     /// </summary>
+     /// <param name="index">Index of the macro</param>
+     /// <param name="state">Current GPU state</param>
+     public void CallMme(int index, IDeviceState state)
+     {
+         _macros[index].Execute(_macroCode, state);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
new file mode 100644
index 00000000..07d062eb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
@@ -0,0 +1,233 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Semaphore operation.
+    /// </summary>
+    enum SemaphoredOperation
+    {
+        Acquire = 1,
+        Release = 2,
+        AcqGeq = 4,
+        AcqAnd = 8,
+        Reduction = 16
+    }
+
+    /// <summary>
+    /// Semaphore acquire switch enable.
+    /// </summary>
+    enum SemaphoredAcquireSwitch
+    {
+        Disabled = 0,
+        Enabled = 1
+    }
+
+    /// <summary>
+    /// Semaphore release interrupt wait enable.
+    /// </summary>
+    enum SemaphoredReleaseWfi
+    {
+        En = 0,
+        Dis = 1
+    }
+
+    /// <summary>
+    /// Semaphore release structure size.
+    /// </summary>
+    enum SemaphoredReleaseSize
+    {
+        SixteenBytes = 0,
+        FourBytes = 1
+    }
+
+    /// <summary>
+    /// Semaphore reduction operation.
+    /// </summary>
+    enum SemaphoredReduction
+    {
+        Min = 0,
+        Max = 1,
+        Xor = 2,
+        And = 3,
+        Or = 4,
+        Add = 5,
+        Inc = 6,
+        Dec = 7
+    }
+
+    /// <summary>
+    /// Semaphore format.
+    /// </summary>
+    enum SemaphoredFormat
+    {
+        Signed = 0,
+        Unsigned = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer Page Directory Buffer invalidation.
+    /// </summary>
+    enum MemOpCTlbInvalidatePdb
+    {
+        One = 0,
+        All = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer GPC invalidation enable.
+    /// </summary>
+    enum MemOpCTlbInvalidateGpc
+    {
+        Enable = 0,
+        Disable = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer invalidation target.
+    /// </summary>
+    enum MemOpCTlbInvalidateTarget
+    {
+        VidMem = 0,
+        SysMemCoherent = 2,
+        SysMemNoncoherent = 3
+    }
+
+    /// <summary>
+    /// Memory operation.
+    /// </summary>
+    enum MemOpDOperation
+    {
+        Membar = 5,
+        MmuTlbInvalidate = 9,
+        L2PeermemInvalidate = 13,
+        L2SysmemInvalidate = 14,
+        L2CleanComptags = 15,
+        L2FlushDirty = 16
+    }
+
+    /// <summary>
+    /// Syncpoint operation.
+    /// </summary>
+    enum SyncpointbOperation
+    {
+        Wait = 0,
+        Incr = 1
+    }
+
+    /// <summary>
+    /// Syncpoint wait switch enable.
+    /// </summary>
+    enum SyncpointbWaitSwitch
+    {
+        Dis = 0,
+        En = 1
+    }
+
+    /// <summary>
+    /// Wait for interrupt scope.
+    /// </summary>
+    enum WfiScope
+    {
+        CurrentScgType = 0,
+        All = 1
+    }
+
+    /// <summary>
+    /// Yield operation.
+    /// </summary>
+    enum YieldOp
+    {
+        Nop = 0,
+        PbdmaTimeslice = 1,
+        RunlistTimeslice = 2,
+        Tsg = 3
+    }
+
+    /// <summary>
+    /// General Purpose FIFO class state.
+    /// </summary>
+    /// <remarks>
+    /// Fields are declared in register order, one 32-bit word each, so they must not be reordered.
+    /// The expression-bodied properties decode bit ranges of the field declared right above them.
+    /// </remarks>
+    struct GPFifoClassState
+    {
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectNvclass => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngine => (int)((SetObject >> 16) & 0x1F);
+        public uint Illegal;
+        public int IllegalHandle => (int)(Illegal);
+        public uint Nop;
+        public int NopHandle => (int)(Nop);
+        public uint Reserved0C;
+        public uint Semaphorea;
+        public int SemaphoreaOffsetUpper => (int)((Semaphorea >> 0) & 0xFF);
+        public uint Semaphoreb;
+        public int SemaphorebOffsetLower => (int)((Semaphoreb >> 2) & 0x3FFFFFFF);
+        public uint Semaphorec;
+        public int SemaphorecPayload => (int)(Semaphorec);
+        public uint Semaphored;
+        public SemaphoredOperation SemaphoredOperation => (SemaphoredOperation)((Semaphored >> 0) & 0x1F);
+        public SemaphoredAcquireSwitch SemaphoredAcquireSwitch => (SemaphoredAcquireSwitch)((Semaphored >> 12) & 0x1);
+        public SemaphoredReleaseWfi SemaphoredReleaseWfi => (SemaphoredReleaseWfi)((Semaphored >> 20) & 0x1);
+        public SemaphoredReleaseSize SemaphoredReleaseSize => (SemaphoredReleaseSize)((Semaphored >> 24) & 0x1);
+        public SemaphoredReduction SemaphoredReduction => (SemaphoredReduction)((Semaphored >> 27) & 0xF);
+        public SemaphoredFormat SemaphoredFormat => (SemaphoredFormat)((Semaphored >> 31) & 0x1);
+        public uint NonStallInterrupt;
+        public int NonStallInterruptHandle => (int)(NonStallInterrupt);
+        public uint FbFlush;
+        public int FbFlushHandle => (int)(FbFlush);
+        public uint Reserved28;
+        public uint Reserved2C;
+        public uint MemOpC;
+        public int MemOpCOperandLow => (int)((MemOpC >> 2) & 0x3FFFFFFF);
+        public MemOpCTlbInvalidatePdb MemOpCTlbInvalidatePdb => (MemOpCTlbInvalidatePdb)((MemOpC >> 0) & 0x1);
+        public MemOpCTlbInvalidateGpc MemOpCTlbInvalidateGpc => (MemOpCTlbInvalidateGpc)((MemOpC >> 1) & 0x1);
+        public MemOpCTlbInvalidateTarget MemOpCTlbInvalidateTarget => (MemOpCTlbInvalidateTarget)((MemOpC >> 10) & 0x3);
+        public int MemOpCTlbInvalidateAddrLo => (int)((MemOpC >> 12) & 0xFFFFF);
+        public uint MemOpD;
+        public int MemOpDOperandHigh => (int)((MemOpD >> 0) & 0xFF);
+        public MemOpDOperation MemOpDOperation => (MemOpDOperation)((MemOpD >> 27) & 0x1F);
+        public int MemOpDTlbInvalidateAddrHi => (int)((MemOpD >> 0) & 0xFF);
+        public uint Reserved38;
+        public uint Reserved3C;
+        public uint Reserved40;
+        public uint Reserved44;
+        public uint Reserved48;
+        public uint Reserved4C;
+        public uint SetReference;
+        public int SetReferenceCount => (int)(SetReference);
+        public uint Reserved54;
+        public uint Reserved58;
+        public uint Reserved5C;
+        public uint Reserved60;
+        public uint Reserved64;
+        public uint Reserved68;
+        public uint Reserved6C;
+        public uint Syncpointa;
+        public int SyncpointaPayload => (int)(Syncpointa);
+        public uint Syncpointb;
+        public SyncpointbOperation SyncpointbOperation => (SyncpointbOperation)((Syncpointb >> 0) & 0x1);
+        public SyncpointbWaitSwitch SyncpointbWaitSwitch => (SyncpointbWaitSwitch)((Syncpointb >> 4) & 0x1);
+        public int SyncpointbSyncptIndex => (int)((Syncpointb >> 8) & 0xFFF);
+        public uint Wfi;
+        public WfiScope WfiScope => (WfiScope)((Wfi >> 0) & 0x1);
+        public uint CrcCheck;
+        public int CrcCheckValue => (int)(CrcCheck);
+        public uint Yield;
+        public YieldOp YieldOp => (YieldOp)((Yield >> 0) & 0x3);
+        // TODO: Eventually move this to per-engine state.
+        // 31 words of padding: covers byte offsets 0x84 to 0xFF so that NoOperation lands at 0x100.
+        public Array31<uint> Reserved84;
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public uint SetNotifyB;
+        public uint Notify;
+        public uint WaitForIdle;
+        public uint LoadMmeInstructionRamPointer;
+        public uint LoadMmeInstructionRam;
+        public uint LoadMmeStartAddressRamPointer;
+        public uint LoadMmeStartAddressRam;
+        public uint SetMmeShadowRamControl;
+#pragma warning restore CS0649
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
new file mode 100644
index 00000000..cd29a9da
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -0,0 +1,262 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Collections.Concurrent;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO device.
+ /// </summary>
+ public sealed class GPFifoDevice : IDisposable
+ {
+     /// <summary>
+     /// Indicates if the command buffer has pre-fetch enabled.
+     /// </summary>
+     private enum CommandBufferType
+     {
+         Prefetch,
+         NoPrefetch
+     }
+
+     /// <summary>
+     /// Command buffer data.
+     /// </summary>
+     private struct CommandBuffer
+     {
+         /// <summary>
+         /// Processor used to process the command buffer. Contains channel state.
+         /// </summary>
+         public GPFifoProcessor Processor;
+
+         /// <summary>
+         /// The type of the command buffer.
+         /// </summary>
+         public CommandBufferType Type;
+
+         /// <summary>
+         /// Fetched data. Null until the buffer is prefetched; <see cref="Fetch"/> then reads from memory instead.
+         /// </summary>
+         public int[] Words;
+
+         /// <summary>
+         /// The GPFIFO entry address (used in <see cref="CommandBufferType.NoPrefetch"/> mode).
+         /// </summary>
+         public ulong EntryAddress;
+
+         /// <summary>
+         /// The count of entries (32-bit words) inside this GPFIFO entry.
+         /// </summary>
+         public uint EntryCount;
+
+         /// <summary>
+         /// Get the entries for the command buffer from memory.
+         /// </summary>
+         /// <param name="memoryManager">The memory manager used to fetch the data</param>
+         /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+         /// <returns>The fetched data</returns>
+         private ReadOnlySpan<int> GetWords(MemoryManager memoryManager, bool flush)
+         {
+             return MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, flush)); // 4 bytes per word
+         }
+
+         /// <summary>
+         /// Prefetch the command buffer, copying its words into <see cref="Words"/>.
+         /// </summary>
+         /// <param name="memoryManager">The memory manager used to fetch the data</param>
+         public void Prefetch(MemoryManager memoryManager)
+         {
+             Words = GetWords(memoryManager, true).ToArray();
+         }
+
+         /// <summary>
+         /// Fetch the command buffer, using the prefetched copy when one exists.
+         /// </summary>
+         /// <param name="memoryManager">The memory manager used to fetch the data</param>
+         /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+         /// <returns>The command buffer words</returns>
+         public ReadOnlySpan<int> Fetch(MemoryManager memoryManager, bool flush)
+         {
+             return Words ?? GetWords(memoryManager, flush);
+         }
+     }
+
+     private readonly ConcurrentQueue<CommandBuffer> _commandBufferQueue;
+
+     private CommandBuffer _currentCommandBuffer;
+     private GPFifoProcessor _prevChannelProcessor;
+
+     private readonly bool _ibEnable;
+     private readonly GpuContext _context;
+     private readonly AutoResetEvent _event;
+
+     private bool _interrupt; // NOTE(review): set by Interrupt(), polled by DispatchCalls(); if called from different threads this may need volatile - confirm
+     private int _flushSkips;
+
+     /// <summary>
+     /// Creates a new instance of the GPU General Purpose FIFO device.
+     /// </summary>
+     /// <param name="context">GPU context that the GPFIFO belongs to</param>
+     internal GPFifoDevice(GpuContext context)
+     {
+         _commandBufferQueue = new ConcurrentQueue<CommandBuffer>();
+         _ibEnable = true;
+         _context = context;
+         _event = new AutoResetEvent(false);
+     }
+
+     /// <summary>
+     /// Signal the FIFO that there are new entries to process.
+     /// </summary>
+     public void SignalNewEntries()
+     {
+         _event.Set();
+     }
+
+     /// <summary>
+     /// Push a GPFIFO entry in the form of a prefetched command buffer.
+     /// It is intended to be used by nvservices to handle special cases.
+     /// </summary>
+     /// <param name="processor">Processor used to process <paramref name="commandBuffer"/></param>
+     /// <param name="commandBuffer">The command buffer containing the prefetched commands</param>
+     internal void PushHostCommandBuffer(GPFifoProcessor processor, int[] commandBuffer)
+     {
+         _commandBufferQueue.Enqueue(new CommandBuffer
+         {
+             Processor = processor,
+             Type = CommandBufferType.Prefetch,
+             Words = commandBuffer,
+             EntryAddress = ulong.MaxValue, // host supplied words have no guest address
+             EntryCount = (uint)commandBuffer.Length
+         });
+     }
+
+     /// <summary>
+     /// Create a CommandBuffer from a GPFIFO entry.
+     /// </summary>
+     /// <param name="processor">Processor used to process the command buffer pointed to by <paramref name="entry"/></param>
+     /// <param name="entry">The GPFIFO entry</param>
+     /// <returns>A new CommandBuffer based on the GPFIFO entry</returns>
+     private static CommandBuffer CreateCommandBuffer(GPFifoProcessor processor, GPEntry entry)
+     {
+         CommandBufferType type = CommandBufferType.Prefetch;
+
+         if (entry.Entry1Sync == Entry1Sync.Wait)
+         {
+             type = CommandBufferType.NoPrefetch;
+         }
+
+         ulong startAddress = ((ulong)entry.Entry0Get << 2) | ((ulong)entry.Entry1GetHi << 32);
+
+         return new CommandBuffer
+         {
+             Processor = processor,
+             Type = type,
+             Words = null,
+             EntryAddress = startAddress,
+             EntryCount = (uint)entry.Entry1Length
+         };
+     }
+
+     /// <summary>
+     /// Pushes GPFIFO entries.
+     /// </summary>
+     /// <param name="processor">Processor used to process the command buffers pointed to by <paramref name="entries"/></param>
+     /// <param name="entries">GPFIFO entries</param>
+     internal void PushEntries(GPFifoProcessor processor, ReadOnlySpan<ulong> entries)
+     {
+         bool beforeBarrier = true;
+
+         for (int index = 0; index < entries.Length; index++)
+         {
+             ulong entry = entries[index];
+
+             CommandBuffer commandBuffer = CreateCommandBuffer(processor, Unsafe.As<ulong, GPEntry>(ref entry));
+
+             if (beforeBarrier && commandBuffer.Type == CommandBufferType.Prefetch)
+             {
+                 commandBuffer.Prefetch(processor.MemoryManager);
+             }
+
+             if (commandBuffer.Type == CommandBufferType.NoPrefetch)
+             {
+                 beforeBarrier = false; // nothing after the first no-prefetch entry is prefetched
+             }
+
+             _commandBufferQueue.Enqueue(commandBuffer);
+         }
+     }
+
+     /// <summary>
+     /// Waits until commands are pushed to the FIFO.
+     /// </summary>
+     /// <returns>True if commands were received, false if wait timed out</returns>
+     public bool WaitForCommands()
+     {
+         return !_commandBufferQueue.IsEmpty || (_event.WaitOne(8) && !_commandBufferQueue.IsEmpty);
+     }
+
+     /// <summary>
+     /// Processes commands pushed to the FIFO.
+     /// </summary>
+     public void DispatchCalls()
+     {
+         // Use this opportunity to also dispose any pending channels that were closed.
+         _context.RunDeferredActions();
+
+         // Process command buffers.
+         while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
+         {
+             bool flushCommandBuffer = true;
+
+             if (_flushSkips != 0)
+             {
+                 _flushSkips--;
+                 flushCommandBuffer = false;
+             }
+
+             _currentCommandBuffer = entry;
+             ReadOnlySpan<int> words = entry.Fetch(entry.Processor.MemoryManager, flushCommandBuffer);
+
+             // If we are changing the current channel,
+             // we need to force all the host state to be updated.
+             if (_prevChannelProcessor != entry.Processor)
+             {
+                 _prevChannelProcessor = entry.Processor;
+                 entry.Processor.ForceAllDirty();
+             }
+
+             entry.Processor.Process(entry.EntryAddress, words);
+         }
+
+         _interrupt = false;
+     }
+
+     /// <summary>
+     /// Sets the number of flushes that should be skipped for subsequent command buffers.
+     /// </summary>
+     /// <remarks>
+     /// This can improve performance when command buffer data only needs to be consumed by the GPU.
+     /// </remarks>
+     /// <param name="count">The amount of flushes that should be skipped</param>
+     internal void SetFlushSkips(int count)
+     {
+         _flushSkips = count;
+     }
+
+     /// <summary>
+     /// Interrupts command processing. This will break out of the <see cref="DispatchCalls"/> loop.
+     /// </summary>
+     public void Interrupt()
+     {
+         _interrupt = true;
+     }
+
+     /// <summary>
+     /// Disposes of resources used for GPFifo command processing.
+     /// </summary>
+     public void Dispose() => _event.Dispose();
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
new file mode 100644
index 00000000..3fb3feee
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -0,0 +1,331 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.Compute;
+using Ryujinx.Graphics.Gpu.Engine.Dma;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Twod;
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+ /// <summary>
+ /// Represents a GPU General Purpose FIFO command processor.
+ /// </summary>
+ class GPFifoProcessor
+ {
+     private const int MacrosCount = 0x80;
+     private const int MacroIndexMask = MacrosCount - 1;
+
+     private const int LoadInlineDataMethodOffset = 0x6d;
+     private const int UniformBufferUpdateDataMethodOffset = 0x8e4;
+
+     private readonly GpuChannel _channel;
+
+     /// <summary>
+     /// Channel memory manager.
+     /// </summary>
+     public MemoryManager MemoryManager => _channel.MemoryManager;
+
+     /// <summary>
+     /// 3D Engine.
+     /// </summary>
+     public ThreedClass ThreedClass => _3dClass;
+
+     /// <summary>
+     /// Internal GPFIFO state.
+     /// </summary>
+     private struct DmaState
+     {
+         public int Method;
+         public int SubChannel;
+         public int MethodCount;
+         public bool NonIncrementing;
+         public bool IncrementOnce;
+     }
+
+     private DmaState _state;
+
+     private readonly ThreedClass _3dClass;
+     private readonly ComputeClass _computeClass;
+     private readonly InlineToMemoryClass _i2mClass;
+     private readonly TwodClass _2dClass;
+     private readonly DmaClass _dmaClass;
+
+     private readonly GPFifoClass _fifoClass;
+
+     /// <summary>
+     /// Creates a new instance of the GPU General Purpose FIFO command processor.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="channel">Channel that the GPFIFO processor belongs to</param>
+     public GPFifoProcessor(GpuContext context, GpuChannel channel)
+     {
+         _channel = channel;
+
+         _fifoClass = new GPFifoClass(context, this);
+         _3dClass = new ThreedClass(context, channel, _fifoClass);
+         _computeClass = new ComputeClass(context, channel, _3dClass);
+         _i2mClass = new InlineToMemoryClass(context, channel);
+         _2dClass = new TwodClass(channel);
+         _dmaClass = new DmaClass(context, channel, _3dClass);
+     }
+
+     /// <summary>
+     /// Processes a command buffer.
+     /// </summary>
+     /// <param name="baseGpuVa">Base GPU virtual address of the command buffer</param>
+     /// <param name="commandBuffer">Command buffer</param>
+     public void Process(ulong baseGpuVa, ReadOnlySpan<int> commandBuffer)
+     {
+         for (int index = 0; index < commandBuffer.Length; index++)
+         {
+             int command = commandBuffer[index];
+
+             ulong gpuVa = baseGpuVa + (ulong)index * 4;
+
+             if (_state.MethodCount != 0)
+             {
+                 if (TryFastI2mBufferUpdate(commandBuffer, ref index))
+                 {
+                     continue;
+                 }
+
+                 Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
+
+                 if (!_state.NonIncrementing)
+                 {
+                     _state.Method++;
+                 }
+
+                 if (_state.IncrementOnce)
+                 {
+                     _state.NonIncrementing = true;
+                 }
+
+                 _state.MethodCount--;
+             }
+             else
+             {
+                 CompressedMethod meth = Unsafe.As<int, CompressedMethod>(ref command);
+
+                 if (TryFastUniformBufferUpdate(meth, commandBuffer, index))
+                 {
+                     index += meth.MethodCount;
+                     continue;
+                 }
+
+                 switch (meth.SecOp)
+                 {
+                     case SecOp.IncMethod:
+                     case SecOp.NonIncMethod:
+                     case SecOp.OneInc:
+                         _state.Method = meth.MethodAddress;
+                         _state.SubChannel = meth.MethodSubchannel;
+                         _state.MethodCount = meth.MethodCount;
+                         _state.IncrementOnce = meth.SecOp == SecOp.OneInc;
+                         _state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod;
+                         break;
+                     case SecOp.ImmdDataMethod:
+                         Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
+                         break;
+                 }
+             }
+         }
+
+         _3dClass.FlushUboDirty();
+     }
+
+     /// <summary>
+     /// Tries to perform a fast Inline-to-Memory data update.
+     /// If successful, all available data will be copied at once, and up to <see cref="DmaState.MethodCount"/>
+     /// command buffer entries will be consumed.
+     /// </summary>
+     /// <param name="commandBuffer">Command buffer where the data is contained</param>
+     /// <param name="offset">Offset at where the data is located, auto-incremented on success</param>
+     /// <returns>True if the fast copy was successful, false otherwise</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     private bool TryFastI2mBufferUpdate(ReadOnlySpan<int> commandBuffer, ref int offset)
+     {
+         if (_state.Method == LoadInlineDataMethodOffset && _state.NonIncrementing && _state.SubChannel <= 2)
+         {
+             int availableCount = commandBuffer.Length - offset;
+             int consumeCount = Math.Min(_state.MethodCount, availableCount);
+
+             var data = commandBuffer.Slice(offset, consumeCount);
+
+             if (_state.SubChannel == 0)
+             {
+                 _3dClass.LoadInlineData(data);
+             }
+             else if (_state.SubChannel == 1)
+             {
+                 _computeClass.LoadInlineData(data);
+             }
+             else /* if (_state.SubChannel == 2) */
+             {
+                 _i2mClass.LoadInlineData(data);
+             }
+
+             offset += consumeCount - 1; // the caller's loop increment accounts for the remaining word
+             _state.MethodCount -= consumeCount;
+
+             return true;
+         }
+
+         return false;
+     }
+
+     /// <summary>
+     /// Tries to perform a fast constant buffer data update.
+     /// If successful, all data will be copied at once, and <see cref="CompressedMethod.MethodCount"/> + 1
+     /// command buffer entries will be consumed.
+     /// </summary>
+     /// <param name="meth">Compressed method to be checked</param>
+     /// <param name="commandBuffer">Command buffer where <paramref name="meth"/> is contained</param>
+     /// <param name="offset">Offset at where <paramref name="meth"/> is located</param>
+     /// <returns>True if the fast copy was successful, false otherwise</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     private bool TryFastUniformBufferUpdate(CompressedMethod meth, ReadOnlySpan<int> commandBuffer, int offset)
+     {
+         int availableCount = commandBuffer.Length - offset;
+
+         if (meth.MethodAddress == UniformBufferUpdateDataMethodOffset &&
+             meth.MethodCount < availableCount &&
+             meth.SecOp == SecOp.NonIncMethod)
+         {
+             _3dClass.ConstantBufferUpdate(commandBuffer.Slice(offset + 1, meth.MethodCount));
+
+             return true;
+         }
+
+         return false;
+     }
+
+     /// <summary>Sends an uncompressed method for processing by the graphics pipeline.</summary>
+     /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+     /// <param name="offset">Method offset in words; selects the GPFIFO class, an engine class or a macro</param>
+     /// <param name="argument">Method call argument</param>
+     /// <param name="subChannel">Sub-channel selecting the target engine class</param><param name="isLastCall">True on the last word of the method; triggers the macro call</param>
+     private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall)
+     {
+         if (offset < 0x60)
+         {
+             _fifoClass.Write(offset * 4, argument);
+         }
+         else if (offset < 0xe00)
+         {
+             offset *= 4;
+
+             switch (subChannel)
+             {
+                 case 0:
+                     _3dClass.Write(offset, argument);
+                     break;
+                 case 1:
+                     _computeClass.Write(offset, argument);
+                     break;
+                 case 2:
+                     _i2mClass.Write(offset, argument);
+                     break;
+                 case 3:
+                     _2dClass.Write(offset, argument);
+                     break;
+                 case 4:
+                     _dmaClass.Write(offset, argument);
+                     break;
+             }
+         }
+         else
+         {
+             IDeviceState state = subChannel switch
+             {
+                 0 => _3dClass,
+                 3 => _2dClass,
+                 _ => null
+             };
+
+             if (state != null)
+             {
+                 int macroIndex = (offset >> 1) & MacroIndexMask;
+
+                 if ((offset & 1) != 0)
+                 {
+                     _fifoClass.MmePushArgument(macroIndex, gpuVa, argument);
+                 }
+                 else
+                 {
+                     _fifoClass.MmeStart(macroIndex, argument);
+                 }
+
+                 if (isLastCall)
+                 {
+                     _fifoClass.CallMme(macroIndex, state);
+
+                     _3dClass.PerformDeferredDraws();
+                 }
+             }
+         }
+     }
+
+     /// <summary>
+     /// Writes data directly to the state of the specified class.
+     /// </summary>
+     /// <param name="classId">ID of the class to write the data into</param>
+     /// <param name="offset">State offset in bytes</param>
+     /// <param name="value">Value to be written</param>
+     public void Write(ClassId classId, int offset, int value)
+     {
+         switch (classId)
+         {
+             case ClassId.Threed:
+                 _3dClass.Write(offset, value);
+                 break;
+             case ClassId.Compute:
+                 _computeClass.Write(offset, value);
+                 break;
+             case ClassId.InlineToMemory:
+                 _i2mClass.Write(offset, value);
+                 break;
+             case ClassId.Twod:
+                 _2dClass.Write(offset, value);
+                 break;
+             case ClassId.Dma:
+                 _dmaClass.Write(offset, value);
+                 break;
+             case ClassId.GPFifo:
+                 _fifoClass.Write(offset, value);
+                 break;
+         }
+     }
+
+     /// <summary>
+     /// Sets the shadow ram control value; it is currently forwarded to the 3D engine class only.
+     /// </summary>
+     /// <param name="control">New shadow ram control value</param>
+     public void SetShadowRamControl(int control)
+     {
+         _3dClass.SetShadowRamControl(control);
+     }
+
+     /// <summary>
+     /// Forces a full host state update by marking all state as modified,
+     /// and also requests all GPU resources in use to be rebound.
+     /// </summary>
+     public void ForceAllDirty()
+     {
+         _3dClass.ForceStateDirty();
+         _channel.BufferManager.Rebind();
+         _channel.TextureManager.Rebind();
+     }
+
+     /// <summary>
+     /// Perform any deferred draws.
+     /// </summary>
+     public void PerformDeferredDraws()
+     {
+         _3dClass.PerformDeferredDraws();
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
new file mode 100644
index 00000000..e1d7e940
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Represents a Inline-to-Memory engine class.
+ /// </summary>
+ class InlineToMemoryClass : IDeviceState
+ {
+     private readonly GpuContext _context;
+     private readonly GpuChannel _channel;
+     private readonly DeviceState<InlineToMemoryClassState> _state; // stays null when constructed with initializeState == false
+
+     private bool _isLinear;
+
+     private int _offset; // write position inside _buffer, in words
+     private int _size; // total transfer size, in bytes
+
+     private ulong _dstGpuVa;
+     private int _dstX;
+     private int _dstY;
+     private int _dstWidth;
+     private int _dstHeight;
+     private int _dstStride;
+     private int _dstGobBlocksInY;
+     private int _dstGobBlocksInZ;
+     private int _lineLengthIn;
+     private int _lineCount;
+
+     private bool _finished;
+
+     private int[] _buffer;
+
+     /// <summary>
+     /// Creates a new instance of the Inline-to-Memory engine class.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="channel">GPU channel</param>
+     /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
+     public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
+     {
+         _context = context;
+         _channel = channel;
+
+         if (initializeState)
+         {
+             _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
+             {
+                 { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
+                 { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) }
+             });
+         }
+     }
+
+     /// <summary>
+     /// Creates a new instance of the inline-to-memory engine class.
+     /// </summary>
+     /// <param name="context">GPU context</param>
+     /// <param name="channel">GPU channel</param>
+     public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
+     {
+     }
+
+     /// <summary>
+     /// Reads data from the class registers.
+     /// </summary>
+     /// <param name="offset">Register byte offset</param>
+     /// <returns>Data at the specified offset</returns>
+     public int Read(int offset) => _state.Read(offset);
+
+     /// <summary>
+     /// Writes data to the class registers.
+     /// </summary>
+     /// <param name="offset">Register byte offset</param>
+     /// <param name="data">Data to be written</param>
+     public void Write(int offset, int data) => _state.Write(offset, data);
+
+     /// <summary>
+     /// Launches Inline-to-Memory engine DMA copy.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     private void LaunchDma(int argument)
+     {
+         LaunchDma(ref _state.State, argument);
+     }
+
+     /// <summary>
+     /// Launches Inline-to-Memory engine DMA copy.
+     /// </summary>
+     /// <param name="state">Current class state</param>
+     /// <param name="argument">Method call argument</param>
+     public void LaunchDma(ref InlineToMemoryClassState state, int argument)
+     {
+         _isLinear = (argument & 1) != 0;
+
+         _offset = 0;
+         _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);
+
+         int count = _size / 4;
+
+         if (_buffer == null || _buffer.Length < count)
+         {
+             _buffer = new int[count];
+         }
+
+         ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;
+
+         _dstGpuVa = dstGpuVa;
+         _dstX = state.SetDstOriginBytesXV;
+         _dstY = state.SetDstOriginSamplesYV;
+         _dstWidth = (int)state.SetDstWidth;
+         _dstHeight = (int)state.SetDstHeight;
+         _dstStride = (int)state.PitchOut;
+         _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
+         _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
+         _lineLengthIn = (int)state.LineLengthIn;
+         _lineCount = (int)state.LineCount;
+
+         _finished = false;
+     }
+
+     /// <summary>
+     /// Pushes a block of data to the Inline-to-Memory engine.
+     /// </summary>
+     /// <param name="data">Data to push</param>
+     public void LoadInlineData(ReadOnlySpan<int> data)
+     {
+         if (!_finished)
+         {
+             int copySize = Math.Min(data.Length, _buffer.Length - _offset);
+             data.Slice(0, copySize).CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));
+
+             _offset += copySize;
+
+             if (_offset * 4 >= _size)
+             {
+                 FinishTransfer();
+             }
+         }
+     }
+
+     /// <summary>
+     /// Pushes a word of data to the Inline-to-Memory engine.
+     /// </summary>
+     /// <param name="argument">Method call argument</param>
+     public void LoadInlineData(int argument)
+     {
+         if (!_finished)
+         {
+             _buffer[_offset++] = argument;
+
+             if (_offset * 4 >= _size)
+             {
+                 FinishTransfer();
+             }
+         }
+     }
+
+     /// <summary>
+     /// Performs actual copy of the inline data after the transfer is finished.
+     /// </summary>
+     private void FinishTransfer()
+     {
+         var memoryManager = _channel.MemoryManager;
+
+         var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+
+         if (_isLinear && _lineCount == 1)
+         {
+             memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
+             _context.AdvanceSequence();
+         }
+         else
+         {
+             // TODO: Verify if the destination X/Y and width/height are taken into account
+             // for linear texture transfers. If not, we can use the fast path for that aswell.
+             // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
+             if (!_isLinear)
+             {
+                 var target = memoryManager.Physical.TextureCache.FindTexture(
+                     memoryManager,
+                     _dstGpuVa,
+                     1,
+                     _dstStride,
+                     _dstHeight,
+                     _lineLengthIn,
+                     _lineCount,
+                     _isLinear,
+                     _dstGobBlocksInY,
+                     _dstGobBlocksInZ);
+
+                 if (target != null)
+                 {
+                     target.SynchronizeMemory();
+                     target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
+                     target.SignalModified();
+                     _finished = true; // mark completion here too, otherwise surplus words re-trigger the transfer
+                     return;
+                 }
+             }
+
+             var dstCalculator = new OffsetCalculator(
+                 _dstWidth,
+                 _dstHeight,
+                 _dstStride,
+                 _isLinear,
+                 _dstGobBlocksInY,
+                 1);
+
+             int srcOffset = 0;
+
+             for (int y = _dstY; y < _dstY + _lineCount; y++)
+             {
+                 int x1 = _dstX;
+                 int x2 = _dstX + _lineLengthIn;
+                 int x1Round = BitUtils.AlignUp(_dstX, 16);
+                 int x2Trunc = BitUtils.AlignDown(x2, 16);
+
+                 int x = x1;
+
+                 if (x1Round <= x2)
+                 {
+                     for (; x < x1Round; x++, srcOffset++)
+                     {
+                         int dstOffset = dstCalculator.GetOffset(x, y);
+
+                         ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                         memoryManager.Write(dstAddress, data[srcOffset]);
+                     }
+                 }
+
+                 for (; x < x2Trunc; x += 16, srcOffset += 16)
+                 {
+                     int dstOffset = dstCalculator.GetOffset(x, y);
+
+                     ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                     memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
+                 }
+
+                 for (; x < x2; x++, srcOffset++)
+                 {
+                     int dstOffset = dstCalculator.GetOffset(x, y);
+
+                     ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                     memoryManager.Write(dstAddress, data[srcOffset]);
+                 }
+
+                 // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
+                 // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
+                 int misalignment = _lineLengthIn & 3;
+
+                 if (misalignment != 0)
+                 {
+                     srcOffset += 4 - misalignment;
+                 }
+             }
+
+             _context.AdvanceSequence();
+         }
+
+         _finished = true;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
new file mode 100644
index 00000000..d0c82a5e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
@@ -0,0 +1,181 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Notify type.
+ /// </summary>
+ enum NotifyType
+ {
+     WriteOnly = 0,
+     WriteThenAwaken = 1,
+ }
+
+ /// <summary>
+ /// Width in GOBs of the destination texture.
+ /// </summary>
+ enum SetDstBlockSizeWidth
+ {
+     OneGob = 0,
+ }
+
+ /// <summary>
+ /// Height in GOBs of the destination texture. Each value is the base-2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeHeight
+ {
+     OneGob = 0,
+     TwoGobs = 1,
+     FourGobs = 2,
+     EightGobs = 3,
+     SixteenGobs = 4,
+     ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Depth in GOBs of the destination texture. Each value is the base-2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeDepth
+ {
+     OneGob = 0,
+     TwoGobs = 1,
+     FourGobs = 2,
+     EightGobs = 3,
+     SixteenGobs = 4,
+     ThirtytwoGobs = 5,
+ }
+
+ /// <summary>
+ /// Memory layout of the destination texture.
+ /// </summary>
+ enum LaunchDmaDstMemoryLayout
+ {
+     Blocklinear = 0,
+     Pitch = 1,
+ }
+
+ /// <summary>
+ /// DMA completion type.
+ /// </summary>
+ enum LaunchDmaCompletionType
+ {
+     FlushDisable = 0,
+     FlushOnly = 1,
+     ReleaseSemaphore = 2,
+ }
+
+ /// <summary>
+ /// DMA interrupt type.
+ /// </summary>
+ enum LaunchDmaInterruptType
+ {
+     None = 0,
+     Interrupt = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore structure size.
+ /// </summary>
+ enum LaunchDmaSemaphoreStructSize
+ {
+     FourWords = 0,
+     OneWord = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction operation.
+ /// </summary>
+ enum LaunchDmaReductionOp
+ {
+     RedAdd = 0,
+     RedMin = 1,
+     RedMax = 2,
+     RedInc = 3,
+     RedDec = 4,
+     RedAnd = 5,
+     RedOr = 6,
+     RedXor = 7,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction format.
+ /// </summary>
+ enum LaunchDmaReductionFormat
+ {
+     Unsigned32 = 0,
+     Signed32 = 1,
+ }
+
+ /// <summary>
+ /// Inline-to-Memory class state: raw register words as fields, plus properties that decode their bit fields.
+ /// </summary>
+ unsafe struct InlineToMemoryClassState
+ {
+ #pragma warning disable CS0649
+     public uint SetObject;
+     public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+     public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+     public fixed uint Reserved04[63];
+     public uint NoOperation;
+     public uint SetNotifyA;
+     public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+     public uint SetNotifyB;
+     public uint Notify;
+     public NotifyType NotifyType => (NotifyType)(Notify);
+     public uint WaitForIdle;
+     public fixed uint Reserved114[7];
+     public uint SetGlobalRenderEnableA;
+     public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+     public uint SetGlobalRenderEnableB;
+     public uint SetGlobalRenderEnableC;
+     public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+     public uint SendGoIdle;
+     public uint PmTrigger;
+     public uint PmTriggerWfi;
+     public fixed uint Reserved148[2];
+     public uint SetInstrumentationMethodHeader;
+     public uint SetInstrumentationMethodData;
+     public fixed uint Reserved158[10];
+     public uint LineLengthIn;
+     public uint LineCount;
+     public uint OffsetOutUpper;
+     public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+     public uint OffsetOut;
+     public uint PitchOut;
+     public uint SetDstBlockSize;
+     public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+     public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+     public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+     public uint SetDstWidth;
+     public uint SetDstHeight;
+     public uint SetDstDepth;
+     public uint SetDstLayer;
+     public uint SetDstOriginBytesX;
+     public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+     public uint SetDstOriginSamplesY;
+     public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+     public uint LaunchDma;
+     public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+     public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+     public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+     public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+     public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+     public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+     public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+     public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+     public uint LoadInlineData;
+     public fixed uint Reserved1B8[9];
+     public uint SetI2mSemaphoreA;
+     public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+     public uint SetI2mSemaphoreB;
+     public uint SetI2mSemaphoreC;
+     public fixed uint Reserved1E8[2];
+     public uint SetI2mSpareNoop00;
+     public uint SetI2mSpareNoop01;
+     public uint SetI2mSpareNoop02;
+     public uint SetI2mSpareNoop03;
+     public fixed uint Reserved200[3200];
+     public MmeShadowScratch SetMmeShadowScratch;
+ #pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
new file mode 100644
index 00000000..eeef9c67
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs
@@ -0,0 +1,15 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro Arithmetic and Logic unit operation.
+ /// </summary>
+ enum AluOperation
+ {
+     AluReg = 0,
+     AddImmediate = 1,
+     BitfieldReplace = 2,
+     BitfieldExtractLslImm = 3,
+     BitfieldExtractLslReg = 4,
+     ReadImmediate = 5
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
new file mode 100644
index 00000000..f3e05d38
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs
@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro Arithmetic and Logic unit binary register-to-register operation.
+ /// </summary>
+ enum AluRegOperation
+ {
+     Add = 0,
+     AddWithCarry = 1,
+     Subtract = 2,
+     SubtractWithBorrow = 3,
+     BitwiseExclusiveOr = 8,
+     BitwiseOr = 9,
+     BitwiseAnd = 10,
+     BitwiseAndNot = 11,
+     BitwiseNotAnd = 12
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
new file mode 100644
index 00000000..dc336026
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU Macro assignment operation.
+ /// </summary>
+ enum AssignmentOperation
+ {
+     IgnoreAndFetch = 0,
+     Move = 1,
+     MoveAndSetMaddr = 2,
+     FetchAndSend = 3,
+     MoveAndSend = 4,
+     FetchAndSetMaddr = 5,
+     MoveAndSetMaddrThenFetchAndSend = 6,
+     MoveAndSetMaddrThenSendHigh = 7
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
new file mode 100644
index 00000000..117961db
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
@@ -0,0 +1,52 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// A single word of the macro argument FIFO, paired with the address it was read from.
+ /// </summary>
+ readonly struct FifoWord
+ {
+     /// <summary>
+     /// GPU virtual address of the word in memory.
+     /// </summary>
+     public ulong GpuVa { get; }
+
+     /// <summary>
+     /// Value of the word.
+     /// </summary>
+     public int Word { get; }
+
+     /// <summary>
+     /// Initializes a FIFO word from its location and value.
+     /// </summary>
+     /// <param name="gpuVa">GPU virtual address where the word is located in memory</param>
+     /// <param name="word">Word value</param>
+     public FifoWord(ulong gpuVa, int word)
+     {
+         // Both properties are get-only, so this is the only place they are assigned.
+         (GpuVa, Word) = (gpuVa, word);
+     }
+ }
+
+ /// <summary>
+ /// Macro Execution Engine interface.
+ /// </summary>
+ interface IMacroEE
+ {
+     /// <summary>
+     /// Arguments FIFO.
+     /// </summary>
+     Queue<FifoWord> Fifo { get; }
+
+     /// <summary>
+     /// Should execute the GPU Macro code being passed.
+     /// </summary>
+     /// <param name="code">Code to be executed</param>
+     /// <param name="state">GPU state at the time of the call</param>
+     /// <param name="arg0">First argument to be passed to the GPU Macro</param>
+     void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
new file mode 100644
index 00000000..12a3ac02
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
@@ -0,0 +1,101 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// GPU macro program.
+ /// </summary>
+ struct Macro
+ {
+     /// <summary>
+     /// Word offset of the code on the code memory.
+     /// </summary>
+     public int Position { get; }
+
+     private IMacroEE _executionEngine; // created lazily by the first StartExecution call
+     private bool _executionPending;
+     private int _argument;
+     private MacroHLEFunctionName _hleFunction;
+
+     /// <summary>
+     /// Creates a new instance of the GPU cached macro program.
+     /// </summary>
+     /// <param name="position">Macro code start position</param>
+     public Macro(int position)
+     {
+         Position = position;
+
+         _executionEngine = null;
+         _executionPending = false;
+         _argument = 0;
+         _hleFunction = MacroHLEFunctionName.None;
+     }
+
+     /// <summary>
+     /// Sets the first argument for the macro call, selecting the execution engine on first use.
+     /// </summary>
+     /// <param name="context">GPU context where the macro code is being executed</param>
+     /// <param name="processor">GPU GP FIFO command processor</param>
+     /// <param name="code">Code to be executed</param>
+     /// <param name="argument">First argument</param>
+     public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument)
+     {
+         _argument = argument;
+
+         _executionPending = true;
+
+         if (_executionEngine == null)
+         {
+             if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction))
+             {
+                 _executionEngine = new MacroHLE(processor, _hleFunction);
+             }
+             else if (GraphicsConfig.EnableMacroJit)
+             {
+                 _executionEngine = new MacroJit();
+             }
+             else
+             {
+                 _executionEngine = new MacroInterpreter();
+             }
+         }
+
+         // We don't consume the parameter buffer value, so we don't need to flush it.
+         // Doing so improves performance if the value was written by a GPU shader.
+         if (_hleFunction == MacroHLEFunctionName.DrawElementsIndirect)
+         {
+             context.GPFifo.SetFlushSkips(1);
+         }
+         else if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+         {
+             context.GPFifo.SetFlushSkips(2);
+         }
+     }
+
+     /// <summary>
+     /// Starts executing the macro program code, if a call is pending.
+     /// </summary>
+     /// <param name="code">Program code</param>
+     /// <param name="state">Current GPU state</param>
+     public void Execute(ReadOnlySpan<int> code, IDeviceState state)
+     {
+         if (_executionPending)
+         {
+             _executionPending = false;
+             _executionEngine?.Execute(code.Slice(Position), state, _argument);
+         }
+     }
+
+     /// <summary>
+     /// Pushes an argument to the macro call argument FIFO.
+     /// </summary>
+     /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+     /// <param name="argument">Argument to be pushed</param>
+     public void PushArgument(ulong gpuVa, int argument)
+     {
+         _executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument));
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
new file mode 100644
index 00000000..8630bbc4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
@@ -0,0 +1,341 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro High-level emulation.
+ /// </summary>
+ class MacroHLE : IMacroEE
+ {
+ private const int ColorLayerCountOffset = 0x818;
+ private const int ColorStructSize = 0x40;
+ private const int ZetaLayerCountOffset = 0x1230;
+
+ private const int IndirectDataEntrySize = 0x10;
+ private const int IndirectIndexedDataEntrySize = 0x14;
+
+ private readonly GPFifoProcessor _processor;
+ private readonly MacroHLEFunctionName _functionName;
+
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue Fifo { get; }
+
+ /// <summary>
+ /// Creates a new instance of the HLE macro handler, with an empty arguments FIFO.
+ /// </summary>
+ /// <param name="processor">GPU GP FIFO command processor</param>
+ /// <param name="functionName">Name of the HLE macro function to be called</param>
+ public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
+ {
+ _processor = processor;
+ _functionName = functionName;
+
+ Fifo = new Queue();
+ }
+
+ /// <summary>
+ /// Runs the high-level implementation selected at construction, then clears the arguments FIFO. Throws NotImplementedException for a function name without a handler.
+ /// </summary>
+ /// <param name="code">Code of the program to execute (not read by this implementation)</param>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan code, IDeviceState state, int arg0)
+ {
+ switch (_functionName)
+ {
+ case MacroHLEFunctionName.ClearColor:
+ ClearColor(state, arg0);
+ break;
+ case MacroHLEFunctionName.ClearDepthStencil:
+ ClearDepthStencil(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawArraysInstanced:
+ DrawArraysInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsInstanced:
+ DrawElementsInstanced(state, arg0);
+ break;
+ case MacroHLEFunctionName.DrawElementsIndirect:
+ DrawElementsIndirect(state, arg0);
+ break;
+ case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+ MultiDrawElementsIndirectCount(state, arg0);
+ break;
+ default:
+ throw new NotImplementedException(_functionName.ToString());
+ }
+
+ // It should be empty at this point, but clear it just to be safe.
+ Fifo.Clear();
+ }
+
+ /// <summary>
+ /// Clears one bound color target; the target index is taken from bits 9:6 of the first argument.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call</param>
+ private void ClearColor(IDeviceState state, int arg0)
+ {
+ int index = (arg0 >> 6) & 0xf;
+ int layerCount = state.Read(ColorLayerCountOffset + index * ColorStructSize);
+
+ _processor.ThreedClass.Clear(arg0, layerCount);
+ }
+
+ /// <summary>
+ /// Clears the current depth-stencil target.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call</param>
+ private void ClearDepthStencil(IDeviceState state, int arg0)
+ {
+ int layerCount = state.Read(ZetaLayerCountOffset);
+
+ _processor.ThreedClass.Clear(arg0, layerCount);
+ }
+
+ /// <summary>
+ /// Performs a non-indexed instanced draw, with the draw parameters fetched from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawArraysInstanced(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ 0,
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: false);
+ }
+
+ /// <summary>
+ /// Performs an indexed instanced draw, with the draw parameters fetched from the arguments FIFO.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawElementsInstanced(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ firstIndex.Word,
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: true);
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed draw, with parameters from a GPU buffer; falls back to a direct indexed draw when the buffer cache reports the parameter range as not modified.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the primitive topology</param>
+ private void DrawElementsIndirect(IDeviceState state, int arg0)
+ {
+ var topology = (PrimitiveTopology)arg0;
+
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ ulong indirectBufferGpuVa = count.GpuVa;
+
+ var bufferCache = _processor.MemoryManager.Physical.BufferCache;
+
+ bool useBuffer = bufferCache.CheckModified(_processor.MemoryManager, indirectBufferGpuVa, IndirectIndexedDataEntrySize, out ulong indirectBufferAddress);
+
+ if (useBuffer)
+ {
+ int indexCount = firstIndex.Word + count.Word;
+
+ _processor.ThreedClass.DrawIndirect(
+ topology,
+ indirectBufferAddress,
+ 0,
+ 1,
+ IndirectIndexedDataEntrySize,
+ indexCount,
+ Threed.IndirectDrawType.DrawIndexedIndirect);
+ }
+ else
+ {
+ if (ShouldSkipDraw(state, instanceCount.Word))
+ {
+ return;
+ }
+
+ _processor.ThreedClass.Draw(
+ topology,
+ count.Word,
+ instanceCount.Word,
+ firstIndex.Word,
+ firstVertex.Word,
+ firstInstance.Word,
+ indexed: true);
+ }
+ }
+
+ /// <summary>
+ /// Performs an indirect indexed multi-draw, with parameters from a GPU buffer. Nothing is drawn when the resulting draw count is zero or negative.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call, used as the index of the first draw</param>
+ private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
+ {
+ int arg1 = FetchParam().Word;
+ int arg2 = FetchParam().Word;
+ int arg3 = FetchParam().Word;
+
+ int startDraw = arg0;
+ int endDraw = arg1;
+ var topology = (PrimitiveTopology)arg2;
+ int paddingWords = arg3;
+ int stride = paddingWords * 4 + IndirectIndexedDataEntrySize;
+
+ ulong parameterBufferGpuVa = FetchParam().GpuVa;
+
+ int maxDrawCount = endDraw - startDraw;
+
+ if (startDraw != 0)
+ {
+ int drawCount = _processor.MemoryManager.Read(parameterBufferGpuVa, tracked: true);
+
+ // Calculate maximum draw count based on the previous draw count and current draw count.
+ if ((uint)drawCount <= (uint)startDraw)
+ {
+ // The start draw is past our total draw count, so all draws were already performed.
+ maxDrawCount = 0;
+ }
+ else
+ {
+ // Perform just the missing number of draws.
+ maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw));
+ }
+ }
+ // A negative count (end draw before start draw) would otherwise be cast to a huge unsigned buffer size below.
+ if (maxDrawCount <= 0)
+ {
+ Fifo.Clear();
+ return;
+ }
+
+ ulong indirectBufferGpuVa = 0;
+ int indexCount = 0;
+
+ for (int i = 0; i < maxDrawCount; i++)
+ {
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var firstVertex = FetchParam();
+ var firstInstance = FetchParam();
+
+ if (i == 0)
+ {
+ indirectBufferGpuVa = count.GpuVa;
+ }
+
+ indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);
+
+ if (i != maxDrawCount - 1)
+ {
+ for (int j = 0; j < paddingWords; j++)
+ {
+ FetchParam();
+ }
+ }
+ }
+
+ var bufferCache = _processor.MemoryManager.Physical.BufferCache;
+
+ ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;
+
+ ulong indirectBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize);
+ ulong parameterBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, parameterBufferGpuVa, 4);
+
+ _processor.ThreedClass.DrawIndirect(
+ topology,
+ indirectBufferAddress,
+ parameterBufferAddress,
+ maxDrawCount,
+ stride,
+ indexCount,
+ Threed.IndirectDrawType.DrawIndexedIndirectCount);
+ }
+
+ /// <summary>
+ /// Checks if the draw should be skipped, because the instance count masked with the value of register 0xd1b is zero.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="instanceCount">Draw instance count</param>
+ /// <returns>True if the draw should be skipped, false otherwise</returns>
+ private static bool ShouldSkipDraw(IDeviceState state, int instanceCount)
+ {
+ return (Read(state, 0xd1b) & instanceCount) == 0;
+ }
+
+ /// <summary>
+ /// Fetches an argument from the arguments FIFO, logging a warning if the FIFO is empty.
+ /// </summary>
+ /// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
+ private FifoWord FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return new FifoWord(0UL, 0);
+ }
+
+ return value;
+ }
+
+ /// <summary>
+ /// Reads data from a GPU register.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register offset to read, in words (multiplied by 4 to get the state offset)</param>
+ /// <returns>GPU register value</returns>
+ private static int Read(IDeviceState state, int reg)
+ {
+ return state.Read(reg * 4);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
new file mode 100644
index 00000000..751867fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Name of the High-level implementation of a Macro function.
+ /// </summary>
+ enum MacroHLEFunctionName
+ {
+ None,
+ ClearColor,
+ ClearDepthStencil,
+ DrawArraysInstanced,
+ DrawElementsInstanced,
+ DrawElementsIndirect,
+ MultiDrawElementsIndirectCount
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
new file mode 100644
index 00000000..719e170f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
@@ -0,0 +1,113 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Table with information about High-level implementations of GPU Macro code.
+ /// </summary>
+ static class MacroHLETable
+ {
+ /// <summary>
+ /// Macro High-level implementation table entry.
+ /// </summary>
+ readonly struct TableEntry
+ {
+ /// <summary>
+ /// Name of the Macro function.
+ /// </summary>
+ public MacroHLEFunctionName Name { get; }
+
+ /// <summary>
+ /// Hash of the original binary Macro function code.
+ /// </summary>
+ public Hash128 Hash { get; }
+
+ /// <summary>
+ /// Size (in bytes) of the original binary Macro function code.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Creates a new table entry.
+ /// </summary>
+ /// <param name="name">Name of the Macro function</param>
+ /// <param name="hash">Hash of the original binary Macro function code</param>
+ /// <param name="length">Size (in bytes) of the original binary Macro function code</param>
+ public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length)
+ {
+ Name = name;
+ Hash = hash;
+ Length = length;
+ }
+ }
+
+ private static readonly TableEntry[] _table = new TableEntry[]
+ {
+ new TableEntry(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28),
+ new TableEntry(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24),
+ new TableEntry(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48),
+ new TableEntry(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5c),
+ new TableEntry(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7c),
+ new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C)
+ };
+
+ /// <summary>
+ /// Checks if the host supports all features required by the HLE macro.
+ /// </summary>
+ /// <param name="caps">Host capabilities</param>
+ /// <param name="name">Name of the HLE macro to be checked</param>
+ /// <returns>True if the host supports the HLE macro, false otherwise</returns>
+ private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
+ {
+ if (name == MacroHLEFunctionName.ClearColor ||
+ name == MacroHLEFunctionName.ClearDepthStencil ||
+ name == MacroHLEFunctionName.DrawArraysInstanced ||
+ name == MacroHLEFunctionName.DrawElementsInstanced ||
+ name == MacroHLEFunctionName.DrawElementsIndirect)
+ {
+ return true;
+ }
+ else if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+ {
+ return caps.SupportsIndirectParameters;
+ }
+
+ return false;
+ }
+
+ /// <summary>
+ /// Checks if there's a fast, High-level implementation of the specified Macro code available.
+ /// </summary>
+ /// <param name="code">Macro code to be checked</param>
+ /// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
+ /// <param name="name">Name of the function if an implementation is available and supported, otherwise MacroHLEFunctionName.None</param>
+ /// <returns>True if there is an implementation available and supported, false otherwise</returns>
+ public static bool TryGetMacroHLEFunction(ReadOnlySpan code, Capabilities caps, out MacroHLEFunctionName name)
+ {
+ var mc = MemoryMarshal.Cast(code);
+
+ for (int i = 0; i < _table.Length; i++)
+ {
+ ref var entry = ref _table[i];
+
+ // An entry longer than the supplied code can never match, and slicing past the end of the span would throw.
+ if (mc.Length >= entry.Length && XXHash128.ComputeHash(mc.Slice(0, entry.Length)) == entry.Hash)
+ {
+ if (IsMacroHLESupported(caps, entry.Name))
+ {
+ name = entry.Name;
+ return true;
+ }
+
+ break;
+ }
+ }
+
+ name = MacroHLEFunctionName.None;
+ return false;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
new file mode 100644
index 00000000..df6ee040
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
@@ -0,0 +1,400 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro code interpreter.
+ /// </summary>
+ class MacroInterpreter : IMacroEE
+ {
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue Fifo { get; }
+
+ private int[] _gprs;
+
+ private int _methAddr;
+ private int _methIncr;
+
+ private bool _carry;
+
+ private int _opCode;
+ private int _pipeOp;
+
+ private bool _ignoreExitFlag;
+
+ private int _pc;
+
+ /// <summary>
+ /// Creates a new instance of the macro code interpreter, with an empty arguments FIFO and 8 general purpose registers.
+ /// </summary>
+ public MacroInterpreter()
+ {
+ Fifo = new Queue();
+
+ _gprs = new int[8];
+ }
+
+ /// <summary>
+ /// Executes a macro program until it exits. The first argument is placed in register 1 before execution starts.
+ /// </summary>
+ /// <param name="code">Code of the program to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan code, IDeviceState state, int arg0)
+ {
+ Reset();
+
+ _gprs[1] = arg0;
+
+ _pc = 0;
+
+ FetchOpCode(code);
+
+ while (Step(code, state))
+ {
+ }
+
+ // Due to the delay slot, we still need to execute
+ // one more instruction before we actually exit.
+ Step(code, state);
+ }
+
+ /// <summary>
+ /// Resets the internal interpreter state (registers, method address and increment, carry flag).
+ /// Call each time you run a new program.
+ /// </summary>
+ private void Reset()
+ {
+ for (int index = 0; index < _gprs.Length; index++)
+ {
+ _gprs[index] = 0;
+ }
+
+ _methAddr = 0;
+ _methIncr = 0;
+
+ _carry = false;
+ }
+
+ /// <summary>
+ /// Executes a single instruction of the program.
+ /// </summary>
+ /// <param name="code">Program code to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>True to continue execution, false if the program exited</returns>
+ private bool Step(ReadOnlySpan code, IDeviceState state)
+ {
+ int baseAddr = _pc - 1;
+
+ FetchOpCode(code);
+
+ if ((_opCode & 7) < 7)
+ {
+ // Operation produces a value.
+ AssignmentOperation asgOp = (AssignmentOperation)((_opCode >> 4) & 7);
+
+ int result = GetAluResult(state);
+
+ switch (asgOp)
+ {
+ // Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ SetDstGpr(FetchParam());
+ break;
+ // Move result.
+ case AssignmentOperation.Move:
+ SetDstGpr(result);
+ break;
+ // Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ break;
+ // Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ SetDstGpr(FetchParam());
+ Send(state, result);
+ break;
+ // Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ SetDstGpr(result);
+ Send(state, result);
+ break;
+ // Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ SetDstGpr(FetchParam());
+ SetMethAddr(result);
+ break;
+ // Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, FetchParam());
+ break;
+ // Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ SetDstGpr(result);
+ SetMethAddr(result);
+ Send(state, (result >> 12) & 0x3f);
+ break;
+ }
+ }
+ else
+ {
+ // Branch.
+ bool onNotZero = ((_opCode >> 4) & 1) != 0;
+
+ bool taken = onNotZero
+ ? GetGprA() != 0
+ : GetGprA() == 0;
+
+ if (taken)
+ {
+ _pc = baseAddr + GetImm();
+
+ bool noDelays = (_opCode & 0x20) != 0;
+
+ if (noDelays)
+ {
+ FetchOpCode(code);
+ }
+ else
+ {
+ // The delay slot instruction exit flag should be ignored.
+ _ignoreExitFlag = true;
+ }
+
+ return true;
+ }
+ }
+
+ bool exit = (_opCode & 0x80) != 0 && !_ignoreExitFlag;
+
+ _ignoreExitFlag = false;
+
+ return !exit;
+ }
+
+ /// <summary>
+ /// Advances the fetch pipeline: the previously fetched word becomes the current operation code, and the next word is read from the program code.
+ /// </summary>
+ /// <param name="code">Program code</param>
+ private void FetchOpCode(ReadOnlySpan code)
+ {
+ _opCode = _pipeOp;
+ _pipeOp = code[_pc++];
+ }
+
+ /// <summary>
+ /// Gets the result of the current Arithmetic and Logic unit operation. Throws InvalidOperationException for an unhandled operation.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <returns>Operation result</returns>
+ private int GetAluResult(IDeviceState state)
+ {
+ AluOperation op = (AluOperation)(_opCode & 7);
+
+ switch (op)
+ {
+ case AluOperation.AluReg:
+ return GetAluResult((AluRegOperation)((_opCode >> 17) & 0x1f), GetGprA(), GetGprB());
+
+ case AluOperation.AddImmediate:
+ return GetGprA() + GetImm();
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ int bfSrcBit = (_opCode >> 17) & 0x1f;
+ int bfSize = (_opCode >> 22) & 0x1f;
+ int bfDstBit = (_opCode >> 27) & 0x1f;
+
+ int bfMask = (1 << bfSize) - 1;
+
+ int dst = GetGprA();
+ int src = GetGprB();
+
+ switch (op)
+ {
+ case AluOperation.BitfieldReplace:
+ src = (int)((uint)src >> bfSrcBit) & bfMask;
+
+ dst &= ~(bfMask << bfDstBit);
+
+ dst |= src << bfDstBit;
+
+ return dst;
+
+ case AluOperation.BitfieldExtractLslImm:
+ src = (int)((uint)src >> dst) & bfMask;
+
+ return src << bfDstBit;
+
+ case AluOperation.BitfieldExtractLslReg:
+ src = (int)((uint)src >> bfSrcBit) & bfMask;
+
+ return src << dst;
+ }
+
+ break;
+
+ case AluOperation.ReadImmediate:
+ return Read(state, GetGprA() + GetImm());
+ }
+
+ throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Gets the result of an Arithmetic and Logic operation using registers. Operands are zero-extended to 64 bits so that the carry flag reflects the unsigned 32-bit carry or borrow.
+ /// </summary>
+ /// <param name="aluOp">Arithmetic and Logic unit operation with registers</param>
+ /// <param name="a">First operand value</param>
+ /// <param name="b">Second operand value</param>
+ /// <returns>Operation result</returns>
+ private int GetAluResult(AluRegOperation aluOp, int a, int b)
+ {
+ ulong result;
+
+ switch (aluOp)
+ {
+ case AluRegOperation.Add:
+ result = (ulong)(uint)a + (ulong)(uint)b;
+
+ _carry = result > 0xffffffff;
+
+ return (int)result;
+
+ case AluRegOperation.AddWithCarry:
+ result = (ulong)(uint)a + (ulong)(uint)b + (_carry ? 1UL : 0UL);
+
+ _carry = result > 0xffffffff;
+
+ return (int)result;
+
+ case AluRegOperation.Subtract:
+ result = (ulong)(uint)a - (ulong)(uint)b;
+
+ _carry = result < 0x100000000;
+
+ return (int)result;
+
+ case AluRegOperation.SubtractWithBorrow:
+ result = (ulong)(uint)a - (ulong)(uint)b - (_carry ? 0UL : 1UL);
+
+ _carry = result < 0x100000000;
+
+ return (int)result;
+
+ case AluRegOperation.BitwiseExclusiveOr: return a ^ b;
+ case AluRegOperation.BitwiseOr: return a | b;
+ case AluRegOperation.BitwiseAnd: return a & b;
+ case AluRegOperation.BitwiseAndNot: return a & ~b;
+ case AluRegOperation.BitwiseNotAnd: return ~(a & b);
+ }
+
+ throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}.");
+ }
+
+ /// <summary>
+ /// Extracts the signed immediate stored in bits 31:14 of the current operation code, sign-extended to 32 bits.
+ /// </summary>
+ /// <returns>The sign-extended immediate value encoded at the current operation code</returns>
+ private int GetImm()
+ {
+ // Note: The immediate is signed, the sign-extension is intended here.
+ return _opCode >> 14;
+ }
+
+ /// <summary>
+ /// Sets the current method address, for method calls.
+ /// </summary>
+ /// <param name="value">Packed value: method address in bits 11:0, address increment in bits 17:12</param>
+ private void SetMethAddr(int value)
+ {
+ _methAddr = (value >> 0) & 0xfff;
+ _methIncr = (value >> 12) & 0x3f;
+ }
+
+ /// <summary>
+ /// Sets the destination register value; the register index is taken from bits 10:8 of the current operation code.
+ /// </summary>
+ /// <param name="value">Value to set (usually the operation result)</param>
+ private void SetDstGpr(int value)
+ {
+ _gprs[(_opCode >> 8) & 7] = value;
+ }
+
+ /// <summary>
+ /// Gets first operand value from the respective register (index in bits 13:11 of the current operation code).
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprA()
+ {
+ return GetGprValue((_opCode >> 11) & 7);
+ }
+
+ /// <summary>
+ /// Gets second operand value from the respective register (index in bits 16:14 of the current operation code).
+ /// </summary>
+ /// <returns>Operand value</returns>
+ private int GetGprB()
+ {
+ return GetGprValue((_opCode >> 14) & 7);
+ }
+
+ /// <summary>
+ /// Gets the value from a register, or 0 if the R0 register is specified.
+ /// </summary>
+ /// <param name="index">Index of the register</param>
+ /// <returns>Register value</returns>
+ private int GetGprValue(int index)
+ {
+ return index != 0 ? _gprs[index] : 0;
+ }
+
+ /// <summary>
+ /// Fetches a call argument from the call argument FIFO, logging a warning if the FIFO is empty.
+ /// </summary>
+ /// <returns>The call argument, or 0 if the FIFO is empty</returns>
+ private int FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return 0;
+ }
+
+ return value.Word;
+ }
+
+ /// <summary>
+ /// Reads data from a GPU register.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register offset to read, in words (multiplied by 4 to get the state offset)</param>
+ /// <returns>GPU register value</returns>
+ private int Read(IDeviceState state, int reg)
+ {
+ return state.Read(reg * 4);
+ }
+
+ /// <summary>
+ /// Performs a GPU method call at the current method address, then advances the address by the current increment.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="value">Call argument</param>
+ private void Send(IDeviceState state, int value)
+ {
+ state.Write(_methAddr * 4, value);
+
+ _methAddr += _methIncr;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
new file mode 100644
index 00000000..4077f74e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
@@ -0,0 +1,39 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents an execution engine that uses a Just-in-Time compiler for fast execution.
+ /// </summary>
+ class MacroJit : IMacroEE
+ {
+ private readonly MacroJitContext _context = new MacroJitContext();
+
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue Fifo => _context.Fifo;
+
+ private MacroJitCompiler.MacroExecute _execute;
+
+ /// <summary>
+ /// Executes a macro program until it exits. The code is compiled on the first call only; later calls reuse the compiled delegate.
+ /// </summary>
+ /// <param name="code">Code of the program to execute</param>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan code, IDeviceState state, int arg0)
+ {
+ if (_execute == null)
+ {
+ MacroJitCompiler compiler = new MacroJitCompiler();
+
+ _execute = compiler.Compile(code);
+ }
+
+ _execute(_context, state, arg0);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
new file mode 100644
index 00000000..f8d839fa
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
@@ -0,0 +1,517 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+using System.Reflection.Emit;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents a Macro Just-in-Time compiler.
+ /// </summary>
+ class MacroJitCompiler
+ {
+ private readonly DynamicMethod _meth;
+ private readonly ILGenerator _ilGen;
+ private readonly LocalBuilder[] _gprs;
+ private readonly LocalBuilder _carry;
+ private readonly LocalBuilder _methAddr;
+ private readonly LocalBuilder _methIncr;
+
+ /// <summary>
+ /// Creates a new instance of the Macro Just-in-Time compiler. Declares locals for registers 1 to 7, carry, method address and increment, and emits the store of the first argument into register 1.
+ /// </summary>
+ public MacroJitCompiler()
+ {
+ _meth = new DynamicMethod("Macro", typeof(void), new Type[] { typeof(MacroJitContext), typeof(IDeviceState), typeof(int) });
+ _ilGen = _meth.GetILGenerator();
+ _gprs = new LocalBuilder[8];
+
+ for (int i = 1; i < 8; i++)
+ {
+ _gprs[i] = _ilGen.DeclareLocal(typeof(int));
+ }
+
+ _carry = _ilGen.DeclareLocal(typeof(int));
+ _methAddr = _ilGen.DeclareLocal(typeof(int));
+ _methIncr = _ilGen.DeclareLocal(typeof(int));
+
+ _ilGen.Emit(OpCodes.Ldarg_2);
+ _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
+ }
+
+ public delegate void MacroExecute(MacroJitContext context, IDeviceState state, int arg0);
+
+ /// <summary>
+ /// Translates a new piece of GPU Macro code into host executable code.
+ /// </summary>
+ /// <param name="code">Code to be translated</param>
+ /// <returns>Delegate of the host compiled code</returns>
+ public MacroExecute Compile(ReadOnlySpan code)
+ {
+ Dictionary labels = new Dictionary();
+
+ int lastTarget = 0;
+ int i;
+
+ // Collect all branch targets, stopping at the first exit instruction located at or past the furthest target seen.
+ for (i = 0; i < code.Length; i++)
+ {
+ int opCode = code[i];
+
+ if ((opCode & 7) == 7)
+ {
+ int target = i + (opCode >> 14);
+
+ if (!labels.ContainsKey(target))
+ {
+ labels.Add(target, _ilGen.DefineLabel());
+ }
+
+ if (lastTarget < target)
+ {
+ lastTarget = target;
+ }
+ }
+
+ bool exit = (opCode & 0x80) != 0;
+
+ if (exit && i >= lastTarget)
+ {
+ break;
+ }
+ }
+
+ // Code generation. An exit instruction is followed by its delay slot instruction and a return.
+ for (i = 0; i < code.Length; i++)
+ {
+ if (labels.TryGetValue(i, out Label label))
+ {
+ _ilGen.MarkLabel(label);
+ }
+
+ Emit(code, i, labels);
+
+ int opCode = code[i];
+
+ bool exit = (opCode & 0x80) != 0;
+
+ if (exit)
+ {
+ Emit(code, i + 1, labels);
+ _ilGen.Emit(OpCodes.Ret);
+
+ if (i >= lastTarget)
+ {
+ break;
+ }
+ }
+ }
+
+ if (i == code.Length)
+ {
+ _ilGen.Emit(OpCodes.Ret);
+ }
+
+ return _meth.CreateDelegate();
+ }
+
+ /// <summary>
+ /// Emits IL equivalent to the Macro instruction at a given offset.
+ /// </summary>
+ /// <param name="code">GPU Macro code</param>
+ /// <param name="offset">Offset, in words, where the instruction is located</param>
+ /// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
+ private void Emit(ReadOnlySpan code, int offset, Dictionary labels)
+ {
+ int opCode = code[offset];
+
+ if ((opCode & 7) < 7)
+ {
+ // Operation produces a value.
+ AssignmentOperation asgOp = (AssignmentOperation)((opCode >> 4) & 7);
+
+ EmitAluOp(opCode);
+
+ switch (asgOp)
+ {
+ // Fetch parameter and ignore result.
+ case AssignmentOperation.IgnoreAndFetch:
+ _ilGen.Emit(OpCodes.Pop);
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode);
+ break;
+ // Move result.
+ case AssignmentOperation.Move:
+ EmitStoreDstGpr(opCode);
+ break;
+ // Move result and use as Method Address.
+ case AssignmentOperation.MoveAndSetMaddr:
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ break;
+ // Fetch parameter and send result.
+ case AssignmentOperation.FetchAndSend:
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode);
+ EmitSend();
+ break;
+ // Move and send result.
+ case AssignmentOperation.MoveAndSend:
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitSend();
+ break;
+ // Fetch parameter and use result as Method Address.
+ case AssignmentOperation.FetchAndSetMaddr:
+ EmitFetchParam();
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ break;
+ // Move result and use as Method Address, then fetch and send parameter.
+ case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ EmitFetchParam();
+ EmitSend();
+ break;
+ // Move result and use as Method Address, then send bits 17:12 of result.
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
+ _ilGen.Emit(OpCodes.Dup);
+ _ilGen.Emit(OpCodes.Dup);
+ EmitStoreDstGpr(opCode);
+ EmitStoreMethAddr();
+ _ilGen.Emit(OpCodes.Ldc_I4, 12);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
+ _ilGen.Emit(OpCodes.And);
+ EmitSend();
+ break;
+ }
+ }
+ else
+ {
+ // Branch.
+ bool onNotZero = ((opCode >> 4) & 1) != 0;
+
+ EmitLoadGprA(opCode);
+
+ Label lblSkip = _ilGen.DefineLabel();
+
+ if (onNotZero)
+ {
+ _ilGen.Emit(OpCodes.Brfalse, lblSkip);
+ }
+ else
+ {
+ _ilGen.Emit(OpCodes.Brtrue, lblSkip);
+ }
+
+ bool noDelays = (opCode & 0x20) != 0;
+
+ if (!noDelays)
+ {
+ Emit(code, offset + 1, labels);
+ }
+
+ int target = offset + (opCode >> 14);
+
+ _ilGen.Emit(OpCodes.Br, labels[target]);
+
+ _ilGen.MarkLabel(lblSkip);
+ }
+ }
+
+ /// <summary>
+ /// Emits IL for an Arithmetic and Logic Unit instruction, leaving the result on the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction to be translated</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(int opCode)
+ {
+ AluOperation op = (AluOperation)(opCode & 7);
+
+ switch (op)
+ {
+ case AluOperation.AluReg:
+ EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode);
+ break;
+
+ case AluOperation.AddImmediate:
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add);
+ break;
+
+ case AluOperation.BitfieldReplace:
+ case AluOperation.BitfieldExtractLslImm:
+ case AluOperation.BitfieldExtractLslReg:
+ int bfSrcBit = (opCode >> 17) & 0x1f;
+ int bfSize = (opCode >> 22) & 0x1f;
+ int bfDstBit = (opCode >> 27) & 0x1f;
+
+ int bfMask = (1 << bfSize) - 1;
+
+ switch (op)
+ {
+ case AluOperation.BitfieldReplace:
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, ~(bfMask << bfDstBit));
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Or);
+ break;
+
+ case AluOperation.BitfieldExtractLslImm:
+ EmitLoadGprB(opCode);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
+ _ilGen.Emit(OpCodes.Shl);
+ break;
+
+ case AluOperation.BitfieldExtractLslReg:
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
+ _ilGen.Emit(OpCodes.Shr_Un);
+ _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
+ _ilGen.Emit(OpCodes.And);
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Shl);
+ break;
+ }
+ break;
+
+ case AluOperation.ReadImmediate:
+ _ilGen.Emit(OpCodes.Ldarg_1);
+ EmitLoadGprA(opCode);
+ EmitLoadImm(opCode);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read)));
+ break;
+
+ default:
+ throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}.");
+ }
+ }
+
+ /// <summary>
+ /// Emits IL for a binary Arithmetic and Logic Unit instruction, leaving the result on the IL evaluation stack. Add and subtract variants also update the carry local.
+ /// </summary>
+ /// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
+ /// <param name="opCode">Raw instruction</param>
+ /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
+ private void EmitAluOp(AluRegOperation aluOp, int opCode)
+ {
+ switch (aluOp)
+ {
+ case AluRegOperation.Add:
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Dup);
+ _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+ _ilGen.Emit(OpCodes.Cgt_Un);
+ _ilGen.Emit(OpCodes.Stloc, _carry);
+ _ilGen.Emit(OpCodes.Conv_U4);
+ break;
+ case AluRegOperation.AddWithCarry:
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Dup);
+ _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
+ _ilGen.Emit(OpCodes.Cgt_Un);
+ _ilGen.Emit(OpCodes.Stloc, _carry);
+ _ilGen.Emit(OpCodes.Conv_U4);
+ break;
+ case AluRegOperation.Subtract:
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Sub);
+ _ilGen.Emit(OpCodes.Dup);
+ _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+ _ilGen.Emit(OpCodes.Clt_Un);
+ _ilGen.Emit(OpCodes.Stloc, _carry);
+ _ilGen.Emit(OpCodes.Conv_U4);
+ break;
+ case AluRegOperation.SubtractWithBorrow:
+ EmitLoadGprA(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Ldc_I4_1);
+ _ilGen.Emit(OpCodes.Ldloc_S, _carry);
+ _ilGen.Emit(OpCodes.Sub);
+ _ilGen.Emit(OpCodes.Conv_U8);
+ _ilGen.Emit(OpCodes.Add); // B + (1 - carry): the borrow must be added to B so that the next Sub yields A - B - borrow.
+ _ilGen.Emit(OpCodes.Sub);
+ _ilGen.Emit(OpCodes.Dup);
+ _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
+ _ilGen.Emit(OpCodes.Clt_Un);
+ _ilGen.Emit(OpCodes.Stloc, _carry);
+ _ilGen.Emit(OpCodes.Conv_U4);
+ break;
+ case AluRegOperation.BitwiseExclusiveOr:
+ EmitLoadGprA(opCode);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Xor);
+ break;
+ case AluRegOperation.BitwiseOr:
+ EmitLoadGprA(opCode);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Or);
+ break;
+ case AluRegOperation.BitwiseAnd:
+ EmitLoadGprA(opCode);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.And);
+ break;
+ case AluRegOperation.BitwiseAndNot:
+ EmitLoadGprA(opCode);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.Not);
+ _ilGen.Emit(OpCodes.And);
+ break;
+ case AluRegOperation.BitwiseNotAnd:
+ EmitLoadGprA(opCode);
+ EmitLoadGprB(opCode);
+ _ilGen.Emit(OpCodes.And);
+ _ilGen.Emit(OpCodes.Not);
+ break;
+ default:
+ throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
+ }
+ }
+
+ /// <summary>
+ /// Pushes the immediate operand encoded in an instruction onto the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the immediate should be extracted</param>
+ private void EmitLoadImm(int opCode)
+ {
+     // The immediate is signed: shifting the signed instruction word right
+     // sign-extends it, which is the intended behavior.
+     int immediate = opCode >> 14;
+
+     _ilGen.Emit(OpCodes.Ldc_I4, immediate);
+ }
+
+ /// <summary>
+ /// Pushes the value of the General Purpose register used as first operand onto the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprA(int opCode)
+ {
+     // The first operand register number is the 3-bit field starting at bit 11.
+     int registerIndex = (opCode >> 11) & 7;
+
+     EmitLoadGpr(registerIndex);
+ }
+
+ /// <summary>
+ /// Pushes the value of the General Purpose register used as second operand onto the IL evaluation stack.
+ /// </summary>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitLoadGprB(int opCode)
+ {
+     // The second operand register number is the 3-bit field starting at bit 14.
+     int registerIndex = (opCode >> 14) & 7;
+
+     EmitLoadGpr(registerIndex);
+ }
+
+ /// <summary>
+ /// Pushes the value of a General Purpose register onto the IL evaluation stack.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 has a hardcoded value of 0.
+ /// </remarks>
+ /// <param name="index">Register number</param>
+ private void EmitLoadGpr(int index)
+ {
+     if (index != 0)
+     {
+         _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]);
+
+         return;
+     }
+
+     // Register 0 always reads as zero, so a constant is pushed instead of a local.
+     _ilGen.Emit(OpCodes.Ldc_I4_0);
+ }
+
+ /// <summary>
+ /// Emits a call to the method that fetches an argument from the arguments FIFO.
+ /// The fetched argument is left on the IL evaluation stack.
+ /// </summary>
+ private void EmitFetchParam()
+ {
+     // FetchParam is an instance method: argument 0 is pushed to act as its "this".
+     _ilGen.Emit(OpCodes.Ldarg_0);
+
+     var fetchParam = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam));
+
+     _ilGen.Emit(OpCodes.Call, fetchParam);
+ }
+
+ /// <summary>
+ /// Pops the value on the top of the IL evaluation stack into the destination General Purpose register.
+ /// </summary>
+ /// <remarks>
+ /// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded.
+ /// </remarks>
+ /// <param name="opCode">Instruction from where the register number should be extracted</param>
+ private void EmitStoreDstGpr(int opCode)
+ {
+     // The destination register number is the 3-bit field starting at bit 8.
+     int destination = (opCode >> 8) & 7;
+
+     if (destination != 0)
+     {
+         _ilGen.Emit(OpCodes.Stloc_S, _gprs[destination]);
+
+         return;
+     }
+
+     // Writes to register 0 are dropped, but the value still has to leave the stack.
+     _ilGen.Emit(OpCodes.Pop);
+ }
+
+ /// <summary>
+ /// Stores the value on the top of the IL evaluation stack as method address (low 12 bits).
+ /// This will be used on subsequent send calls as the destination method address.
+ /// Additionally, the 6 bits starting at bit 12 will be used as increment value,
+ /// added to the method address after each sent value.
+ /// </summary>
+ private void EmitStoreMethAddr()
+ {
+ _ilGen.Emit(OpCodes.Dup); // Stack: [value, value] - one copy per field extracted below
+ _ilGen.Emit(OpCodes.Ldc_I4, 0xfff);
+ _ilGen.Emit(OpCodes.And); // Stack: [value, value & 0xfff]
+ _ilGen.Emit(OpCodes.Stloc_S, _methAddr); // _methAddr = value & 0xfff; stack: [value]
+ _ilGen.Emit(OpCodes.Ldc_I4, 12);
+ _ilGen.Emit(OpCodes.Shr_Un); // Stack: [value >> 12] (unsigned shift)
+ _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
+ _ilGen.Emit(OpCodes.And); // Stack: [(value >> 12) & 0x3f]
+ _ilGen.Emit(OpCodes.Stloc_S, _methIncr); // _methIncr = 6-bit increment; the input value is fully consumed
+ }
+
+ /// <summary>
+ /// Sends the value on the top of the IL evaluation stack to the GPU,
+ /// using the current method address, which is then advanced by the current increment.
+ /// </summary>
+ private void EmitSend()
+ {
+ _ilGen.Emit(OpCodes.Ldarg_1); // Stack: [value, arg1] - argument 1 fills the "state" parameter of Send
+ _ilGen.Emit(OpCodes.Ldloc_S, _methAddr); // Stack: [value, arg1, methAddr]
+ _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send))); // Static call, consumes all three values
+ _ilGen.Emit(OpCodes.Ldloc_S, _methAddr);
+ _ilGen.Emit(OpCodes.Ldloc_S, _methIncr);
+ _ilGen.Emit(OpCodes.Add);
+ _ilGen.Emit(OpCodes.Stloc_S, _methAddr); // _methAddr += _methIncr
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
new file mode 100644
index 00000000..52c2a11b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
@@ -0,0 +1,55 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Represents a Macro Just-in-Time compiler execution context.
+ /// </summary>
+ class MacroJitContext
+ {
+ /// <summary>
+ /// Arguments FIFO. NOTE(review): the Queue type argument looks lost in this copy (TryDequeue and value.Word below need a typed element) - confirm against the real file.
+ /// </summary>
+ public Queue Fifo { get; } = new Queue();
+
+ /// <summary>
+ /// Fetches an argument from the arguments FIFO, logging a warning if the FIFO is empty.
+ /// </summary>
+ /// <returns>The call argument, or 0 if the FIFO is empty</returns>
+ public int FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return 0;
+ }
+
+ return value.Word; // Only the Word member of the dequeued entry is handed back
+ }
+
+ /// <summary>
+ /// Reads data from a GPU register.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="reg">Register number to read; multiplied by 4 to form the offset passed to the state</param>
+ /// <returns>GPU register value</returns>
+ public static int Read(IDeviceState state, int reg)
+ {
+ return state.Read(reg * 4);
+ }
+
+ /// <summary>
+ /// Performs a GPU method call.
+ /// </summary>
+ /// <param name="value">Call argument</param>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="methAddr">Address, in words, of the method; multiplied by 4 to form the offset passed to the state</param>
+ public static void Send(int value, IDeviceState state, int methAddr)
+ {
+ state.Write(methAddr * 4, value);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
new file mode 100644
index 00000000..44cd8213
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
@@ -0,0 +1,18 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Represents temporary storage used by macros.
+ /// </summary>
+ /// <remarks>
+ /// The struct is declared with an explicit size of 1024 bytes and is exposed as 256 32-bit words.
+ /// Only the first word is declared as a field; the rest of the storage comes from the declared size.
+ /// </remarks>
+ [StructLayout(LayoutKind.Sequential, Size = 1024)]
+ struct MmeShadowScratch
+ {
+     // First word of the storage. It is only ever accessed through the span below,
+     // so the "field is never used" warning (CS0169) is suppressed.
+#pragma warning disable CS0169
+     private uint _e0;
+#pragma warning restore CS0169
+
+     /// <summary>
+     /// Gets a reference to the word at the given index.
+     /// </summary>
+     /// <param name="index">Word index, from 0 to 255; the span indexer throws for values outside that range</param>
+     public ref uint this[int index] => ref AsSpan()[index];
+
+     /// <summary>
+     /// Exposes the whole storage as a span of 256 words, starting at the first word.
+     /// </summary>
+     /// <returns>Span over the 256 words of the storage</returns>
+     public Span<uint> AsSpan() => MemoryMarshal.CreateSpan(ref _e0, 256);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs
new file mode 100644
index 00000000..060d35ca
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// MME shadow RAM control mode. Values are assigned explicitly (0 to 3); presumably they mirror the hardware encoding - TODO confirm.
+ /// </summary>
+ enum SetMmeShadowRamControlMode
+ {
+ MethodTrack = 0,
+ MethodTrackWithFilter = 1,
+ MethodPassthrough = 2,
+ MethodReplay = 3,
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs
new file mode 100644
index 00000000..e1e3085b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs
@@ -0,0 +1,111 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Helpers that translate shader-side texture properties into their GAL equivalents.
+ /// </summary>
+ static class ShaderTexture
+ {
+     /// <summary>
+     /// Maps a sampler type to the matching texture target.
+     /// </summary>
+     /// <remarks>
+     /// The <see cref="SamplerType.Indexed"/> and <see cref="SamplerType.Shadow"/> flags are ignored.
+     /// Combinations that are not recognized are logged as a warning and mapped to <see cref="Target.Texture2D"/>.
+     /// </remarks>
+     /// <param name="type">Sampler type</param>
+     /// <returns>Texture target value</returns>
+     public static Target GetTarget(SamplerType type)
+     {
+         type &= ~(SamplerType.Indexed | SamplerType.Shadow);
+
+         Target? target = type switch
+         {
+             SamplerType.Texture1D => Target.Texture1D,
+             SamplerType.TextureBuffer => Target.TextureBuffer,
+             SamplerType.Texture1D | SamplerType.Array => Target.Texture1DArray,
+             SamplerType.Texture2D => Target.Texture2D,
+             SamplerType.Texture2D | SamplerType.Array => Target.Texture2DArray,
+             SamplerType.Texture2D | SamplerType.Multisample => Target.Texture2DMultisample,
+             SamplerType.Texture2D | SamplerType.Multisample | SamplerType.Array => Target.Texture2DMultisampleArray,
+             SamplerType.Texture3D => Target.Texture3D,
+             SamplerType.TextureCube => Target.Cubemap,
+             SamplerType.TextureCube | SamplerType.Array => Target.CubemapArray,
+             _ => (Target?)null
+         };
+
+         if (target.HasValue)
+         {
+             return target.Value;
+         }
+
+         // The masked value is what gets reported, since the Indexed/Shadow flags were already dropped.
+         Logger.Warning?.Print(LogClass.Gpu, $"Invalid sampler type \"{type}\".");
+
+         return Target.Texture2D;
+     }
+
+     /// <summary>
+     /// Maps a shader image format to the texture format of the same name.
+     /// </summary>
+     /// <param name="format">Shader image format</param>
+     /// <returns>Texture format, or the format with value 0 when the shader image format is not listed</returns>
+     public static Format GetFormat(TextureFormat format) => format switch
+     {
+         // Single channel
+         TextureFormat.R8Unorm => Format.R8Unorm,
+         TextureFormat.R8Snorm => Format.R8Snorm,
+         TextureFormat.R8Uint => Format.R8Uint,
+         TextureFormat.R8Sint => Format.R8Sint,
+         TextureFormat.R16Float => Format.R16Float,
+         TextureFormat.R16Unorm => Format.R16Unorm,
+         TextureFormat.R16Snorm => Format.R16Snorm,
+         TextureFormat.R16Uint => Format.R16Uint,
+         TextureFormat.R16Sint => Format.R16Sint,
+         TextureFormat.R32Float => Format.R32Float,
+         TextureFormat.R32Uint => Format.R32Uint,
+         TextureFormat.R32Sint => Format.R32Sint,
+
+         // Two channels
+         TextureFormat.R8G8Unorm => Format.R8G8Unorm,
+         TextureFormat.R8G8Snorm => Format.R8G8Snorm,
+         TextureFormat.R8G8Uint => Format.R8G8Uint,
+         TextureFormat.R8G8Sint => Format.R8G8Sint,
+         TextureFormat.R16G16Float => Format.R16G16Float,
+         TextureFormat.R16G16Unorm => Format.R16G16Unorm,
+         TextureFormat.R16G16Snorm => Format.R16G16Snorm,
+         TextureFormat.R16G16Uint => Format.R16G16Uint,
+         TextureFormat.R16G16Sint => Format.R16G16Sint,
+         TextureFormat.R32G32Float => Format.R32G32Float,
+         TextureFormat.R32G32Uint => Format.R32G32Uint,
+         TextureFormat.R32G32Sint => Format.R32G32Sint,
+
+         // Four channels
+         TextureFormat.R8G8B8A8Unorm => Format.R8G8B8A8Unorm,
+         TextureFormat.R8G8B8A8Snorm => Format.R8G8B8A8Snorm,
+         TextureFormat.R8G8B8A8Uint => Format.R8G8B8A8Uint,
+         TextureFormat.R8G8B8A8Sint => Format.R8G8B8A8Sint,
+         TextureFormat.R16G16B16A16Float => Format.R16G16B16A16Float,
+         TextureFormat.R16G16B16A16Unorm => Format.R16G16B16A16Unorm,
+         TextureFormat.R16G16B16A16Snorm => Format.R16G16B16A16Snorm,
+         TextureFormat.R16G16B16A16Uint => Format.R16G16B16A16Uint,
+         TextureFormat.R16G16B16A16Sint => Format.R16G16B16A16Sint,
+         TextureFormat.R32G32B32A32Float => Format.R32G32B32A32Float,
+         TextureFormat.R32G32B32A32Uint => Format.R32G32B32A32Uint,
+         TextureFormat.R32G32B32A32Sint => Format.R32G32B32A32Sint,
+
+         // Packed formats
+         TextureFormat.R10G10B10A2Unorm => Format.R10G10B10A2Unorm,
+         TextureFormat.R10G10B10A2Uint => Format.R10G10B10A2Uint,
+         TextureFormat.R11G11B10Float => Format.R11G11B10Float,
+
+         _ => 0
+     };
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs
new file mode 100644
index 00000000..a40b9cc4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs
@@ -0,0 +1,4226 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System.Globalization;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ static class AdvancedBlendFunctions
+ {
+ public static readonly AdvancedBlendUcode[] Table = new AdvancedBlendUcode[]
+ {
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedAlphaPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusDarkerPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusClampedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedContrastPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertOvgPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedRedPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedGreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedBluePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslLuminosityPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, GenDisjointXorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, GenDisjointPlusPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, GenDisjointMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, GenDisjointScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, GenDisjointOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, GenDisjointDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, GenDisjointLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, GenDisjointDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, true, GenDisjointExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslLuminosityPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, GenConjointDstPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOverPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, GenConjointDstInPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOutPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointDstAtopPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, GenConjointXorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, GenConjointMultiplyPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, GenConjointScreenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, GenConjointOverlayPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, GenConjointDarkenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, GenConjointLightenPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointColorDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointColorBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, GenConjointHardLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, GenConjointSoftLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, GenConjointDifferencePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, GenConjointExclusionPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertRGBPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearDodgePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearBurnPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, GenConjointVividLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, GenConjointPinLightPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, GenConjointHardMixPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, GenConjointHslHuePremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, GenConjointHslSaturationPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, GenConjointHslColorPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, GenConjointHslLuminosityPremul),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusClamped),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusClampedAlpha),
+ new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusDarker),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinus),
+ new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinusClamped),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedContrast),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedRed),
+ new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedGreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedBlue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslLuminosity),
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrc),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, GenDisjointXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, GenDisjointPlus),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, GenDisjointMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, GenDisjointScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, GenDisjointOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, GenDisjointDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, GenDisjointLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, GenDisjointDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, GenDisjointExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, GenDisjointInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslLuminosity),
+ new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, GenConjointSrc),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, GenConjointDstOver),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcIn),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOut),
+ new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointDstAtop),
+ new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, false, GenConjointXor),
+ new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, false, GenConjointMultiply),
+ new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, false, GenConjointScreen),
+ new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, false, GenConjointOverlay),
+ new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, false, GenConjointDarken),
+ new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, false, GenConjointLighten),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointColorDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointColorBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, false, GenConjointHardLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, false, GenConjointSoftLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, false, GenConjointDifference),
+ new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, false, GenConjointExclusion),
+ new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, false, GenConjointInvertRGB),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearDodge),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearBurn),
+ new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, false, GenConjointVividLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, false, GenConjointPinLight),
+ new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, false, GenConjointHardMix),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, false, GenConjointHslHue),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, false, GenConjointHslSaturation),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, false, GenConjointHslColor),
+ new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, false, GenConjointHslLuminosity)
+ };
+
+ /// <summary>
+ /// Builds C# source text for a dictionary initializer containing one entry per item of <c>Table</c>.
+ /// Each entry is keyed by the XXHash128 of the item's code and carries its op, overlap,
+ /// pre-multiplied flag, constants and fixed-function alpha state.
+ /// </summary>
+ /// <returns>The generated source text.</returns>
+ public static string GenTable()
+ {
+ // This can be used to generate the table on AdvancedBlendPreGenTable.
+
+ StringBuilder sb = new StringBuilder();
+
+ // Key/value types mirror the "new Hash128(...)" / "new AdvancedBlendEntry(...)" pairs emitted below;
+ // without them the generated declaration is not valid C#.
+ sb.AppendLine("private static Dictionary<Hash128, AdvancedBlendEntry> _entries = new()");
+ sb.AppendLine("{");
+
+ foreach (var entry in Table)
+ {
+ // MemoryMarshal.Cast cannot infer its target type, so both type arguments are spelled out.
+ // NOTE(review): assumes entry.Code is uint[] and ComputeHash consumes bytes - confirm.
+ Hash128 hash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(entry.Code));
+
+ // A null constants array is treated the same as an empty one.
+ string[] constants = new string[entry.Constants?.Length ?? 0];
+
+ for (int i = 0; i < constants.Length; i++)
+ {
+ RgbFloat rgb = entry.Constants[i];
+
+ // Invariant culture keeps '.' as the decimal separator in the generated literals.
+ constants[i] = string.Format(CultureInfo.InvariantCulture, "new " + nameof(RgbFloat) + "({0}f, {1}f, {2}f)", rgb.R, rgb.G, rgb.B);
+ }
+
+ string constantList = constants.Length > 0 ? $"new[] {{ {string.Join(", ", constants)} }}" : $"Array.Empty<{nameof(RgbFloat)}>()";
+
+ // Formats a value as "Name.Member", or as the literal "0" when its ToString() is "0".
+ static string EnumValue(string name, object value)
+ {
+ if (value.ToString() == "0")
+ {
+ return "0";
+ }
+
+ return $"{name}.{value}";
+ }
+
+ string alpha = $"new {nameof(FixedFunctionAlpha)}({EnumValue(nameof(BlendUcodeEnable), entry.Alpha.Enable)}, {EnumValue(nameof(BlendOp), entry.Alpha.AlphaOp)}, {EnumValue(nameof(BlendFactor), entry.Alpha.AlphaSrcFactor)}, {EnumValue(nameof(BlendFactor), entry.Alpha.AlphaDstFactor)})";
+
+ sb.AppendLine($" {{ new Hash128(0x{hash.Low:X16}, 0x{hash.High:X16}), new AdvancedBlendEntry({nameof(AdvancedBlendOp)}.{entry.Op}, {nameof(AdvancedBlendOverlap)}.{entry.Overlap}, {(entry.SrcPreMultiplied ? "true" : "false")}, {constantList}, {alpha}) }},");
+ }
+
+ sb.AppendLine("};");
+
+ return sb.ToString();
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the PlusClamped blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB); // PBR <- DstRGB, SrcRGB combined by Add.
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Min of that sum against ConstantOne goes to Temp0 (the "clamped" part of the name).
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // Same Add + Min-with-one pattern, now on the AAA operands.
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Written through Temp1.RToA; NOTE(review): presumably routes the result to the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // Fixed-function alpha disabled; NOTE(review): alpha presumably comes from the Temp1.RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlphaPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the PlusClampedAlpha blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); // Temp0 <- DstRGB, SrcRGB combined by Add.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // PBR <- Min of the AAA sum and ConstantOne.
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 is limited by the PBR value written on the previous line.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // The AAA sum is issued a second time for the Temp1.RToA write below.
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): RToA presumably routes the result to the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // Fixed-function alpha disabled; NOTE(review): alpha presumably comes from the Temp1.RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusDarkerPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the PlusDarker blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // PBR <- Min of the AAA sum and ConstantOne.
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcRGB); // The next four calls accumulate into PBR: Add SrcRGB, Add DstRGB, Sub SrcAAA, Sub DstAAA.
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 <- Max of the accumulated value and ConstantZero.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // AAA sum issued again for the Temp1.RToA write below.
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): RToA presumably routes the result to the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // Fixed-function alpha disabled; NOTE(review): alpha presumably comes from the Temp1.RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMultiplyPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Multiply blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstRGB); // Temp0 <- SrcRGB, DstRGB combined by Mul.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm Mmadd operand pairing.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedScreenPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Screen blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); // NOTE(review): presumably PBR*1 - SrcRGB*DstRGB - confirm Mmsub operand pairing.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedOverlayPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Overlay blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant 0 = (0.5, 0.5, 0.5), read below via ConstantRGB.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC destination: presumably sets the condition codes tested by the CC.LE / CC.GT calls below. NOTE(review): PBR is read here although the previous write targeted Temp1 - looks like PBR tracks the previous result; confirm.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // CC.LE path: Temp2*Temp1 twice (presumably 2*Temp2*Temp1).
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path (four calls): built from (1 - Temp1) and (1 - Temp2).
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDarkenPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Darken blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 <- Min of the two products above (Lighten uses Max at this position).
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLightenPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Lighten blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 <- Max of the two products above (Darken uses Min at this position).
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorDodgePremul(ref UcodeAssembler asm) // Emits the ucode sequence for the ColorDodge blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.SrcRGB); // .CC destination: presumably sets the condition codes from SrcAAA - SrcRGB for the CC.GT / CC.LE calls below - confirm.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // CC.GT path: reciprocal of that difference, scaled by SrcAAA and DstRGB, then Min against DstAAA.
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB);
+ asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // CC.LE path: Temp0 <- SrcAAA, DstAAA combined by Mul.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero); // Second .CC write, this time from DstRGB against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Multiply by ConstantZero: presumably just a way to write zero into Temp0 - confirm.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorBurnPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the ColorBurn blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB); // NOTE(review): presumably DstA*SrcA - SrcA*DstRGB - confirm Mmsub operand pairing.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcRGB);
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); // Temp0 times the Rcp(SrcRGB) result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // .CC destination: presumably sets condition codes from SrcRGB against ConstantZero - confirm.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE override; multiply by ConstantZero presumably writes zero into Temp0 - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB); // Second .CC write, from DstAAA - DstRGB.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // CC.LE override: Temp0 <- SrcAAA, DstAAA combined by Mul.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardLightPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HardLight blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant 0 = (0.5, 0.5, 0.5), read below via ConstantRGB.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); // .CC destination: presumably sets condition codes from Temp2 against the 0.5 constant (Overlay reads PBR here instead) - confirm.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // CC.LE path: Temp2*Temp1 twice (presumably 2*Temp2*Temp1).
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path (four calls): built from (1 - Temp1) and (1 - Temp2).
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSoftLightPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the SoftLight blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // Constants are set in the order 4, 0, 1, 2, 3, 5, 6, 7; NOTE(review): each ConstantRGB read presumably uses the most recent SetConstant - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC destination: presumably sets condition codes against the 0.25 constant for the CC.GT / CC.LE calls below - confirm.
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); // CC.GT path: chain of Mul/Mmadd/Add on Temp1 with the constants 0.2605, -0.7817, 0.3022, 0.2192.
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f); // CC.LE path: chain of Mul/Mmsub/Mmadd on Temp1 with the constants 16, 12, 3.
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR <- Temp2 + Temp2.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // Second .CC write, from (Temp2 + Temp2) against ConstantOne.
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // CC.LE override: presumably Temp1 - Temp1*Temp1 - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // (Temp2 + Temp2) - 1 is recomputed into PBR, this time without touching the condition codes.
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDifferencePremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Difference blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // Temp0 <- PBR - Temp2, with .CC presumably recording the sign for the CC.LT call below - confirm.
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // CC.LT override with the operands swapped; together the two Subs read as an absolute difference (NOTE(review): confirm).
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Minus blend (uncorrelated overlap, premultiplied source, going by the method name): a single Sub.
+ {
+ asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); // Temp0 <- DstRGB, SrcRGB combined by Sub (no clamping, unlike MinusClamped).
+ return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl); // Fixed-function alpha: ReverseSubtractGl, OneGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusClampedPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the MinusClamped blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 <- Max of the difference and ConstantZero (the "clamped" part of the name).
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // Same Sub + Max-with-zero pattern, now on the AAA operands.
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // Written through Temp1.RToA; NOTE(review): presumably routes the result to the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // Fixed-function alpha disabled; NOTE(review): alpha presumably comes from the Temp1.RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedExclusionPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Exclusion blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); // The SrcRGB/DstRGB pair is applied by two consecutive Mmsub calls (Screen applies it once); the repetition looks intentional.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedContrastPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Contrast blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.SetConstant(0, 2f, 2f, 2f); // Constant 0 = (2, 2, 2), read by the two Mmsub calls below via ConstantRGB.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); // NOTE(review): presumably DstRGB*2 - DstA*1 - confirm Mmsub operand pairing.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); // Same pattern on the source operands.
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.SetConstant(1, 0.5f, 0.5f, 0.5f); // Constant 1 = (0.5, 0.5, 0.5); NOTE(review): the ConstantRGB read below presumably uses this most recent constant - confirm.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB);
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Invert blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.DstRGB); // Uses SrcAAA as the multiplier in both pairs (InvertRGB uses SrcRGB instead).
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // NOTE(review): presumably DstRGB*(1-SrcA) + PBR - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertRGBPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the InvertRGB blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.DstRGB); // Uses SrcRGB as the multiplier in both pairs (Invert uses SrcAAA instead).
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // NOTE(review): presumably DstRGB*(1-SrcA) + PBR - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertOvgPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the InvertOvg blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Sub(CC.T, Dest.PBR, OpBD.ConstantOne, OpBD.DstRGB); // PBR <- ConstantOne, DstRGB combined by Sub.
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.PBR, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcA*PBR + DstRGB*(1-SrcA) - confirm Mmadd operand pairing.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearDodgePremul(ref UcodeAssembler asm) // Emits the ucode sequence for the LinearDodge blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 is limited by the SrcAAA/DstAAA product written to PBR on the previous line.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearBurnPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the LinearBurn blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); // NOTE(review): presumably PBR*1 - SrcA*DstA - confirm Mmsub operand pairing.
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 <- Max of that value and ConstantZero.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // NOTE(review): presumably SrcRGB*(1-DstA) + DstRGB*(1-SrcA) - confirm.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Final Temp0 <- Temp0 plus the PBR term from the previous line.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedVividLightPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the VividLight blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant 0 = (0.5, 0.5, 0.5), read below via ConstantRGB.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // .CC destination: presumably sets condition codes from Temp2 against 0.5 for the CC.GE / CC.LT calls below - confirm.
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // CC.GE path (five calls): Temp1 times the reciprocal of twice (1 - Temp2), then Min against ConstantOne.
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // CC.LT path (five calls): starts from the reciprocal of Temp2 + Temp2 and ends with ConstantOne minus the Min result.
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // Second .CC write: Temp2 against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE override; multiply by ConstantZero presumably writes zero into Temp0 - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // Third .CC write: Temp2 against ConstantOne.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE override: Temp0 <- ConstantOne.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearLightPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the LinearLight blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // The DstRGB/Rcp(DstAAA) product stays in PBR here (no Temp1 copy).
+ asm.SetConstant(0, 2f, 2f, 2f); // Constant 0 = (2, 2, 2), read below via ConstantRGB.
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // NOTE(review): presumably Temp2*2 + PBR - confirm Madd operand order.
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero, then Min against ConstantOne: bounds the value on both sides.
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 <- term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPinLightPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the PinLight blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR <- Temp2 + Temp2.
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // .CC destination: presumably sets condition codes for the CC.GT / CC.LE calls below. NOTE(review): which value PBR holds after the Temp0 write above is not visible here - confirm.
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // CC.GT path: Temp0 <- Max(Temp0, ConstantZero).
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // CC.LE path: Temp0 <- Min(Temp2 + Temp2, Temp1).
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardMixPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HardMix blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // The DstRGB/Rcp(DstAAA) product stays in PBR here (no Temp1 copy).
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC destination: presumably sets condition codes from the sum against ConstantOne for the CC.LT / CC.GE calls below - confirm.
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LT path; multiply by ConstantZero presumably writes zero into Temp0 - confirm.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE path: Temp0 <- ConstantOne.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 <- term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedRedPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Red blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Start from DstRGB.
+ asm.Mov(CC.T, Dest.Temp0.R, OpBD.SrcRGB); // Then move SrcRGB through the .R destination; NOTE(review): presumably a write mask that replaces only the red component - confirm.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedGreenPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Green blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Start from DstRGB.
+ asm.Mov(CC.T, Dest.Temp0.G, OpBD.SrcRGB); // Then move SrcRGB through the .G destination; NOTE(review): presumably a write mask that replaces only the green component - confirm.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedBluePremul(ref UcodeAssembler asm) // Emits the ucode sequence for the Blue blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Start from DstRGB.
+ asm.Mov(CC.T, Dest.Temp0.B, OpBD.SrcRGB); // Then move SrcRGB through the .B destination; NOTE(review): presumably a write mask that replaces only the blue component - confirm.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Fixed-function alpha: AddGl, ZeroGl, OneGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslHuePremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HslHue blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Min (or two Max) through a .GBR destination recurs below; NOTE(review): presumably a component rotation used to get the min/max across R, G, B - confirm.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 <- (Max chain result) - (Min chain result); .CC presumably feeds the CC.GT / CC.LE calls below - confirm.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path runs down to the CC.LE line.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // Same Min / Max chains, now over Temp1.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path; multiply by ConstantZero presumably writes zero into Temp0 - confirm.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is recomputed from DstRGB and Rcp(DstAAA).
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant 0 = (0.3, 0.59, 0.11); NOTE(review): these look like luminance weights - confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul .RRR, Madd .GGG, Madd .BBB: weighted accumulation with the constant, first for Temp1 ...
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // ... then for Temp0.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // Difference of the two accumulated values, added to Temp0 on the next line.
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write: Max chain over Temp2 against ConstantOne; the CC.GT block below handles that case.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min chain result; the CC.LT block below handles the case it flags (presumably a negative minimum - confirm).
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslSaturationPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HslSaturation blend (uncorrelated overlap, premultiplied source, going by the method name); same shape as HslHue with the Temp1 / Temp2 roles swapped in the first half. Do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Reads PBR where the later chains read Temp1; NOTE(review): presumably PBR still holds the value just written to Temp1 - confirm.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); // Mov + two Min (or two Max) through .GBR; NOTE(review): presumably a component rotation used to get the min/max across R, G, B - confirm.
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 <- (Max chain result) - (Min chain result); .CC presumably feeds the CC.GT / CC.LE calls below - confirm.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path runs down to the CC.LE line.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // Same Min / Max chains, now over Temp2.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE path; multiply by ConstantZero presumably writes zero into Temp0 - confirm.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is recomputed from DstRGB and Rcp(DstAAA).
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant 0 = (0.3, 0.59, 0.11); NOTE(review): these look like luminance weights - confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul .RRR, Madd .GGG, Madd .BBB: weighted accumulation with the constant, first for Temp1 ...
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // ... then for Temp0.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // Difference of the two accumulated values, added to Temp0 on the next line.
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write: Max chain over Temp2 against ConstantOne; the CC.GT block below handles that case.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min chain result; the CC.LT block below handles the case it flags (presumably a negative minimum - confirm).
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslColorPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HslColor blend (uncorrelated overlap, premultiplied source, going by the method name); do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant 0 = (0.3, 0.59, 0.11); NOTE(review): these look like luminance weights - confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul .RRR, Madd .GGG, Madd .BBB: weighted accumulation with the constant, first for Temp1 ...
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // ... then for Temp2.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // Difference of the two accumulated values, added to Temp2 on the next line.
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Max through .GBR; NOTE(review): presumably a component rotation used to get the max across R, G, B - confirm.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write: Max chain over Temp2 against ConstantOne; the CC.GT block below handles that case.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min chain result; the CC.LT block below handles the case it flags (presumably a negative minimum - confirm).
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslLuminosityPremul(ref UcodeAssembler asm) // Emits the ucode sequence for the HslLuminosity blend (uncorrelated overlap, premultiplied source, going by the method name); same shape as HslColor with the Temp1 / Temp2 roles swapped. Do not reorder the calls.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 <- SrcRGB times the Rcp(SrcAAA) result just written to PBR.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 <- DstRGB times the Rcp(DstAAA) result just written to PBR.
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant 0 = (0.3, 0.59, 0.11); NOTE(review): these look like luminance weights - confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // Mul .RRR, Madd .GGG, Madd .BBB: weighted accumulation with the constant, first for Temp2 ...
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // ... then for Temp1.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // Difference of the two accumulated values, added to Temp1 on the next line.
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max through .GBR; NOTE(review): presumably a component rotation used to get the max across R, G, B - confirm.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC write: Max chain over Temp1 against ConstantOne; the CC.GT block below handles that case.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // .CC on the Min chain result; the CC.LT block below handles the case it flags (presumably a negative minimum - confirm).
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 is reused for the term built from SrcRGB/OneMinusDstAAA and DstRGB/OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0*PBR + Temp1 - confirm Madd operand order.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Fixed-function alpha: AddGl, OneGl, OneMinusSrcAlphaGl (presumably op, src factor, dst factor).
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Src, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha (un-premultiplied source) - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2*DstA - Temp2*min(DstA, 1-SrcA) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp2*min(SrcA, 1-DstA) onto Temp0 - confirm Madd is A*B + C.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // Returns an AddGl / OneGl / ZeroGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Dst, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp1 = DstRGB / DstAlpha (un-premultiplied destination) - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp1*DstA - Temp1*min(DstA, 1-SrcA) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // Same Min operands as two calls earlier; PBR was overwritten in between only if Mmsub also writes it - TODO confirm why it is recomputed.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds the subtracted term back, leaving Temp1*DstA - confirm.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Returns an AddGl / ZeroGl / OneGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOverPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint SrcOver, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp2);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOverPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint DstOver, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); // NOTE(review): both operand pairs are (PBR, Temp1); if Mmsub is A*B - C*D this yields zero - presumably intentional generator output, confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcInPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint SrcIn, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2*(DstA - min(DstA, 1-SrcA)) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // NOTE(review): looks like alpha = DstA - min(DstA, 1-SrcA) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstInPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint DstIn, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp1 = DstRGB / DstAlpha - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp1*(DstA - min(DstA, 1-SrcA)) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // NOTE(review): looks like alpha = DstA - min(DstA, 1-SrcA) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOutPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint SrcOut, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2 * min(SrcA, 1-DstA) - confirm.
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): looks like alpha = min(SrcA, 1-DstA) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOutPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint DstOut, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp1 = DstRGB / DstAlpha - confirm that PBR carries the previous result.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp1 * min(DstA, 1-SrcA) - confirm.
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): looks like alpha = min(DstA, 1-SrcA) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcAtopPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint SrcAtop, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2*(DstA - min(DstA, 1-SrcA)) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp1*min(DstA, 1-SrcA) onto Temp0 - confirm Madd is A*B + C.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Returns an AddGl / ZeroGl / OneGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstAtopPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint DstAtop, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp1*(DstA - min(DstA, 1-SrcA)) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp2*min(SrcA, 1-DstA) onto Temp0 - confirm Madd is A*B + C.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // Returns an AddGl / OneGl / ZeroGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointXorPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Xor, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2 * min(SrcA, 1-DstA) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp1 * min(DstA, 1-SrcA) - confirm Madd is A*B + C.
+ asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // Temp1 is overwritten here after its last read as a colour operand above.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA);
+ asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); // NOTE(review): looks like alpha = min(DstA, 1-SrcA) + min(SrcA, 1-DstA) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointPlusPremul(ref UcodeAssembler asm) // Feeds asm a single Add call for the "disjoint Plus, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); // NOTE(review): presumably Temp0 = DstRGB + SrcRGB - confirm Add semantics in UcodeAssembler.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); // Returns an AddGl / OneGl / OneGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointMultiplyPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Multiply, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // NOTE(review): mode-specific term; presumably Temp0 = Temp2 * Temp1 if PBR still mirrors the Temp1 result - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointScreenPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Screen, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // NOTE(review): mode-specific term; presumably Temp0 = (Temp2 + Temp1) - Temp2*Temp1 - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointOverlayPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Overlay, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably the threshold read through OpBD.ConstantRGB below - TODO confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC presumably latches condition codes for the CC.LE / CC.GT calls below; compares PBR (here likely the Temp1 value) with 0.5, whereas the HardLight variant compares Temp2 - confirm.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // NOTE(review): LE branch presumably yields 2 * Temp2 * Temp1 - confirm Mmadd is A*B + C*D.
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // NOTE(review): GT branch presumably yields 1 - 2*(1-Temp1)*(1-Temp2) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDarkenPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Darken, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // NOTE(review): mode-specific term; presumably Temp0 = min(Temp2, Temp1) if PBR still mirrors the Temp1 result - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLightenPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Lighten, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // NOTE(review): mode-specific term; presumably Temp0 = max(Temp2, Temp1) if PBR still mirrors the Temp1 result - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorDodgePremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint ColorDodge, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); // Temp0 = 1 - Temp2 (presumably); .CC presumably latches condition codes for the CC.GT / CC.LE calls below - confirm.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): GT branch presumably yields min(Temp1 / (1 - Temp2), 1) - confirm.
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // LE branch writes ConstantOne instead.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // Second compare, this time on Temp1 against ConstantZero.
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // Overrides Temp0 with ConstantZero when that compare is LE.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorBurnPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint ColorBurn, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); // Compares Temp2 with ConstantZero; .CC presumably latches condition codes for the CC.GT calls below - confirm.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2);
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // NOTE(review): GT branch presumably yields 1 - (1 - Temp1) / Temp2 - confirm.
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // Unconditional clamp of Temp0 against ConstantZero (lower bound, if Max is a maximum).
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // Second compare: ConstantOne against Temp1.
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // Overrides Temp0 with ConstantOne when that compare is LE.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardLightPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint HardLight, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably the threshold read through OpBD.ConstantRGB below - TODO confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); // Compares Temp2 with the constant; .CC presumably latches condition codes for the CC.LE / CC.GT calls below - confirm.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // NOTE(review): LE branch presumably yields 2 * Temp2 * Temp1 - confirm Mmadd is A*B + C*D.
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // NOTE(review): GT branch presumably yields 1 - 2*(1-Temp1)*(1-Temp2) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSoftLightPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint SoftLight, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // NOTE(review): every SetConstant in this method is directly followed by one call reading OpBD.ConstantRGB; presumably that call sees the slot just set - confirm in UcodeAssembler.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // Compares PBR with 0.25; .CC presumably latches condition codes for the CC.GT / CC.LE calls below - confirm.
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); // NOTE(review): the GT branch looks like a polynomial in Temp1 with coefficients 0.2605, -0.7817, 0.3022, 0.2192 (possibly a sqrt-style approximation) - TODO confirm.
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // NOTE(review): the LE branch looks like a polynomial in Temp1 built from 16, 12 and 3 - TODO confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // Second compare: 2*Temp2 - 1 (presumably) against zero.
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // NOTE(review): presumably Temp0 = Temp1 - Temp1*Temp1 when that compare is LE - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // NOTE(review): presumably Temp0 = Temp0*(2*Temp2 - 1) + Temp1 - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDifferencePremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Difference, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // Temp0 = PBR - Temp2 (presumably Temp1 - Temp2); .CC presumably latches condition codes for the CC.LT call below - confirm.
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // When LT, replaces Temp0 with Temp2 - Temp1; together this looks like |Temp1 - Temp2| - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointExclusionPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Exclusion, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // NOTE(review): the two Mmsub calls each subtract Temp2*Temp1, so presumably Temp0 = Temp2 + Temp1 - 2*Temp2*Temp1 - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint Invert, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp1 = DstRGB / DstAlpha - confirm that PBR carries the previous result.
+ asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // NOTE(review): presumably Temp0 = 1 - Temp1 if PBR still mirrors the Temp1 result - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp0*(DstA - min(DstA, 1-SrcA)) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp1*min(DstA, 1-SrcA) onto Temp0 - confirm Madd is A*B + C.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Returns an AddGl / ZeroGl / OneGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertRGBPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint InvertRGB, premultiplied" blend variant (mode inferred from the method name only) and returns its FixedFunctionAlpha value.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp2 - Temp2*Temp1 = Temp2*(1 - Temp1) - confirm Mmsub is A*B - C*D.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); // NOTE(review): presumably Temp0 = Temp0*(DstA - min(DstA, 1-SrcA)) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // NOTE(review): presumably adds Temp1*min(DstA, 1-SrcA) onto Temp0 - confirm Madd is A*B + C.
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Returns an AddGl / ZeroGl / OneGl descriptor (argument roles presumably op, src factor, dst factor - confirm).
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearDodgePremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint LinearDodge, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): mode-specific term; presumably Temp0 = min(Temp2 + Temp1, 1) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearBurnPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint LinearBurn, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // NOTE(review): mode-specific term; presumably Temp0 = max(Temp2 + Temp1 - 1, 0) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointVividLightPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint VividLight, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably the threshold read through OpBD.ConstantRGB below - TODO confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // Compares Temp2 with the constant; .CC presumably latches condition codes for the CC.GE / CC.LT calls below - confirm.
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2);
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): GE branch presumably yields min(Temp1 / (2*(1 - Temp2)), 1) - confirm.
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // NOTE(review): LT branch presumably yields 1 - min((1 - Temp1) / (2*Temp2), 1) - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // Second compare: Temp2 against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Multiplies by ConstantZero when LE; presumably a way to write zero into Temp0 - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // Third compare: Temp2 against ConstantOne.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // Overrides Temp0 with ConstantOne when that compare is GE.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearLightPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint LinearLight, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.SetConstant(0, 2f, 2f, 2f); // Constant slot 0 = (2, 2, 2); presumably read through OpBD.ConstantRGB in the next call - TODO confirm.
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // NOTE(review): presumably PBR = 2*Temp2 + Temp1 - confirm Madd is A*B + C.
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): Max then Min looks like a clamp of (2*Temp2 + Temp1 - 1) to [0, 1] - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointPinLightPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint PinLight, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // NOTE(review): presumably Temp0 = 2*Temp2 - 1 - confirm.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // Compares PBR with Temp1; .CC presumably latches condition codes for the CC.GT / CC.LE calls below - confirm which value PBR holds here.
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // GT branch: bounds Temp0 below by ConstantZero (if Max is a maximum).
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2);
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // NOTE(review): LE branch presumably yields min(2*Temp2, Temp1) - confirm.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardMixPremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint HardMix, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // Compares (Temp2 + Temp1, presumably) with ConstantOne; .CC presumably latches condition codes for the CC.LT / CC.GE calls below - confirm.
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Multiplies by ConstantZero when LT; presumably a way to write zero into Temp0 - confirm.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // Writes ConstantOne when GE, so Temp0 ends up as a 0/1 step.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslHuePremul(ref UcodeAssembler asm) // Feeds asm the call sequence for the "disjoint HSL Hue, premultiplied" blend variant (mode inferred from the method name only) and returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // NOTE(review): with the Rcp above this looks like Temp2 = SrcRGB / SrcAlpha - confirm that PBR carries the previous result.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // NOTE(review): likewise looks like Temp1 = DstRGB / DstAlpha - confirm.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2); // NOTE(review): Mov + two Min through the GBR swizzle looks like a min reduction over the channels of Temp2, kept in Temp0 - confirm swizzle semantics.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); // Same pattern with Max; the result stays in PBR.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // NOTE(review): presumably Temp0 = max - min of Temp2; .CC presumably latches condition codes for the CC.GT / CC.LE calls below - confirm.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR); // NOTE(review): presumably Temp0 = (Temp2 - min(Temp2)) / (max - min) - confirm.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); // Min pattern applied to Temp1; the result overwrites Temp2.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // NOTE(review): presumably rescales Temp0 by (max - min) of Temp1 - confirm.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // Multiplies by ConstantZero when LE; presumably a way to write zero into Temp0 - confirm.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Repeats the Rcp/Mul pair for Temp1 from the top of the method.
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant slot 0 = (0.3, 0.59, 0.11); presumably luma weights read through OpBD.ConstantRGB below - TODO confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // NOTE(review): the RRR/GGG/BBB trio looks like a weighted channel sum accumulated through PBR, stored into Temp1 - confirm.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); // Same three-step pattern applied to Temp0; result stays in PBR.
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // NOTE(review): presumably shifts Temp0 by the difference of the two weighted sums, into Temp2 - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // Compares the Max result with ConstantOne; .CC presumably feeds the CC.GT calls below - confirm.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // Mirror of the Max pattern using Min; .CC presumably feeds the CC.LT calls below - confirm.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 and Temp1 were reused as scratch above, so both Rcp/Mul pairs are issued again before the shared tail.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // NOTE(review): min(DstA, 1-SrcA); presumably the dst-only weight - confirm.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // NOTE(review): min(SrcA, 1-DstA); presumably the src-only weight - confirm.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // NOTE(review): looks like alpha = min(SrcA + DstA, 1) written through RToA - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Moves Temp0 onto itself; the reason is not visible in this chunk - TODO confirm.
+ return FixedFunctionAlpha.Disabled; // No alpha descriptor supplied; presumably alpha comes from the RToA write above - confirm.
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslSaturationPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the disjoint-overlap HSL "saturation" mode for premultiplied inputs - confirm against the table that references it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov/Min/Min chain with GBR-modified destinations against Temp1; presumably a per-pixel channel minimum - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); // last Min of the chain is kept in Temp0
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max chain against Temp1; presumably the channel maximum - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = Sub(PBR, Temp0) with .CC; the CC.GT / CC.LE instructions below presumably test this result - confirm
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // issued with CC.GT: Rcp of Temp0 into Temp0
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // issued with CC.GT: second Mov/Min/Min chain over Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // issued with CC.GT: Temp0 = Mmsub(Temp0, Temp1, Temp0, PBR)
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // issued with CC.GT: Mov/Min/Min chain over Temp2, last result kept in Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // issued with CC.GT: Mov/Max/Max chain over Temp2, result left in PBR
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // issued with CC.GT: Temp0 = Mmsub(Temp0, PBR, Temp0, Temp1)
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // issued with CC.LE: Temp0 = Mul(SrcAAA, ConstantZero)
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so Mul(DstRGB, Rcp(DstAAA)) is emitted again
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // NOTE(review): 0.3 / 0.59 / 0.11 look like luminance weights; presumably read through ConstantRGB below - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations; first weighted sum ends in Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same Mul/Madd/Madd pattern over Temp0, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Sub(Temp1, PBR)
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Add(Temp0, PBR)
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max chain over Temp2
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne) with .CC; the CC.GT block below presumably runs when the result is positive - confirm
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // issued with CC.GT: adds ConstantOne back onto PBR
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // issued with CC.GT: Temp0 = Madd(Temp0, PBR, Temp1)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min chain over Temp2; the final Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // issued with CC.LT: the next four instructions
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 are rebuilt from SrcRGB/SrcAAA and DstRGB/DstAAA before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = Min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = Min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Add(SrcAAA, DstAAA)
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min(PBR, ConstantOne) written to Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslColorPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the disjoint-overlap HSL "color" mode for premultiplied inputs - confirm against the table that references it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // NOTE(review): 0.3 / 0.59 / 0.11 look like luminance weights; presumably read through ConstantRGB below - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations; first weighted sum ends in Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // same Mul/Madd/Madd pattern over Temp2, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Sub(Temp1, PBR)
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); // Temp2 = Add(Temp2, PBR)
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max chain with GBR-modified destinations over Temp2; presumably the channel maximum - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne) with .CC; the CC.GT block below presumably runs when the result is positive - confirm
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // issued with CC.GT: adds ConstantOne back onto PBR
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // issued with CC.GT: Temp0 = Madd(Temp0, PBR, Temp1)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min chain over Temp2; the final Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // issued with CC.LT: the next four instructions
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 are rebuilt from SrcRGB/SrcAAA and DstRGB/DstAAA before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = Min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = Min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Add(SrcAAA, DstAAA)
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min(PBR, ConstantOne) written to Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslLuminosityPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the disjoint-overlap HSL "luminosity" mode for premultiplied inputs - confirm against the table that references it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // NOTE(review): 0.3 / 0.59 / 0.11 look like luminance weights; presumably read through ConstantRGB below - confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations over Temp2; weighted sum ends in Temp2
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same Mul/Madd/Madd pattern over Temp1, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Sub(Temp2, PBR)
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 = Add(Temp1, PBR)
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max chain with GBR-modified destinations over Temp1; presumably the channel maximum - confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne) with .CC; the CC.GT block below presumably runs when the result is positive - confirm
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // issued with CC.GT: adds ConstantOne back onto PBR
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); // issued with CC.GT: Temp0 = Madd(Temp0, PBR, Temp2)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min chain over Temp1; the final Min carries .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // issued with CC.LT: the next four instructions
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Temp2 and Temp1 are rebuilt from SrcRGB/SrcAAA and DstRGB/DstAAA before the final combine
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = Min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = Min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Add(SrcAAA, DstAAA)
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Min(PBR, ConstantOne) written to Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcPremul(ref UcodeAssembler asm) // Appends seven instructions to asm and returns FixedFunctionAlpha(AddGl, OneGl, ZeroGl). NOTE(review): name suggests the conjoint-overlap "Src" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp2, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstPremul(ref UcodeAssembler asm) // Appends seven instructions to asm and returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Dst" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Mul(Temp1, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp1, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOverPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "SrcOver" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp2); // Temp0 = Temp2
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp2, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOverPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "DstOver" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR; NOTE(review): PBR presumably still holds the Mul result from the line above - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp1, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcInPremul(ref UcodeAssembler asm) // Appends four instructions to asm and returns FixedFunctionAlpha(MinimumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "SrcIn" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR)
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstInPremul(ref UcodeAssembler asm) // Appends four instructions to asm and returns FixedFunctionAlpha(MinimumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "DstIn" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Mul(Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOutPremul(ref UcodeAssembler asm) // Appends eight instructions to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the conjoint-overlap "SrcOut" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // same Sub as above, emitted again for the write below
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // Max(PBR, ConstantZero) written to Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOutPremul(ref UcodeAssembler asm) // Appends eight instructions to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the conjoint-overlap "DstOut" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Mul(Temp1, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // same Sub as above, emitted again for the write below
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // Max(PBR, ConstantZero) written to Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcAtopPremul(ref UcodeAssembler asm) // Appends nine instructions to asm and returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). NOTE(review): name suggests the conjoint-overlap "SrcAtop" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp1, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstAtopPremul(ref UcodeAssembler asm) // Appends nine instructions to asm and returns FixedFunctionAlpha(AddGl, OneGl, ZeroGl). NOTE(review): name suggests the conjoint-overlap "DstAtop" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Mul(Temp1, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp2, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointXorPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha.Disabled. NOTE(review): name suggests the conjoint-overlap "Xor" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp1, PBR, Temp0)
+ asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); // Temp1 = Sub(DstAAA, SrcAAA) with .CC
+ asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); // issued with CC.LT: Temp1 = Sub(SrcAAA, DstAAA); together these presumably form an absolute difference - confirm
+ asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); // Temp1 moved into Temp1 with the RToA modifier; NOTE(review): presumably the output alpha - confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Mov of Temp0 into Temp0; the reason is not visible in this chunk
+ return FixedFunctionAlpha.Disabled; // no BlendOp/BlendFactor triple is supplied for this variant
+ }
+
+ private static FixedFunctionAlpha GenConjointMultiplyPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Multiply" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Mul(Temp2, PBR); NOTE(review): PBR presumably still holds the Mul result from the line above - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointScreenPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Screen" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Add(Temp2, PBR)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // Temp0 = Mmsub(PBR, ConstantOne, Temp2, Temp1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointOverlayPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Overlay" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant 0 set to 0.5 per component; presumably read through ConstantRGB on the next line - confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // Temp0 = Sub(PBR, ConstantRGB) with .CC; the CC.LE / CC.GT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // issued with CC.LE: Temp0 = Mmadd(Temp2, Temp1, Temp2, Temp1)
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // issued with CC.GT: Temp0 = Sub(ConstantOne, Temp1)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // issued with CC.GT: PBR = Sub(ConstantOne, Temp2)
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // issued with CC.GT: PBR = Mmadd(Temp0, PBR, Temp0, PBR)
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // issued with CC.GT: Temp0 = Sub(ConstantOne, PBR)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDarkenPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Darken" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Min(Temp2, PBR); NOTE(review): PBR presumably still holds the Mul result from the line above - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLightenPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Lighten" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); // Temp0 = Max(Temp2, PBR); NOTE(review): PBR presumably still holds the Mul result from the line above - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointColorDodgePremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "ColorDodge" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); // Temp0 = Sub(ConstantOne, Temp2) with .CC; the CC.GT / CC.LE instructions below presumably test this result - confirm
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); // issued with CC.GT: PBR = Rcp(Temp0)
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); // issued with CC.GT: PBR = Mul(PBR, Temp1)
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // issued with CC.GT: Temp0 = Min(PBR, ConstantOne)
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // issued with CC.LE: Temp0 = ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // PBR = Sub(Temp1, ConstantZero) with .CC; presumably a sign test on Temp1 - confirm
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // issued with CC.LE: Temp0 = ConstantZero
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointColorBurnPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "ColorBurn" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); // Temp0 = Sub(Temp2, ConstantZero) with .CC; the CC.GT instructions below presumably run when Temp2 is positive - confirm
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2); // issued with CC.GT: PBR = Rcp(Temp2)
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // issued with CC.GT: PBR = Mmsub(PBR, ConstantOne, PBR, Temp1)
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // issued with CC.GT: Temp0 = Sub(ConstantOne, PBR)
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // Temp0 = Max(Temp0, ConstantZero)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // PBR = Sub(ConstantOne, Temp1) with .CC
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // issued with CC.LE: Temp0 = ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointHardLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "HardLight" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant 0 set to 0.5 per component; presumably read through ConstantRGB on the next line - confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); // Temp0 = Sub(Temp2, ConstantRGB) with .CC; the CC.LE / CC.GT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); // issued with CC.LE: Temp0 = Mmadd(Temp2, Temp1, Temp2, Temp1)
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // issued with CC.GT: Temp0 = Sub(ConstantOne, Temp1)
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // issued with CC.GT: PBR = Sub(ConstantOne, Temp2)
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // issued with CC.GT: PBR = Mmadd(Temp0, PBR, Temp0, PBR)
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // issued with CC.GT: Temp0 = Sub(ConstantOne, PBR)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSoftLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "SoftLight" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // each SetConstant here uses a distinct index (0-7); presumably the following ConstantRGB operand reads the constant just set - confirm in UcodeAssembler
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // PBR = Sub(PBR, ConstantRGB) with .CC; the CC.GT / CC.LE instructions below presumably test this result - confirm
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // issued with CC.GT: Mul, Mmadd, Mmadd, Add chain over Temp1 ending in Temp0; NOTE(review): looks like a polynomial in Temp1 - confirm
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // issued with CC.LE: Mul, Mmsub, Mmadd chain over Temp1 ending in Temp0
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR = Add(Temp2, Temp2)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne) with .CC
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // issued with CC.LE: Temp0 = Mmsub(Temp1, ConstantOne, Temp1, Temp1)
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // the Add/Sub pair is emitted again, this time without .CC
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Madd(Temp0, PBR, Temp1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDifferencePremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Difference" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); // Temp0 = Sub(PBR, Temp2) with .CC
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); // issued with CC.LT: Temp0 = Sub(Temp2, Temp1); together these presumably form an absolute difference - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointExclusionPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Exclusion" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Add(Temp2, PBR)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // PBR = Mmsub(PBR, ConstantOne, Temp2, Temp1)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); // same operands a second time, written to Temp0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertPremul(ref UcodeAssembler asm) // Appends eight instructions to asm and returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). NOTE(review): name suggests the conjoint-overlap "Invert" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // Temp0 = Sub(ConstantOne, PBR); NOTE(review): PBR presumably still holds the Mul result from the line above - confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 = Mul(Temp0, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp1, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertRGBPremul(ref UcodeAssembler asm) // Appends ten instructions to asm and returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). NOTE(review): name suggests the conjoint-overlap "InvertRGB" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); // Temp0 = Mmsub(Temp2, ConstantOne, Temp2, PBR)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = Min(DstAAA, SrcAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Temp0 = Mul(Temp0, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // PBR = Sub(DstAAA, SrcAAA)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = Max(PBR, ConstantZero)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Madd(Temp1, PBR, Temp0)
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearDodgePremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "LinearDodge" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Add(Temp2, PBR)
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = Min(PBR, ConstantOne)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearBurnPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "LinearBurn" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Add(Temp2, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne)
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 = Max(PBR, ConstantZero)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointVividLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "VividLight" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // constant 0 set to 0.5 per component; presumably read through ConstantRGB on the next line - confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); // PBR = Sub(Temp2, ConstantRGB) with .CC; the CC.GE / CC.LT blocks below presumably test this result - confirm
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); // issued with CC.GE: the next five instructions, ending with a Min against ConstantOne into Temp0
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // issued with CC.LT: the next five instructions, ending with Temp0 = Sub(ConstantOne, PBR)
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // PBR = Sub(Temp2, ConstantZero) with .CC
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // issued with CC.LE: Temp0 = Mul(SrcAAA, ConstantZero)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); // PBR = Sub(Temp2, ConstantOne) with .CC
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // issued with CC.GE: Temp0 = ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "LinearLight" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.SetConstant(0, 2f, 2f, 2f); // constant 0 set to 2 per component; presumably read through ConstantRGB on the next line - confirm
+ asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // PBR = Madd(Temp2, ConstantRGB, PBR)
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = Sub(PBR, ConstantOne)
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max against ConstantZero then Min against ConstantOne; result kept in Temp0
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointPinLightPremul(ref UcodeAssembler asm) // Appends the instruction sequence below to asm and returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): name suggests the conjoint-overlap "PinLight" mode for premultiplied inputs - confirm.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Rcp of SrcAAA into PBR
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2 = Mul(SrcRGB, PBR); presumably the un-premultiplied source colour - confirm
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Rcp of DstAAA into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = Mul(DstRGB, PBR); presumably the un-premultiplied destination colour - confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // PBR = Add(Temp2, Temp2)
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 = Sub(PBR, ConstantOne)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // PBR = Sub(PBR, Temp1) with .CC; the CC.GT / CC.LE instructions below presumably test this result - confirm
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // issued with CC.GT: Temp0 = Max(Temp0, ConstantZero)
+ asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); // issued with CC.LE: PBR = Add(Temp2, Temp2)
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // issued with CC.LE: Temp0 = Min(PBR, Temp1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // PBR = Sub(SrcAAA, DstAAA) with .CC; the CC.GE / CC.LT instructions below presumably test this result - confirm
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // issued with CC.GE: Temp0 = Mmadd(Temp0, DstAAA, Temp2, PBR)
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // issued with CC.LT: PBR = Sub(DstAAA, SrcAAA)
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // issued with CC.LT: Temp0 = Mmadd(Temp0, SrcAAA, Temp1, PBR)
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // argument roles presumably op / source factor / destination factor - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointHardMixPremul(ref UcodeAssembler asm) // Issues the assembler calls for the hard-mix mode (conjoint overlap, premultiplied source - going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2: SrcRGB times the Rcp of SrcAAA (looks like un-premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: same treatment for the destination colour.
+ asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // Reads PBR right after the Temp1 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably sets condition codes from (sum - 1) for the LT/GE lines below.
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT path: multiply by ConstantZero.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE path: Temp0 takes ConstantOne.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // New condition codes from SrcAAA - DstAAA for the GE/LT selection below.
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 paired with DstAAA, Temp2 paired with the alpha difference in PBR.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: same shape with SrcAAA and Temp1.
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: MaximumGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenConjointHslHuePremul(ref UcodeAssembler asm) // Issues the assembler calls for the HSL-hue mode (conjoint overlap, premultiplied source - going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2: SrcRGB times the Rcp of SrcAAA (looks like un-premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: same treatment for the destination colour.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Min through a .GBR destination: looks like a channel rotation reducing Temp2 to a per-pixel minimum - TODO confirm swizzle semantics.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Same pattern with Max.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // .CC: presumably sets condition codes from (max - min); the GT block below runs only when it is positive.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // Min/Max pattern repeated on Temp1.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); // Overwrites Temp2; it is rebuilt from SrcRGB near the end of the method.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: multiply by ConstantZero.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Presumably the value read as ConstantRGB by the six Mul/Madd lines below (these look like luminance weights - TODO confirm).
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC from (max - 1): the GT block below handles values above ConstantOne.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min result: the LT block below handles a negative minimum.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Repeats the method's first four calls: Temp2 and Temp1 were overwritten in between.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // New condition codes from SrcAAA - DstAAA for the GE/LT selection below.
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 paired with DstAAA, Temp2 paired with the alpha difference in PBR.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: same shape with SrcAAA and Temp1.
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: MaximumGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenConjointHslSaturationPremul(ref UcodeAssembler asm) // Issues the assembler calls for the HSL-saturation mode (conjoint overlap, premultiplied source - going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2: SrcRGB times the Rcp of SrcAAA (looks like un-premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: same treatment for the destination colour.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Reads PBR right after the Temp1 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); // Mov + two Min through a .GBR destination: looks like a channel rotation reducing Temp1 to a per-pixel minimum - TODO confirm swizzle semantics.
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Same pattern with Max.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // .CC: presumably sets condition codes from (max - min); the GT block below runs only when it is positive.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); // Min/Max pattern repeated on Temp2.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2); // Overwrites Temp1; it is rebuilt from DstRGB a few lines below.
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: multiply by ConstantZero.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Presumably the value read as ConstantRGB by the six Mul/Madd lines below (these look like luminance weights - TODO confirm).
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC from (max - 1): the GT block below handles values above ConstantOne.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min result: the LT block below handles a negative minimum.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Repeats the method's first four calls: Temp2 and Temp1 were overwritten in between.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // New condition codes from SrcAAA - DstAAA for the GE/LT selection below.
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 paired with DstAAA, Temp2 paired with the alpha difference in PBR.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: same shape with SrcAAA and Temp1.
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: MaximumGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenConjointHslColorPremul(ref UcodeAssembler asm) // Issues the assembler calls for the HSL-color mode (conjoint overlap, premultiplied source - going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2: SrcRGB times the Rcp of SrcAAA (looks like un-premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: same treatment for the destination colour.
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Presumably the value read as ConstantRGB by the six Mul/Madd lines below (these look like luminance weights - TODO confirm).
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Reads PBR right after the Temp1 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Weighted sum of Temp1 written back over Temp1.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // Same weighted sum for Temp2, left in PBR.
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); // Temp2 shifted by the difference of the two sums.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Max through a .GBR destination: looks like a channel rotation reducing Temp2 to a per-pixel maximum - TODO confirm swizzle semantics.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC from (max - 1): the GT block below handles values above ConstantOne.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // .CC on the Min result: the LT block below handles a negative minimum.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Repeats the method's first four calls: Temp2 and Temp1 were overwritten in between.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // New condition codes from SrcAAA - DstAAA for the GE/LT selection below.
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 paired with DstAAA, Temp2 paired with the alpha difference in PBR.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: same shape with SrcAAA and Temp1.
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: MaximumGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenConjointHslLuminosityPremul(ref UcodeAssembler asm) // Issues the assembler calls for the HSL-luminosity mode (conjoint overlap, premultiplied source - going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA);
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); // Temp2: SrcRGB times the Rcp of SrcAAA (looks like un-premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: same treatment for the destination colour.
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Presumably the value read as ConstantRGB by the six Mul/Madd lines below (these look like luminance weights - TODO confirm).
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); // Weighted sum of Temp2 written back over Temp2.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Same weighted sum for Temp1, left in PBR.
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 shifted by the difference of the two sums.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max through a .GBR destination: looks like a channel rotation reducing Temp1 to a per-pixel maximum - TODO confirm swizzle semantics.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC from (max - 1): the GT block below handles values above ConstantOne.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // .CC on the Min result: the LT block below handles a negative minimum.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR);
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); // Repeats the method's first four calls: Temp2 and Temp1 were overwritten in between.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // New condition codes from SrcAAA - DstAAA for the GE/LT selection below.
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); // GE path: Temp0 paired with DstAAA, Temp2 paired with the alpha difference in PBR.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT path: same shape with SrcAAA and Temp1.
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: MaximumGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDstOver(ref UcodeAssembler asm) // Issues the assembler calls for the dst-over mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcIn(ref UcodeAssembler asm) // Issues the assembler calls for the src-in mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA); // ...then times DstAAA.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Product copied to Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl); // Alpha setup: AddGl op with factors DstAlphaGl and ZeroGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcOut(ref UcodeAssembler asm) // Issues the assembler calls for the src-out mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.OneMinusDstAAA); // ...then times OneMinusDstAAA, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl); // Alpha setup: AddGl op with factors OneMinusDstAlphaGl and ZeroGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSrcAtop(ref UcodeAssembler asm) // Issues the assembler calls for the src-atop mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA); // ...then times DstAAA.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB paired with OneMinusSrcAAA, plus the product above, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl op with factors ZeroGl and OneGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDstAtop(ref UcodeAssembler asm) // Issues the assembler calls for the dst-atop mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.PBR); // Temp2 paired with OneMinusDstAAA, plus the product above, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // Alpha setup: AddGl op with factors OneGl and ZeroGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedXor(ref UcodeAssembler asm) // Issues the assembler calls for the xor mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.OneMinusDstAAA); // ...then times OneMinusDstAAA.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB paired with OneMinusSrcAAA, plus the product above, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneMinusDstAlphaGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClamped(ref UcodeAssembler asm) // Issues the assembler calls for the plus-clamped mode (uncorrelated overlap, going by the method name); returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Colour sum capped at ConstantOne.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Alpha sum capped at ConstantOne and written through the RToA destination (presumably the alpha output - TODO confirm).
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; kept as-is, purpose not evident from here.
+ return FixedFunctionAlpha.Disabled; // No fixed-function alpha: presumably because the Temp1.RToA write above supplies alpha.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlpha(ref UcodeAssembler asm) // Issues the assembler calls for the plus-clamped-alpha mode (uncorrelated overlap, going by the method name); returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // Alpha sum capped at ConstantOne...
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // ...and the colour sum capped at that alpha value.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Capped alpha sum written through the RToA destination (presumably the alpha output - TODO confirm).
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; kept as-is, purpose not evident from here.
+ return FixedFunctionAlpha.Disabled; // No fixed-function alpha: presumably because the Temp1.RToA write above supplies alpha.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPlusDarker(ref UcodeAssembler asm) // Issues the assembler calls for the plus-darker mode (uncorrelated overlap, going by the method name); returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // Alpha sum capped at ConstantOne.
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.Temp2); // Running value in PBR: add Temp2 and DstRGB, then subtract SrcAAA and DstAAA.
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Floored at ConstantZero into Temp0.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Capped alpha sum written through the RToA destination (presumably the alpha output - TODO confirm).
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; kept as-is, purpose not evident from here.
+ return FixedFunctionAlpha.Disabled; // No fixed-function alpha: presumably because the Temp1.RToA write above supplies alpha.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMultiply(ref UcodeAssembler asm) // Issues the assembler calls for the multiply mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstRGB); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedScreen(ref UcodeAssembler asm) // Issues the assembler calls for the screen mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedOverlay(ref UcodeAssembler asm) // Issues the assembler calls for the overlay mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Presumably the value read as ConstantRGB by the Sub below.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC: presumably sets condition codes from (PBR - 0.5); PBR is read right after the Temp1 write, so this looks like a test on the Temp1 value.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE path: the SrcRGB/Temp1 product added to itself.
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: built from (1 - Temp1) and (1 - SrcRGB).
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDarken(ref UcodeAssembler asm) // Issues the assembler calls for the darken mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Smaller of the two products above.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLighten(ref UcodeAssembler asm) // Issues the assembler calls for the lighten mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Larger of the two products above.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorDodge(ref UcodeAssembler asm) // Issues the assembler calls for the color-dodge mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.PBR); // .CC: presumably sets condition codes from (SrcAAA - PBR); PBR is read right after the Temp2 write.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // GT path: Rcp of that difference, scaled by SrcAAA and DstRGB, capped by DstAAA.
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB);
+ asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR);
+ asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // LE path: SrcAAA times DstAAA.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero); // New condition codes from DstRGB against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: multiply by ConstantZero, overriding the result above.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedColorBurn(ref UcodeAssembler asm) // Issues the assembler calls for the color-burn mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.Temp2);
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); // Temp0 times the Rcp of Temp2...
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR); // ...capped by DstAAA.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); // .CC: presumably sets condition codes from Temp2 against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: multiply by ConstantZero.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB); // New condition codes from (DstAAA - DstRGB).
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); // LE path: SrcAAA times DstAAA.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardLight(ref UcodeAssembler asm) // Issues the assembler calls for the hard-light mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Presumably the value read as ConstantRGB by the Sub below.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC: presumably sets condition codes from (SrcRGB - 0.5) for the LE/GT lines below.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE path: the SrcRGB/Temp1 product added to itself.
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT path: built from (1 - Temp1) and (1 - SrcRGB).
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedSoftLight(ref UcodeAssembler asm) // Issues the assembler calls for the soft-light mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // Each SetConstant here precedes one instruction that reads ConstantRGB; presumably it supplies that instruction's constant.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // .CC: presumably sets condition codes from (PBR - 0.25); PBR is read right after the Temp1 write.
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); // GT path: chained Mul/Mmadd/Add on Temp1 with fixed coefficients (looks like a polynomial approximation - TODO confirm what it approximates).
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f); // LE path: chained Mul/Mmsub/Mmadd on Temp1 with constants 16, 12 and 3.
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // New condition codes from (SrcRGB + SrcRGB - 1).
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // LE path: Temp1 minus Temp1 times Temp1.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // Same (SrcRGB + SrcRGB - 1) value rebuilt unconditionally for the Madd below.
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedDifference(ref UcodeAssembler asm) // Issues the assembler calls for the difference mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // PBR (read right after the Temp1 write) minus SrcRGB; .CC presumably records the sign.
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // LT path: operands swapped, so Temp0 ends up non-negative either way.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinus(ref UcodeAssembler asm) // Issues the assembler calls for the minus mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); // DstRGB minus that product, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: ReverseSubtractGl op with OneGl for both factors.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedMinusClamped(ref UcodeAssembler asm) // Issues the assembler calls for the minus-clamped mode (uncorrelated overlap, going by the method name); returns FixedFunctionAlpha.Disabled.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Colour difference floored at ConstantZero.
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // Alpha difference floored at ConstantZero and written through the RToA destination (presumably the alpha output - TODO confirm).
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 moved onto itself; kept as-is, purpose not evident from here.
+ return FixedFunctionAlpha.Disabled; // No fixed-function alpha: presumably because the Temp1.RToA write above supplies alpha.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedExclusion(ref UcodeAssembler asm) // Issues the assembler calls for the exclusion mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); // The Temp2/DstRGB product is taken off twice (this line and the next).
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB);
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedContrast(ref UcodeAssembler asm) // Issues the assembler calls for the contrast mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.SetConstant(0, 2f, 2f, 2f); // Presumably the value read as ConstantRGB by the two Mmsub lines below.
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); // DstRGB times the constant, minus DstAAA.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); // Same shape for Temp2 and SrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA);
+ asm.SetConstant(1, 0.5f, 0.5f, 0.5f); // Presumably the value read as ConstantRGB by the final Mul.
+ asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB); // Scaled result into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl op with factors ZeroGl and OneGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedInvertRGB(ref UcodeAssembler asm) // Issues the assembler calls for the invert-RGB mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.PBR, OpBD.DstRGB); // That product times DstAAA, minus the same product times DstRGB.
+ asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); // DstRGB paired with OneMinusSrcAAA, plus the value above, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl op with factors ZeroGl and OneGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearDodge(ref UcodeAssembler asm) // Issues the assembler calls for the linear-dodge mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmadd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); // Sum capped at SrcAAA times DstAAA.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearBurn(ref UcodeAssembler asm) // Issues the assembler calls for the linear-burn mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); // Reads PBR right after the Temp2 write; PBR appears to carry the previous result - confirm against UcodeAssembler.
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); // Minus SrcAAA times DstAAA.
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Floored at ConstantZero.
+ asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); // Both parts summed into Temp0 (the last register written - presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedVividLight(ref UcodeAssembler asm) // Issues the assembler calls for the vivid-light mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Presumably the value read as ConstantRGB by the Sub below.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // .CC: presumably sets condition codes from (SrcRGB - 0.5) for the GE/LT blocks below.
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GE path: Temp1 times the Rcp of twice (1 - SrcRGB), capped at ConstantOne.
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // LT path: built from the Rcp of twice SrcRGB and Temp1, capped, then subtracted from ConstantOne.
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // New condition codes from SrcRGB against ConstantZero.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE path: multiply by ConstantZero.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // New condition codes from SrcRGB against ConstantOne.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE path: Temp0 takes ConstantOne.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedLinearLight(ref UcodeAssembler asm) // Issues the assembler calls for the linear-light mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination), kept in PBR.
+ asm.SetConstant(0, 2f, 2f, 2f); // Presumably the value read as ConstantRGB by the Madd below.
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max with zero, then Min with one: limits the value to [0, 1] before it lands in Temp0.
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedPinLight(ref UcodeAssembler asm) // Issues the assembler calls for the pin-light mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1: DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination).
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // SrcRGB added to itself.
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // .CC: presumably sets condition codes for the GT/LE-predicated lines below.
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero);
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB);
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHardMix(ref UcodeAssembler asm) // Issues the assembler calls for the hard-mix mode (uncorrelated overlap, going by the method name) and returns the alpha setup paired with it.
+ {
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2: SrcRGB times SrcAAA (looks like premultiplying the source).
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+ asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); // DstRGB times the Rcp of DstAAA (looks like un-premultiplying the destination), kept in PBR.
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // .CC: presumably sets condition codes from (sum - 1) for the LT/GE lines below.
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT path: multiply by ConstantZero.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE path: Temp0 takes ConstantOne.
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp2 paired with OneMinusDstAAA, DstRGB paired with OneMinusSrcAAA.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 scaled by SrcAAA*DstAAA plus Temp1, into Temp0 (presumably the output).
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl op with factors OneGl and OneMinusSrcAlphaGl.
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedRed(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b and that a ".R" destination restricts the write to the red channel; confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2 = SrcRGB * SrcAAA
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 = DstRGB
+ asm.Mov(CC.T, Dest.Temp0.R, OpBD.Temp2); // Temp0 (R destination) = Temp2
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (ZeroGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedGreen(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b and that a ".G" destination restricts the write to the green channel; confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2 = SrcRGB * SrcAAA
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 = DstRGB
+ asm.Mov(CC.T, Dest.Temp0.G, OpBD.Temp2); // Temp0 (G destination) = Temp2
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (ZeroGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedBlue(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b and that a ".B" destination restricts the write to the blue channel; confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); // Temp2 = SrcRGB * SrcAAA
+ asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); // Temp0 = DstRGB
+ asm.Mov(CC.T, Dest.Temp0.B, OpBD.Temp2); // Temp0 (B destination) = Temp2
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (ZeroGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslHue(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Start of a Mov/Min/Min group over SrcRGB with GBR destinations; presumably a channel-rotating minimum - confirm.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); // Last step of the group writes Temp0.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Same group shape with Max over SrcRGB; result is left in PBR.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0; the .CC destination presumably also writes the flags tested by CC.GT / CC.LE below.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT only: Temp0 = 1 / Temp0
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // GT only: Mov/Min/Min group over SrcRGB again, result left in PBR.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); // GT only: Temp0 = Temp0 * SrcRGB - Temp0 * PBR
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT only: Mov/Min/Min group over Temp1, final step writes Temp2.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT only: Mov/Max/Max group over Temp1, result left in PBR.
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // GT only: Temp0 = Temp0 * PBR - Temp0 * Temp2
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE only: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA (recomputed; Temp1 is rewritten on the next line)
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel weights read as ConstantRGB below.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations: presumably accumulates a weighted channel sum - confirm.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Final step of the sum writes Temp1.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // Same Mul/Madd/Madd shape over Temp0; result is left in PBR.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max group over Temp2, result left in PBR.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1; presumably also writes the flags tested by CC.GT below.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT only: PBR = PBR + 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); // GT only: PBR = PBR - Temp1
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // GT only: PBR = 1 / PBR
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT only: Temp0 = PBR * 1 - PBR * Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); // GT only: PBR = Temp2 - Temp1
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT only: Temp0 = Temp0 * PBR + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min group over Temp2; the last Min uses a .CC destination.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // Presumably writes the flags tested by CC.LT below.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT only: PBR = Temp1 - PBR
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); // LT only: Temp0 = 1 / PBR
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT only: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT only: Temp0 = PBR * Temp0 + Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcAAA
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 = PBR * OneMinusDstAAA + DstRGB * OneMinusSrcAAA
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA * DstAAA
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl with factors (OneGl, OneMinusSrcAlphaGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslSaturation(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Start of a Mov/Min/Min group with GBR destinations; reads PBR here, whereas the later groups in this method read Temp1 - presumably the same value. Confirm.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); // Last step of the group writes Temp0.
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Same group shape with Max over Temp1; result is left in PBR.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0; the .CC destination presumably also writes the flags tested by CC.GT / CC.LE below.
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT only: Temp0 = 1 / Temp0
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // GT only: Mov/Min/Min group over Temp1 again, result left in PBR.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // GT only: Temp0 = Temp0 * Temp1 - Temp0 * PBR
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // GT only: Mov/Min/Min group over SrcRGB, final step writes Temp1.
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // GT only: Mov/Max/Max group over SrcRGB, result left in PBR.
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // GT only: Temp0 = Temp0 * PBR - Temp0 * Temp1
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE only: Temp0 = SrcAAA * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA (recomputed; Temp1 was overwritten above)
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel weights read as ConstantRGB below.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations: presumably accumulates a weighted channel sum - confirm.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Final step of the sum writes Temp1.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // Same Mul/Madd/Madd shape over Temp0; result is left in PBR.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max group over Temp2, result left in PBR.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1; presumably also writes the flags tested by CC.GT below.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT only: PBR = PBR + 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); // GT only: PBR = PBR - Temp1
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // GT only: PBR = 1 / PBR
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT only: Temp0 = PBR * 1 - PBR * Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); // GT only: PBR = Temp2 - Temp1
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT only: Temp0 = Temp0 * PBR + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min group over Temp2; the last Min uses a .CC destination.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // Presumably writes the flags tested by CC.LT below.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT only: PBR = Temp1 - PBR
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); // LT only: Temp0 = 1 / PBR
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT only: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT only: Temp0 = PBR * Temp0 + Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcAAA
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 = PBR * OneMinusDstAAA + DstRGB * OneMinusSrcAAA
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA * DstAAA
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl with factors (OneGl, OneMinusSrcAlphaGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslColor(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel weights read as ConstantRGB below.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations: presumably accumulates a weighted channel sum - confirm.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); // Final step of the sum writes Temp1.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Same Mul/Madd/Madd shape over SrcRGB; result is left in PBR.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); // Temp2 = SrcRGB + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max group over Temp2 with GBR destinations; presumably a channel-rotating maximum left in PBR - confirm.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1; the .CC destination presumably also writes the flags tested by CC.GT below.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT only: PBR = PBR + 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); // GT only: PBR = PBR - Temp1
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // GT only: PBR = 1 / PBR
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT only: Temp0 = PBR * 1 - PBR * Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); // GT only: PBR = Temp2 - Temp1
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // GT only: Temp0 = Temp0 * PBR + Temp1
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min group over Temp2; the last Min uses a .CC destination.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); // Presumably writes the flags tested by CC.LT below.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT only: PBR = Temp1 - PBR
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); // LT only: Temp0 = 1 / PBR
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); // LT only: PBR = Temp2 * Temp1 - Temp1 * Temp1
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); // LT only: Temp0 = PBR * Temp0 + Temp1
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcAAA
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 = PBR * OneMinusDstAAA + DstRGB * OneMinusSrcAAA
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA * DstAAA
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl with factors (OneGl, OneMinusSrcAlphaGl).
+ }
+
+ private static FixedFunctionAlpha GenUncorrelatedHslLuminosity(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // Constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel weights read as ConstantRGB below.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd with RRR, GGG, BBB destinations over SrcRGB: presumably accumulates a weighted channel sum - confirm.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // Final step of the sum writes Temp2.
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // Same Mul/Madd/Madd shape over Temp1; result is left in PBR.
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 - PBR
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 = Temp1 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max group over Temp1 with GBR destinations; presumably a channel-rotating maximum left in PBR - confirm.
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1; the .CC destination presumably also writes the flags tested by CC.GT below.
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT only: PBR = PBR + 1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); // GT only: PBR = PBR - Temp2
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); // GT only: PBR = 1 / PBR
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); // GT only: Temp0 = PBR * 1 - PBR * Temp2
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); // GT only: PBR = Temp1 - Temp2
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); // GT only: Temp0 = Temp0 * PBR + Temp2
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min group over Temp1; the last Min uses a .CC destination.
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); // Presumably writes the flags tested by CC.LT below.
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // LT only: PBR = Temp2 - PBR
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); // LT only: Temp0 = 1 / PBR
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); // LT only: PBR = Temp1 * Temp2 - Temp2 * Temp2
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); // LT only: Temp0 = PBR * Temp0 + Temp2
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcAAA
+ asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); // Temp1 = PBR * OneMinusDstAAA + DstRGB * OneMinusSrcAAA
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA * DstAAA
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); // Alpha setup: AddGl with factors (OneGl, OneMinusSrcAlphaGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrc(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Madd=a*b+c, Mmsub=a*b-c*d, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * DstAAA - SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = SrcRGB * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // Alpha setup: AddGl with factors (OneGl, ZeroGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOver(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.SrcRGB); // PBR = PBR * Temp1 - PBR * SrcRGB
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.PBR); // Temp0 = SrcRGB * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstOver(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); // PBR = PBR * Temp1 - PBR * Temp1 (both products use the same operands)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp1 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcIn(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mmsub=a*b-c*d and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * DstAAA - SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA) again, for the alpha computation below
+ asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); // Temp1 (RToA destination) = DstAAA - PBR; RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcOut(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // Temp1 (RToA destination) = min(SrcAAA, OneMinusDstAAA); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSrcAtop(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * DstAAA - SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA) again, for the Madd below
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (ZeroGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointDstAtop(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * DstAAA - Temp1 * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = SrcRGB * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // Alpha setup: AddGl with factors (OneGl, ZeroGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointXor(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // Temp1 = min(DstAAA, OneMinusSrcAAA)
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); // Temp1 (RToA destination) = Temp1 + PBR; RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointPlus(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); // PBR = SrcRGB * SrcAAA
+ asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); // Temp0 = DstRGB + PBR
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (OneGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointMultiply(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointScreen(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // Temp0 = PBR * 1 - SrcRGB * Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointOverlay(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably what the ConstantRGB operand below reads.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // Temp0 = PBR - ConstantRGB; the .CC destination presumably also writes the flags tested by CC.LE / CC.GT below.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE only: Temp0 = SrcRGB * Temp1 + SrcRGB * Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT only: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GT only: PBR = 1 - SrcRGB
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT only: PBR = Temp0 * PBR + Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT only: Temp0 = 1 - PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDarken(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = min(SrcRGB, PBR)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLighten(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = max(SrcRGB, PBR)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorDodge(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); // Temp0 = 1 - SrcRGB; the .CC destination presumably also writes the flags tested by CC.GT / CC.LE below.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); // GT only: PBR = 1 / Temp0
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); // GT only: PBR = PBR * Temp1
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GT only: Temp0 = min(PBR, 1)
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // LE only: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // PBR = Temp1 - 0; presumably refreshes the flags for the CC.LE test below.
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // LE only: Temp0 = 0
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointColorBurn(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); // Temp0 = SrcRGB - 0; the .CC destination presumably also writes the flags tested by CC.GT below.
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB); // GT only: PBR = 1 / SrcRGB
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // GT only: PBR = PBR * 1 - PBR * Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT only: Temp0 = 1 - PBR
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // Temp0 = max(Temp0, 0)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // PBR = 1 - Temp1; presumably refreshes the flags for the CC.LE test below.
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // LE only: Temp0 = 1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardLight(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably what the ConstantRGB operand below reads.
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // Temp0 = SrcRGB - ConstantRGB; the .CC destination presumably also writes the flags tested by CC.LE / CC.GT below.
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // LE only: Temp0 = SrcRGB * Temp1 + SrcRGB * Temp1
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // GT only: Temp0 = 1 - Temp1
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GT only: PBR = 1 - SrcRGB
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); // GT only: PBR = Temp0 * PBR + Temp0 * PBR
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // GT only: Temp0 = 1 - PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointSoftLight(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmadd=a*b+c*d, Mmsub=a*b-c*d, Rcp=1/a, that a PBR operand reads the previous result, and that ConstantRGB reads the most recently set constant; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // Constant slot 4 = (0.25, 0.25, 0.25)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // PBR = PBR - ConstantRGB; the .CC destination presumably also writes the flags tested by CC.GT / CC.LE below.
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); // Constant slot 0 = (0.2605, 0.2605, 0.2605)
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT only: PBR = Temp1 * ConstantRGB
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); // Constant slot 1 = (-0.7817, -0.7817, -0.7817)
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT only: PBR = Temp1 * PBR + Temp1 * ConstantRGB
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); // Constant slot 2 = (0.3022, 0.3022, 0.3022)
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // GT only: PBR = Temp1 * PBR + Temp1 * ConstantRGB
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); // Constant slot 3 = (0.2192, 0.2192, 0.2192)
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); // GT only: Temp0 = PBR + ConstantRGB
+ asm.SetConstant(5, 16f, 16f, 16f); // Constant slot 5 = (16, 16, 16)
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE only: PBR = Temp1 * ConstantRGB
+ asm.SetConstant(6, 12f, 12f, 12f); // Constant slot 6 = (12, 12, 12)
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE only: PBR = Temp1 * PBR - Temp1 * ConstantRGB
+ asm.SetConstant(7, 3f, 3f, 3f); // Constant slot 7 = (3, 3, 3)
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); // LE only: Temp0 = Temp1 * PBR + Temp1 * ConstantRGB
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // PBR = SrcRGB + SrcRGB
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1; presumably refreshes the flags for the CC.LE test below.
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // LE only: Temp0 = Temp1 * 1 - Temp1 * Temp1
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // PBR = SrcRGB + SrcRGB
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointDifference(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // Temp0 = PBR - SrcRGB; the .CC destination presumably also writes the flags tested by CC.LT below.
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // LT only: Temp0 = SrcRGB - Temp1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointExclusion(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // PBR = PBR * 1 - SrcRGB * Temp1
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // Temp0 = PBR * 1 - SrcRGB * Temp1 (same operands as the line above, applied to its result)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointInvertRGB(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); // Temp0 = SrcRGB * 1 - SrcRGB * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); // Temp0 = Temp0 * DstAAA - Temp0 * PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA) again, for the Madd below
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // Alpha setup: AddGl with factors (ZeroGl, OneGl).
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearDodge(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = min(PBR, 1)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearBurn(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // PBR = SrcRGB + PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 = max(PBR, 0)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointVividLight(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f); // Constant slot 0 = (0.5, 0.5, 0.5); presumably what the ConstantRGB operand below reads.
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // PBR = SrcRGB - ConstantRGB; the .CC destination presumably also writes the flags tested by CC.GE / CC.LT below.
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // GE only: PBR = 1 - SrcRGB
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); // GE only: PBR = PBR + PBR
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); // GE only: PBR = 1 / PBR
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); // GE only: PBR = PBR * Temp1
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // GE only: Temp0 = min(PBR, 1)
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // LT only: PBR = SrcRGB + SrcRGB
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); // LT only: PBR = 1 / PBR
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); // LT only: PBR = PBR * 1 - PBR * Temp1
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); // LT only: PBR = min(PBR, 1)
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); // LT only: Temp0 = 1 - PBR
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // PBR = SrcRGB - 0; presumably refreshes the flags for the CC.LE test below.
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE only: Temp0 = SrcAAA * 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // PBR = SrcRGB - 1; presumably refreshes the flags for the CC.GE test below.
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE only: Temp0 = 1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointLinearLight(ref UcodeAssembler asm) // Issues, in order, the assembler calls for the blend variant named by this method and returns the alpha setup paired with it.
+ { // NOTE(review): notes assume Mul=a*b, Madd=a*b+c, Mmsub=a*b-c*d, Rcp=1/a, and that a PBR operand reads the previous result; confirm against UcodeAssembler.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR = 1 / DstAAA
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = DstRGB * PBR
+ asm.SetConstant(0, 2f, 2f, 2f); // Constant slot 0 = (2, 2, 2); presumably what the ConstantRGB operand below reads.
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // PBR = SrcRGB * ConstantRGB + PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // PBR = PBR - 1
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(PBR, 0)
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = min(PBR, 1)
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(DstAAA, OneMinusSrcAAA)
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); // PBR = PBR * Temp1 - PBR * Temp0
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); // Temp0 = Temp0 * DstAAA + PBR
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(SrcAAA, OneMinusDstAAA)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); // Temp0 = PBR * SrcRGB + Temp0
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // PBR = SrcAAA + DstAAA
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // Temp1 (RToA destination) = min(PBR, 1); RToA presumably routes the value into the alpha channel - confirm.
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); // Temp0 = Temp0 (self-move; its purpose is not evident from this file - TODO confirm).
+ return FixedFunctionAlpha.Disabled; // Returns the Disabled alpha setup; presumably alpha comes from the RToA write above.
+ }
+
+ private static FixedFunctionAlpha GenDisjointPinLight(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint pin light" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // src.rgb + src.rgb
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // the .CC destination presumably sets the condition tested by CC.GT / CC.LE below
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // CC.GT path
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LE path (two calls)
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenDisjointHardMix(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint hard mix" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // (src.rgb + previous result) - 1; the .CC destination presumably sets the condition tested below
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LT: Temp0 = src.a * 0
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 = 1
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslHue(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint HSL hue" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode and swizzle (.GBR/.RRR/...) semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov + two Min through .GBR: looks like a rotating per-channel minimum of src.rgb - TODO confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov + two Max through .GBR: looks like the matching maximum of src.rgb - TODO confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // previous result minus Temp0; the .CC destination presumably sets the condition tested by CC.GT / CC.LE below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path runs through the second Mmsub below
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // same Mov/Min/Min and Mov/Max/Max pattern, now applied to Temp1
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: Temp0 = src.a * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // same Rcp/Mul pair as at the top of the method, issued again
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // looks like luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd through .RRR/.GGG/.BBB: reads as a weighted sum of channels, ending in Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted-sum pattern applied to Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compare that result against one
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT path (six calls); NOTE(review): looks like a clip of values above 1 - confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min also targets .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT path (four calls); NOTE(review): looks like a clip of values below 0 - confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from dst once more before the closing sequence
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslSaturation(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint HSL saturation" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode and swizzle (.GBR/.RRR/...) semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // source operand is PBR here, whereas the later Mov calls in this pattern read Temp1; NOTE(review): presumably equivalent because the preceding Mul result is still in PBR - confirm
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); // Mov + two Min through .GBR: looks like a rotating per-channel minimum of Temp1 - TODO confirm
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max through .GBR: looks like the matching maximum of Temp1 - TODO confirm
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // previous result minus Temp0; the .CC destination presumably sets the condition tested by CC.GT / CC.LE below
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // CC.GT path runs through the second Mmsub below
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // same Mov/Min/Min and Mov/Max/Max pattern, now applied to src.rgb; note Temp1 is overwritten here
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1);
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: Temp0 = src.a * 0
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // same Rcp/Mul pair as at the top of the method; rebuilds Temp1 from dst
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // looks like luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd through .RRR/.GGG/.BBB: reads as a weighted sum of channels, ending in Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same weighted-sum pattern applied to Temp0
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compare that result against one
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT path (six calls); NOTE(review): looks like a clip of values above 1 - confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min also targets .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT path (four calls); NOTE(review): looks like a clip of values below 0 - confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from dst once more before the closing sequence
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslColor(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint HSL color" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode and swizzle (.GBR/.RRR/...) semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // looks like luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul/Madd/Madd through .RRR/.GGG/.BBB: reads as a weighted sum of channels, ending in Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // same weighted-sum pattern applied to src.rgb
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // difference of the two weighted sums
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); // Temp2 = src.rgb + that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Max/Max over Temp2
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compare that result against one
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT path (six calls); NOTE(review): looks like a clip of values above 1 - confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov/Min/Min over Temp2; the last Min also targets .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // CC.LT path (four calls); NOTE(review): looks like a clip of values below 0 - confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from dst before the closing sequence
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenDisjointHslLuminosity(ref UcodeAssembler asm) // Issues the asm calls for the "disjoint HSL luminosity" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode and swizzle (.GBR/.RRR/...) semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // looks like luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd through .RRR/.GGG/.BBB over src.rgb: reads as a weighted sum of channels, ending in Temp2
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same weighted-sum pattern applied to Temp1
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // difference of the two weighted sums
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 = Temp1 + that difference
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max over Temp1
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compare that result against one
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT path (six calls); NOTE(review): looks like a clip of values above 1 - confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min over Temp1; the last Min also targets .CC
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // CC.LT path (four calls); NOTE(review): looks like a clip of values below 0 - confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above; it is rebuilt from dst before the closing sequence
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); // PBR = min(dst.a, 1 - src.a), read literally
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR);
+ asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); // PBR = min(src.a, 1 - dst.a), read literally
+ asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); // min(src.a + dst.a, 1) written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenConjointSrc(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint src" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(dst.a, src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(src.a - dst.a, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = src.rgb * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOver(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint src-over" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.SrcRGB); // Temp0 = src.rgb
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: src.rgb * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstOver(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint dst-over" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 = PBR; presumably still the Mul result above - TODO confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp1 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcIn(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint src-in" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(dst.a, src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR
+ return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcOut(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint src-out" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(src.a - dst.a, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); // same Sub/Max pair again, this time written through Temp1.RToA
+ asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); // presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenConjointSrcAtop(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint src-atop" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(dst.a, src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(dst.a - src.a, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDstAtop(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint dst-atop" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(dst.a, src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); // Temp0 = Temp1 * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(src.a - dst.a, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); // Temp0 = src.rgb * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointXor(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint xor" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(src.a - dst.a, 0)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(dst.a - src.a, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); // Temp1 = dst.a - src.a; the .CC destination presumably sets the condition tested by CC.LT below
+ asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); // CC.LT: Temp1 = src.a - dst.a; together these read as |dst.a - src.a| - TODO confirm
+ asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); // written through Temp1.RToA; presumably the output alpha - TODO confirm
+ asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0);
+ return FixedFunctionAlpha.Disabled; // presumably alpha then comes from the program itself rather than fixed-function blending - confirm against the caller
+ }
+
+ private static FixedFunctionAlpha GenConjointMultiply(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint multiply" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * PBR (presumably still the Mul result above)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointScreen(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint screen" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // Temp0 = PBR * 1 - src.rgb * Temp1, read literally
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointOverlay(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint overlay" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); // previous result minus ConstantRGB (presumably the 0.5 just set); the .CC destination presumably sets the condition tested below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: src.rgb * Temp1 added to itself
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path (four calls): result ends in Temp0
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDarken(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint darken" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = min(src.rgb, PBR)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLighten(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint lighten" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); // Temp0 = max(src.rgb, PBR)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointColorDodge(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint color dodge" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); // Temp0 = 1 - src.rgb; the .CC destination presumably sets the condition tested by CC.GT / CC.LE below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); // CC.GT path (three calls): min(Temp1 / (1 - src.rgb), 1), read literally
+ asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); // compare Temp1 against zero
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); // CC.LE: Temp0 = 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointColorBurn(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint color burn" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); // Temp0 = src.rgb - 0; the .CC destination presumably sets the condition tested by CC.GT below
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB); // CC.GT path (three calls): result ends in Temp0
+ asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // Temp0 = max(Temp0, 0)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); // compare one against Temp1
+ asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); // CC.LE: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointHardLight(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint hard light" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // src.rgb minus ConstantRGB (presumably the 0.5 just set); the .CC destination presumably sets the condition tested below
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); // CC.LE path: src.rgb * Temp1 added to itself
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); // CC.GT path (four calls): result ends in Temp0
+ asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointSoftLight(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint soft light" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(4, 0.25f, 0.25f, 0.25f); // NOTE(review): first argument presumably selects a constant slot, and each following ConstantRGB operand presumably reads the constant set just before it - confirm in UcodeAssembler
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); // previous result minus 0.25; the .CC destination presumably sets the condition tested by CC.GT / CC.LE below
+ asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f);
+ asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // CC.GT path: chained multiply-adds over Temp1 with constants 0.2605, -0.7817, 0.3022, 0.2192; looks like a polynomial evaluation - TODO confirm
+ asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f);
+ asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f);
+ asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB);
+ asm.SetConstant(5, 16f, 16f, 16f);
+ asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); // CC.LE path: chained steps over Temp1 with constants 16, 12, 3; result ends in Temp0
+ asm.SetConstant(6, 12f, 12f, 12f);
+ asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.SetConstant(7, 3f, 3f, 3f);
+ asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB);
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // src.rgb + src.rgb
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // compare 2 * src.rgb against one
+ asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); // CC.LE: Temp0 = Temp1 * 1 - Temp1 * Temp1, read literally
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // same Add/Sub pair again, this time without .CC
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); // Temp0 = Temp0 * PBR + Temp1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointDifference(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint difference" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); // Temp0 = previous result - src.rgb; the .CC destination presumably sets the condition tested by CC.LT below
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); // CC.LT: Temp0 = src.rgb - Temp1; together these read as an absolute difference - TODO confirm
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointExclusion(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint exclusion" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // PBR = PBR * 1 - src.rgb * Temp1, read literally
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); // the src.rgb * Temp1 product is subtracted a second time, result into Temp0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointInvertRGB(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint invert RGB" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); // Temp0 = src.rgb * 1 - src.rgb * PBR, read literally
+ asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); // PBR = min(dst.a, src.a)
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // PBR = max(dst.a - src.a, 0)
+ asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); // Temp0 = Temp1 * PBR + Temp0
+ return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearDodge(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint linear dodge" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); // Temp0 = min(src.rgb + previous result, 1)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearBurn(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint linear burn" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); // Temp0 = max(src.rgb + previous result - 1, 0)
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointVividLight(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint vivid light" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 0.5f, 0.5f, 0.5f);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); // src.rgb - ConstantRGB (presumably the 0.5 just set); the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); // CC.GE path (five calls): result ends in Temp0
+ asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR);
+ asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR);
+ asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // CC.LT path (five calls): result ends in Temp0
+ asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); // compare src.rgb against zero
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // CC.LE: Temp0 = src.a * 0
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); // compare src.rgb against one
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // CC.GE: Temp0 = 1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; sets the condition for the CC.GE / CC.LT calls below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointLinearLight(ref UcodeAssembler asm) // Issues the asm calls for the "conjoint linear light" blend program (name per the method; formula not verified here).
+ { // NOTE(review): notes below read operand names literally; opcode semantics live in UcodeAssembler, which is not visible here.
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reciprocal of dst alpha into PBR
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst.rgb * PBR; presumably the un-premultiplied dst color - TODO confirm
+ asm.SetConstant(0, 2f, 2f, 2f);
+ asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); // src.rgb * ConstantRGB (presumably the 2.0 just set) + PBR
+ asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+ asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); // Max with 0 then Min with 1: reads as a clamp to [0, 1]
+ asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src.a - dst.a; the .CC destination presumably sets the condition tested by CC.GE / CC.LT below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // CC.GE path: Temp0 * dst.a + src.rgb * PBR, read literally
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // CC.LT path: PBR = dst.a - src.a
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // CC.LT path: Temp0 * src.a + Temp1 * PBR, read literally
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // presumably alpha is left to fixed-function blending with this op and these factors - confirm against FixedFunctionAlpha
+ }
+
+ private static FixedFunctionAlpha GenConjointPinLight(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint pin-light blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // src RGB + src RGB
+ asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); // Temp0 = PBR - ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); // PBR - Temp1 with .CC; presumably sets the condition tested by CC.GT / CC.LE below
+ asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); // GT: Temp0 limited from below by ConstantZero
+ asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); // LE: src RGB + src RGB again
+ asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); // LE: Temp0 = Min of that value and Temp1
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHardMix(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint hard-mix blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); // src RGB + PBR; PBR is read here without an explicit write after the Mul above, so it presumably carries the previous instruction's result - TODO confirm
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); // Temp0 = PBR - ConstantOne with .CC; presumably sets the condition tested by CC.LT / CC.GE below
+ asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LT: Temp0 = src alpha * ConstantZero
+ asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); // GE: Temp0 = ConstantOne
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslHue(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint HSL-hue blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode, swizzle and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // Mov + two Min with a .GBR destination: presumably a rotate-and-min that leaves the smallest src channel in Temp0 - TODO confirm swizzle semantics
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); // same pattern with Max: presumably the largest src channel ends up in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 with .CC; the CC.GT run below and the CC.LE line after it select on this
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT side starts here
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); // reads as Temp0 = Temp0 * src RGB - Temp0 * PBR (Mmsub taken as a*b - c*d - TODO confirm)
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // same Min pattern applied to Temp1, result kept in Temp2
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); // same Max pattern applied to Temp1, result left in PBR
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); // reads as Temp0 = Temp0 * PBR - Temp0 * Temp2
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE side: Temp0 = src alpha * ConstantZero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // recomputes Temp1 = dst RGB * (1 / dst alpha) with the same two calls as at the top
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul + two Madd with RRR/GGG/BBB destinations: presumably a weighted channel sum of the dst-derived value, final write to Temp1 (which overwrites Temp1)
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same pattern applied to Temp0, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Max pattern over Temp2, then compared against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT run: adjusts Temp0 when that value exceeded ConstantOne; presumably the upper clip of the ClipColor step in KHR_blend_equation_advanced - TODO confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min pattern over Temp2; the last Min carries .CC and the CC.LT run below selects on it
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT run: presumably the lower clip of the same step - TODO confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so dst RGB * (1 / dst alpha) is rebuilt before the final combine
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslSaturation(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint HSL-saturation blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode, swizzle and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); // Mov + two Min with a .GBR destination: presumably a rotate-and-min over the dst-derived value, result kept in Temp0; this Mov reads PBR where the later runs read Temp1 - TODO confirm they hold the same value here
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // same pattern with Max over Temp1, result left in PBR
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); // Temp0 = PBR - Temp0 with .CC; the CC.GT run below and the CC.LE line after it select on this
+ asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); // GT side starts here
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); // reads as Temp0 = Temp0 * Temp1 - Temp0 * PBR (Mmsub taken as a*b - c*d - TODO confirm)
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // same Min pattern applied to src RGB, result written to Temp1 (overwrites Temp1)
+ asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); // same Max pattern applied to src RGB, result left in PBR
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); // reads as Temp0 = Temp0 * PBR - Temp0 * Temp1
+ asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); // LE side: Temp0 = src alpha * ConstantZero
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 may have been overwritten above, so dst RGB * (1 / dst alpha) is rebuilt
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul + two Madd with RRR/GGG/BBB destinations: presumably a weighted channel sum of the dst-derived value, final write to Temp1 (which overwrites Temp1)
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); // same pattern applied to Temp0, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); // Temp2 = Temp0 + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Max pattern over Temp2, then compared against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT run: adjusts Temp0 when that value exceeded ConstantOne; presumably the upper clip of the ClipColor step in KHR_blend_equation_advanced - TODO confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min pattern over Temp2; the last Min carries .CC and the CC.LT run below selects on it
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT run: presumably the lower clip of the same step - TODO confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so dst RGB * (1 / dst alpha) is rebuilt before the final combine
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslColor(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint HSL-color blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode, swizzle and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); // Mul + two Madd with RRR/GGG/BBB destinations: presumably a weighted channel sum of the dst-derived value, final write to Temp1 (which overwrites Temp1)
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // same pattern applied to src RGB, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); // PBR = Temp1 - PBR
+ asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); // Temp2 = src RGB + PBR
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Mov + two Max with a .GBR destination: presumably a rotate-and-max over Temp2; then compared against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT run: adjusts Temp0 when that value exceeded ConstantOne; presumably the upper clip of the ClipColor step in KHR_blend_equation_advanced - TODO confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); // Min pattern over Temp2; the last Min carries .CC and the CC.LT run below selects on it
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); // LT run: presumably the lower clip of the same step - TODO confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so dst RGB * (1 / dst alpha) is rebuilt before the final combine
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+
+ private static FixedFunctionAlpha GenConjointHslLuminosity(ref UcodeAssembler asm) // Issues the assembler calls for the conjoint HSL-luminosity blend and returns its alpha descriptor. NOTE(review): the per-line notes read opcode, swizzle and CC meaning off the method/enum names - confirm against UcodeAssembler.
+ {
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // reads as PBR = 1 / dst alpha
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 = dst RGB * PBR (dst colour divided by dst alpha under that reading)
+ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant slot 0 = (0.3, 0.59, 0.11); presumably per-channel luminance weights - TODO confirm
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul + two Madd with RRR/GGG/BBB destinations: presumably a weighted channel sum of src RGB, final write to Temp2
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same pattern applied to Temp1, result left in PBR
+ asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+ asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // PBR = Temp2 - PBR
+ asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // Temp1 = Temp1 + PBR (overwrites Temp1)
+ asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov + two Max with a .GBR destination: presumably a rotate-and-max over Temp1; then compared against ConstantOne
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+ asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // GT run: adjusts Temp0 when that value exceeded ConstantOne; presumably the upper clip of the ClipColor step in KHR_blend_equation_advanced - TODO confirm
+ asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+ asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+ asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+ asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+ asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+ asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Min pattern over Temp1; the last Min carries .CC and the CC.LT run below selects on it
+ asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+ asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+ asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // LT run: presumably the lower clip of the same step - TODO confirm
+ asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+ asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+ asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+ asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 was overwritten above, so dst RGB * (1 / dst alpha) is rebuilt before the final combine
+ asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+ asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // src alpha - dst alpha with .CC; selects one of the two combines below
+ asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); // GE: reads as Temp0 = Temp0 * dst alpha + src RGB * PBR
+ asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); // LT: PBR = dst alpha - src alpha
+ asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); // LT: reads as Temp0 = Temp0 * src alpha + Temp1 * PBR
+ return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha descriptor: MaximumGl op, OneGl for both factors
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
new file mode 100644
index 00000000..8072c6af
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
@@ -0,0 +1,115 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ /// <summary>
+ /// Advanced blend manager.
+ /// </summary>
+ class AdvancedBlendManager
+ {
+ private const int InstructionRamSize = 128;
+ private const int InstructionRamSizeMask = InstructionRamSize - 1;
+
+ private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+ private readonly uint[] _code;
+ private int _ip;
+
+ /// <summary>
+ /// Creates a new instance of the advanced blend manager.
+ /// </summary>
+ /// <param name="state">GPU state of the channel owning this manager</param>
+ public AdvancedBlendManager(DeviceStateWithShadow<ThreedClassState> state)
+ {
+ _state = state;
+ _code = new uint[InstructionRamSize];
+ }
+
+ /// <summary>
+ /// Sets the start offset of the blend microcode in memory.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadBlendUcodeStart(int argument)
+ {
+ _ip = argument;
+ }
+
+ /// <summary>
+ /// Pushes one word of blend microcode.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadBlendUcodeInstruction(int argument)
+ {
+ _code[_ip++ & InstructionRamSizeMask] = (uint)argument; // the mask wraps the write index inside the InstructionRamSize-word buffer
+ }
+
+ /// <summary>
+ /// Tries to identify the current advanced blend function being used,
+ /// given the current state and microcode that was uploaded.
+ /// </summary>
+ /// <param name="descriptor">Advanced blend descriptor</param>
+ /// <returns>True if the function was found, false otherwise</returns>
+ public bool TryGetAdvancedBlend(out AdvancedBlendDescriptor descriptor)
+ {
+ Span<uint> currentCode = new Span<uint>(_code);
+ byte codeLength = (byte)_state.State.BlendUcodeSize; // only the low 8 bits of the size register are used
+
+ if (currentCode.Length > codeLength)
+ {
+ currentCode = currentCode.Slice(0, codeLength); // hash only the first codeLength words
+ }
+
+ Hash128 hash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(currentCode));
+
+ descriptor = default;
+
+ if (!AdvancedBlendPreGenTable.Entries.TryGetValue(hash, out var entry))
+ {
+ return false;
+ }
+
+ if (entry.Constants != null)
+ {
+ bool constantsMatch = true;
+
+ for (int i = 0; i < entry.Constants.Length; i++)
+ {
+ RgbFloat constant = entry.Constants[i];
+ RgbHalf constant2 = _state.State.BlendUcodeConstants[i];
+
+ if ((Half)constant.R != constant2.UnpackR() || // table constants are converted to Half before comparing with the unpacked state values
+ (Half)constant.G != constant2.UnpackG() ||
+ (Half)constant.B != constant2.UnpackB())
+ {
+ constantsMatch = false;
+ break;
+ }
+ }
+
+ if (!constantsMatch)
+ {
+ return false;
+ }
+ }
+
+ if (entry.Alpha.Enable != _state.State.BlendUcodeEnable)
+ {
+ return false;
+ }
+
+ if (entry.Alpha.Enable == BlendUcodeEnable.EnableRGBA && // alpha op and factors are only compared for EnableRGBA entries
+ (entry.Alpha.AlphaOp != _state.State.BlendStateCommon.AlphaOp ||
+ entry.Alpha.AlphaSrcFactor != _state.State.BlendStateCommon.AlphaSrcFactor ||
+ entry.Alpha.AlphaDstFactor != _state.State.BlendStateCommon.AlphaDstFactor))
+ {
+ return false;
+ }
+
+ descriptor = new AdvancedBlendDescriptor(entry.Op, entry.Overlap, entry.SrcPreMultiplied);
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
new file mode 100644
index 00000000..d35d8abf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+ /// <summary>
+ /// Advanced blend function entry.
+ /// </summary>
+    readonly struct AdvancedBlendEntry
+    {
+        /// <summary>
+        /// Advanced blend operation.
+        /// </summary>
+        public AdvancedBlendOp Op { get; }
+
+        /// <summary>
+        /// Advanced blend overlap mode.
+        /// </summary>
+        public AdvancedBlendOverlap Overlap { get; }
+
+        /// <summary>
+        /// Whether the source input is pre-multiplied.
+        /// </summary>
+        public bool SrcPreMultiplied { get; }
+
+        /// <summary>
+        /// Constants used by the microcode.
+        /// </summary>
+        public RgbFloat[] Constants { get; }
+
+        /// <summary>
+        /// Fixed function alpha state.
+        /// </summary>
+        public FixedFunctionAlpha Alpha { get; }
+
+        /// <summary>
+        /// Creates a new advanced blend function entry.
+        /// </summary>
+        /// <param name="op">Advanced blend operation</param>
+        /// <param name="overlap">Advanced blend overlap mode</param>
+        /// <param name="srcPreMultiplied">Whether the source input is pre-multiplied</param>
+        /// <param name="constants">Constants used by the microcode</param>
+        /// <param name="alpha">Fixed function alpha state</param>
+        public AdvancedBlendEntry(
+            AdvancedBlendOp op,
+            AdvancedBlendOverlap overlap,
+            bool srcPreMultiplied,
+            RgbFloat[] constants,
+            FixedFunctionAlpha alpha)
+        {
+            Op = op;
+            Overlap = overlap;
+            SrcPreMultiplied = srcPreMultiplied;
+            Constants = constants;
+            Alpha = alpha;
+        }
+    }
+
+ /// <summary>
+ /// Pre-generated hash table with advanced blend functions used by the driver.
+ /// </summary>
+ static class AdvancedBlendPreGenTable
+ {
+ /// <summary>
+ /// Advanced blend functions dictionary.
+ /// </summary>
+ public static readonly IReadOnlyDictionary Entries = new Dictionary()
+ {
+ { new Hash128(0x19ECF57B83DE31F7, 0x5BAE759246F264C0), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xDE1B14A356A1A9ED, 0x59D803593C607C1D), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A3C3A6D32DEC368, 0xBCAE519EC6AAA045), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6FD380261A63B240, 0x17C3B335DBB9E3DB), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x1D39164823D3A2D1, 0xC45350959CE1C8FB), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x18DF09FF53B129FE, 0xC02EDA33C36019F6), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x5973E583271EBF06, 0x711497D75D1272E0), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x4759E0E5DA54D5E8, 0x1FDD57C0C38AFA1F), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x337684D43CCE97FA, 0x0139E30CC529E1C9), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xDA59E85D8428992D, 0x1D3D7C64C9EF0132), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x9455B949298CE805, 0xE73D3301518BE98A), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xBDD3B4DEDBE336AA, 0xBFA4DCD50D535DEE), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x22D4E970A028649A, 0x4F3FCB055FCED965), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xA346A91311D72114, 0x151A27A3FB0A1904), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8A307241061FACD6, 0xA39D1826440B8EE7), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB3BE569485EFFFE0, 0x0BA4E269B3CFB165), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x36FCA3277DC11822, 0x2BC0F6CAC2029672), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4A6226AF2DE9BD7F, 0xEB890D7DA716F73A), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF364CAA94E160FEB, 0xBF364512C72A3797), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x6BF791AB4AC19C87, 0x6FA17A994EA0FCDE), new AdvancedBlendEntry(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x053C75A0AE0BB222, 0x03C791FEEB59754C), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x25762AB40B6CBDE9, 0x595E9A968AC4F01C), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xC2D05E2DBE16955D, 0xB8659C7A3FCFA7CE), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x223F220B8F74CBFB, 0xD3DD19D7C39209A5), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xD0DAE57A9F1FE78A, 0x353796BCFB8CE30B), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x601C8CBEC07FF8FF, 0xB8E22882360E8695), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x3A55B7B78C76A7A8, 0x206F503B2D9FFEAA), new AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x80BC65C7831388E5, 0xC652457B2C766AEC), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3D3A912E5833EE13, 0x307895951349EE33), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x289105BE92E81803, 0xFD8F1F03D15C53B4), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x007AE3BD140764EB, 0x0EE05A0D2E80BBAE), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x77F7EE0DB3FDDB96, 0xDEA47C881306DB3E), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x66F4E9A7D73CA157, 0x1486058A177DB11C), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x593E9F331612D618, 0x9D217BEFA4EB919A), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x0A5194C5E6891106, 0xDD8EC6586106557C), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8D77173D5E06E916, 0x06AB190E7D10F4D4), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x655B4EBC148981DA, 0x455999EF2B9BD28A), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x98F5437D5F518929, 0xBFF4A6E83183DB63), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6ADDEFE3B9CEF2FD, 0xB6F6272AFECB1AAB), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x80953F0953BF05B1, 0xD59ABFAA34F8196F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xA401D9AA2A39C121, 0xFC0C8005C22AD7E3), new AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x06274FB7CA9CDD22, 0x6CE8188B1A9AB6EF), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x0B079BE7F7F70817, 0xB72E7736CA51E321), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x66215C99403CEDDE, 0x900B733D62204C48), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x12DEF2AD900CAD6C, 0x58CF5CC3004910DF), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x272BA3A49F64DAE4, 0xAC70B96C00A99EAF), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x206C34AAA7D3F545, 0xDA4B30CACAA483A0), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3D93494920D257BE, 0xDCC573BE1F5F4449), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0D7417D80191107B, 0xEAF40547827E005F), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xEC1B03E8C883F9C9, 0x2D3CA044C58C01B4), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x58A19A0135D68B31, 0x82F35B97AED068E5), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x20489F9AB36CC0E3, 0x20499874219E35EE), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xBB176935E5EE05BF, 0x95B26D4D30EA7A14), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x5FF9393C908ACFED, 0x068B0BD875773ABF), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x03181F8711C9802C, 0x6B02C7C6B224FE7B), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x2EE2209021F6B977, 0xF3AFA1491B8B89FC), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xD8BA4DD2EDE4DC9E, 0x01006114977CF715), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD156B99835A2D8ED, 0x2D0BEE9E135EA7A7), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x20CE8C898ED4BE27, 0x1514900B6F5E8F66), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCDE5F743820BA2D9, 0x917845FE2ECB083D), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xEB03DF4A0C1D14CD, 0xBAE2E831C6E8FFE4), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1DC9E49AABC779AC, 0x4053A1441EB713D3), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xFBDEF776248F7B3E, 0xE05EEFD65AC47CB7), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x415A1A48E03AA6E7, 0x046D7EE33CA46B9A), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x59A6901EC9BB2041, 0x2F3E19CE5EEC3EBE), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x044B2B6E105221DA, 0x3089BBC033F994AF), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x374A5A24AA8E6CC5, 0x29930FAA6215FA2B), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x30CD0F7AF0CF26F9, 0x06CCA6744DE7DCF5), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A6C9A1F6FE494A5, 0xA0CFAF77617E54DD), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x081AF6DAAB1C8717, 0xBFEDCE59AE3DC9AC), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3518E44573AB68BA, 0xC96EE71AF9F8F546), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF89E81FE8D73C96F, 0x4583A04577A0F21C), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xDF4026421CB61119, 0x14115A1F5139AFC7), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x91A20262C3E3A695, 0x0B3A102BFCDC6B1C), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x44F4C7CCFEB9EBFA, 0xF68394E6D56E5C2F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB89F17C7021E9760, 0x430357EE0F7188EF), new AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xDA2D20EA4242B8A0, 0x0D1EC05B72E3838F), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x855DFEE1208D11B9, 0x77C6E3DDCFE30B85), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x9B3808439683FD58, 0x123DCBE4705AB25E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xA42CF045C248A00A, 0x0C6C63C24EA0B0C1), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x320A83B6D00C8059, 0x796EDAB3EB7314BC), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x45253AC9ABFFC613, 0x8F92EA70195FB573), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x1A5D263B588274B6, 0x167D305F6C794179), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x709C1A837FE966AC, 0x75D8CE49E8A78EDB), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x8265C26F85E4145F, 0x932E6CCBF37CB600), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x3F252B3FEF983F27, 0x9370D7EEFEFA1A9E), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x66A334A4AEA41078, 0xCB52254E1E395231), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xFDD05C53B25F0035, 0xB7E3ECEE166C222F), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x25D932A77FFED81A, 0xA50D797B0FCA94E8), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4A953B6F5F7D341C, 0xDC05CFB50DDB5DC1), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x838CB660C4F41F6D, 0x9E7D958697543495), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x4DF6EC1348A8F797, 0xA128E0CD69DB5A64), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x178CDFAB9A015295, 0x2BF40EA72E596D57), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x338FC99050E56AFD, 0x2AF41CF82BE602BF), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x62E02ED60D1E978E, 0xBF726B3E68C11E4D), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xFBAF92DD4C101502, 0x7AF2EDA6596B819D), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x0EF1241F65D4B50A, 0xE8D85DFA6AEDDB84), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x77FE024B5C9D4A18, 0xF19D48A932F6860F), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x9C88CBFA2E09D857, 0x0A0361704CBEEE1D), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x5B94127FA190E640, 0x8D1FEFF837A91268), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xB9C9105B7E063DDB, 0xF6A70E1D511B96FD), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xF0751AAE332B3ED1, 0xC40146F5C83C2533), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x579EB12F595F75AD, 0x151BF0504703B81B), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xF9CA152C03AC8C62, 0x1581336205E5CF47), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x98ACD8BB5E195D0F, 0x91F937672BE899F0), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xBF97F10FC301F44C, 0x75721789F0D48548), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x1B982263B8B08A10, 0x3350C76E2E1B27DF), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xFF20AC79F64EDED8, 0xAF9025B2D97B9273), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x9FFD986600FB112F, 0x384FDDF4E060139A), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0425E40B5B8B3B52, 0x5880CBED7CAB631C), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x16DAC8593F28623A, 0x233DBC82325B8AED), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB37E5F234B9F0948, 0xD5F957A2ECD98FD6), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xCA0FDADD1D20DBE3, 0x1A5C15CCBF1AC538), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x1C48304D73A9DF3A, 0x891DB93FA36E3450), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x53200F2279B7FA39, 0x051C2462EBF6789C), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xB88BFB80714DCD5C, 0xEBD6938D744E6A41), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xE33DC2A25FC1A976, 0x08B3DBB1F3027D45), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xCE97E71615370316, 0xE131AE49D3A4D62B), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xE059FD265149B256, 0x94AF817AC348F61F), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x16D31333D477E231, 0x9A98AAC84F72CC62), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x47FC3B0776366D3C, 0xE96D9BD83B277874), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x7230401E3FEA1F3B, 0xF0D15F05D3D1E309), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x188212F9303742F5, 0x100C51CB96E03591), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x52B755D296B44DC5, 0x4003B87275625973), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xD873ED973ADF7EAD, 0x73E68B57D92034E7), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x471F9FA34B945ACB, 0x10524D1410B3C402), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x99F569454EA0EF32, 0x6FC70A8B3A07DC8B), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x5AD55F950067AC7E, 0x4BA60A4FBABDD0AC), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x03FF2C858C9C4C5B, 0xE95AE7F561FB60E9), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x6DC0E510C7BCF9D2, 0xAE805D7CECDCB5C1), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x44832332CED5C054, 0x2F8D5536C085B30A), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x4AB4D387618AC51F, 0x495B46E0555F4B32), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x99282B49405A01A8, 0xD6FA93F864F24A8E), new AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x37B30C1064FBD23E, 0x5D068366F42317C2), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x760FAE9D59E04BC2, 0xA40AD483EA01435E), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xE786950FD9D1C6EF, 0xF9FDD5AF6451D239), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x052458BB4788B0CA, 0x8AC58FDCA1F45EF5), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x6AFC3837D1D31920, 0xB9D49C2FE49642C6), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0xAFC2911949317E01, 0xD5B63636F5CB3422), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) },
+ { new Hash128(0x13B46DF507CC2C53, 0x86DE26517E6BF0A7), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x5C372442474BE410, 0x79ECD3C0C496EF2E), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x74AAB45DBF5336E9, 0x01BFC4E181DAD442), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x43239E282A36C85C, 0x36FB65560E46AD0F), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1A3BA8A7583B8F7A, 0xE64E41D548033180), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x32BBB9859E9B565D, 0x3D5CE94FE55F18B5), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0xD947A0766AE3C0FC, 0x391E5D53E86F4ED6), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0xBD9A7C08BDFD8CE6, 0x905407634901355E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x8395475BCB0D7A8C, 0x48AF5DD501D44A70), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x80AAC23FEBD4A3E5, 0xEA8C70F0B4DE52DE), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x2F3AD1B0F1B3FD09, 0xC0EBC784BFAB8EA3), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x52B54032F2F70BFF, 0xC941D6FDED674765), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCA7B86F72EC6A99B, 0x55868A131AFE359E), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x377919B60BD133CA, 0x0FD611627664EF40), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9D4A0C5EE1153887, 0x7B869EBA218C589B), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x311F2A858545D123, 0xB4D09C802480AD62), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xCF78AA6A83AFA689, 0x9DC48B0C2182A3E1), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xC3018CD6F1CF62D1, 0x016E32DD9087B1BB), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9CB62CE0E956EE29, 0x0FB67F503E60B3AD), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3589A13C16EF3BFA, 0x15B29BFC91F3BDFB), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x3502CA5FB7529917, 0xFA51BFD0D1688071), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) },
+ { new Hash128(0x62ADC25AD6D0A923, 0x76CB6D238276D3A3), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x09FDEB1116A9D52C, 0x85BB8627CD5C2733), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x0709FED1B65E18EB, 0x5BC3AA4D99EC19CF), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xB18D28AE5DE4C723, 0xE820AA2B75C9C02E), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x6743C51621497480, 0x4B164E40858834AE), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x63D1E181E34A2944, 0x1AE292C9D9F12819), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x079523298250BFF6, 0xC0C793510603CDB5), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x4C9D0A973C805EA6, 0xD1FF59AD5156B93C), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x1E914678F3057BCD, 0xD503AE389C12D229), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0x9FDBADE5556C5311, 0x03F0CBC798FC5C94), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) },
+ { new Hash128(0xE39451534635403C, 0x606CC1CA1F452388), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) },
+ { new Hash128(0x1D39F0F0A1008AA6, 0xBFDF2B97E6C3F125), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0xDB81BED30D5BDBEA, 0xAF0B2856EB93AD2C), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) },
+ { new Hash128(0x83F69CCF1D0A79B6, 0x70D31332797430AC), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, Array.Empty