diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine')
60 files changed, 17424 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
new file mode 100644
index 00000000..2ac738fd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -0,0 +1,219 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Represents a compute engine class.
+    /// </summary>
+    /// <remarks>
+    /// Register accesses are forwarded to the device state backed by <see cref="ComputeClassState"/>.
+    /// The LaunchDma, LoadInlineData and SendSignalingPcasB registers have handlers attached in the constructor;
+    /// the first two are delegated to an <see cref="InlineToMemoryClass"/> instance, the last one performs the dispatch.
+    /// </remarks>
+    class ComputeClass : IDeviceState
+    {
+        // Byte offset, inside compute uniform buffer 0, of the storage buffer descriptor table.
+        private const int StorageBufferDescriptorsOffset = 0x310;
+
+        // Byte offset, inside compute uniform buffer 0, of the emulated constant buffer descriptor table.
+        private const int EmulatedUniformBufferDescriptorsOffset = 0x260;
+
+        // Stride in bytes between two consecutive descriptors in the tables above.
+        private const int BufferDescriptorStride = 0x10;
+
+        // First constant buffer slot that is emulated rather than bound from the QMD.
+        private const int FirstEmulatedUniformBufferSlot = 8;
+
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly ThreedClass _3dEngine;
+        private readonly DeviceState<ComputeClassState> _state;
+
+        private readonly InlineToMemoryClass _i2mClass;
+
+        /// <summary>
+        /// Creates a new instance of the compute engine class.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="threedEngine">3D engine</param>
+        public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
+        {
+            _context = context;
+            _channel = channel;
+            _3dEngine = threedEngine;
+            _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
+            {
+                { nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
+                { nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
+                { nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) }
+            });
+
+            _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
+        }
+
+        /// <summary>
+        /// Reads data from the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes data to the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public void Write(int offset, int data) => _state.Write(offset, data);
+
+        /// <summary>
+        /// Launches the Inline-to-Memory DMA copy operation.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LaunchDma(int argument)
+        {
+            // The compute state is reinterpreted in place as the Inline-to-Memory state, no copy is made.
+            _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
+        }
+
+        /// <summary>
+        /// Pushes a block of data to the Inline-to-Memory engine.
+        /// </summary>
+        /// <param name="data">Data to push</param>
+        public void LoadInlineData(ReadOnlySpan<int> data)
+        {
+            _i2mClass.LoadInlineData(data);
+        }
+
+        /// <summary>
+        /// Pushes a word of data to the Inline-to-Memory engine.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LoadInlineData(int argument)
+        {
+            _i2mClass.LoadInlineData(argument);
+        }
+
+        /// <summary>
+        /// Performs the compute dispatch operation.
+        /// </summary>
+        /// <remarks>
+        /// Reads the QMD pointed to by the SendPcasA register, binds the buffers, pools and shader it describes,
+        /// then dispatches the compute work on the host renderer.
+        /// </remarks>
+        /// <param name="argument">Method call argument</param>
+        private void SendSignalingPcasB(int argument)
+        {
+            var memoryManager = _channel.MemoryManager;
+
+            // Since we're going to change the state, make sure any pending instanced draws are done.
+            _3dEngine.PerformDeferredDraws();
+
+            // Make sure all pending uniform buffer data is written to memory.
+            _3dEngine.FlushUboDirty();
+
+            uint qmdAddress = _state.State.SendPcasA;
+
+            // The register value is shifted left by 8 bits to form the GPU virtual address of the QMD.
+            var qmd = memoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);
+
+            ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
+
+            shaderGpuVa += (uint)qmd.ProgramOffset;
+
+            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+            // Never request more shared memory than the host reports as supported.
+            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
+
+            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
+            {
+                if (!qmd.ConstantBufferValid(index))
+                {
+                    continue;
+                }
+
+                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+                ulong size = (ulong)qmd.ConstantBufferSize(index);
+
+                _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
+            }
+
+            ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
+            ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
+
+            GpuChannelPoolState poolState = new GpuChannelPoolState(
+                texturePoolGpuVa,
+                _state.State.SetTexHeaderPoolCMaximumIndex,
+                _state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+            GpuChannelComputeState computeState = new GpuChannelComputeState(
+                qmd.CtaThreadDimension0,
+                qmd.CtaThreadDimension1,
+                qmd.CtaThreadDimension2,
+                localMemorySize,
+                sharedMemorySize,
+                _channel.BufferManager.HasUnalignedStorageBuffers);
+
+            CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+            _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+
+            _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
+            _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
+            _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+            ShaderProgramInfo info = cs.Shaders[0].Info;
+
+            // Remember the alignment flag the shader above was fetched with, binding storage buffers may change it.
+            bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
+
+            for (int index = 0; index < info.SBuffers.Count; index++)
+            {
+                BufferDescriptor sb = info.SBuffers[index];
+
+                ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int sbDescOffset = StorageBufferDescriptorsOffset + sb.Slot * BufferDescriptorStride;
+
+                sbDescAddress += (ulong)sbDescOffset;
+
+                SbDescriptor sbDescriptor = memoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
+
+                _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+            }
+
+            if (_channel.BufferManager.HasUnalignedStorageBuffers != hasUnaligned)
+            {
+                // Refetch the shader, as assumptions about storage buffer alignment have changed.
+                // The compute state must be rebuilt first: the previous one still carries the alignment flag
+                // sampled before the storage buffers were bound, so reusing it would repeat the first request.
+                computeState = new GpuChannelComputeState(
+                    qmd.CtaThreadDimension0,
+                    qmd.CtaThreadDimension1,
+                    qmd.CtaThreadDimension2,
+                    localMemorySize,
+                    sharedMemorySize,
+                    _channel.BufferManager.HasUnalignedStorageBuffers);
+
+                cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+                _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+
+                info = cs.Shaders[0].Info;
+            }
+
+            for (int index = 0; index < info.CBuffers.Count; index++)
+            {
+                BufferDescriptor cb = info.CBuffers[index];
+
+                // NVN uses the "hardware" constant buffer for anything that is less than 8,
+                // and those are already bound above.
+                // Anything greater than or equal to 8 uses the emulated constant buffers.
+                // They are emulated using global memory loads.
+                if (cb.Slot < FirstEmulatedUniformBufferSlot)
+                {
+                    continue;
+                }
+
+                ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int cbDescOffset = EmulatedUniformBufferDescriptorsOffset + (cb.Slot - FirstEmulatedUniformBufferSlot) * BufferDescriptorStride;
+
+                cbDescAddress += (ulong)cbDescOffset;
+
+                SbDescriptor cbDescriptor = memoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
+
+                _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
+            }
+
+            _channel.BufferManager.SetComputeBufferBindings(cs.Bindings);
+
+            _channel.TextureManager.SetComputeBindings(cs.Bindings);
+
+            // Should never return false for mismatching spec state, since the shader was fetched above.
+            _channel.TextureManager.CommitComputeBindings(cs.SpecializationState);
+
+            _channel.BufferManager.CommitComputeBindings();
+
+            _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
+
+            _3dEngine.ForceShaderUpdate();
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
new file mode 100644
index 00000000..5d81de5d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
@@ -0,0 +1,435 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Notify type.
+    /// </summary>
+    enum NotifyType
+    {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    }
+
+    /// <summary>
+    /// CWD control SM selection.
+    /// </summary>
+    enum SetCwdControlSmSelection
+    {
+        LoadBalanced = 0,
+        RoundRobin = 1,
+    }
+
+    /// <summary>
+    /// Cache lines to invalidate.
+    /// </summary>
+    enum InvalidateCacheLines
+    {
+        All = 0,
+        One = 1,
+    }
+
+    /// <summary>
+    /// GWC SCG type.
+    /// </summary>
+    enum SetGwcScgTypeScgType
+    {
+        GraphicsCompute0 = 0,
+        Compute1 = 1,
+    }
+
+    /// <summary>
+    /// Render enable override mode.
+    /// </summary>
+    enum SetRenderEnableOverrideMode
+    {
+        UseRenderEnable = 0,
+        AlwaysRender = 1,
+        NeverRender = 2,
+    }
+
+    /// <summary>
+    /// Semaphore report operation.
+    /// </summary>
+    enum SetReportSemaphoreDOperation
+    {
+        Release = 0,
+        Trap = 3,
+    }
+
+    /// <summary>
+    /// Semaphore report structure size.
+    /// </summary>
+    enum SetReportSemaphoreDStructureSize
+    {
+        FourWords = 0,
+        OneWord = 1,
+    }
+
+    /// <summary>
+    /// Semaphore report reduction operation.
+    /// </summary>
+    enum SetReportSemaphoreDReductionOp
+    {
+        RedAdd = 0,
+        RedMin = 1,
+        RedMax = 2,
+        RedInc = 3,
+        RedDec = 4,
+        RedAnd = 5,
+        RedOr = 6,
+        RedXor = 7,
+    }
+
+    /// <summary>
+    /// Semaphore report reduction format.
+    /// </summary>
+    enum SetReportSemaphoreDReductionFormat
+    {
+        Unsigned32 = 0,
+        Signed32 = 1,
+    }
+
+    /// <summary>
+    /// Compute class state.
+    /// </summary>
+    /// <remarks>
+    /// Every field is one 32-bit register (or an array of them). The <c>Reserved*</c> fixed arrays pad the gaps
+    /// between registers; their hexadecimal suffix matches the byte offset where the padding starts
+    /// (for example <c>Reserved04</c> follows the single register at offset 0, and its 63 words end at 0x100).
+    /// Members declared with <c>=&gt;</c> hold no storage: each one decodes a bit field of the raw register
+    /// declared just above it by shifting and masking.
+    /// Field order and padding sizes must not be changed by hand, they define the register offsets.
+    /// </remarks>
+    unsafe struct ComputeClassState
+    {
+        // CS0649 ("field is never assigned to") is disabled because no field is assigned in this file.
+        // NOTE(review): presumably the registers are written through the device state wrapper - confirm.
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+        public fixed uint Reserved04[63];
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+        public uint SetNotifyB;
+        public uint Notify;
+        public NotifyType NotifyType => (NotifyType)(Notify);
+        public uint WaitForIdle;
+        public fixed uint Reserved114[7];
+        public uint SetGlobalRenderEnableA;
+        public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+        public uint SetGlobalRenderEnableB;
+        public uint SetGlobalRenderEnableC;
+        public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+        public uint SendGoIdle;
+        public uint PmTrigger;
+        public uint PmTriggerWfi;
+        public fixed uint Reserved148[2];
+        public uint SetInstrumentationMethodHeader;
+        public uint SetInstrumentationMethodData;
+        public fixed uint Reserved158[10];
+        // NOTE(review): the registers from here through LoadInlineData use types from the InlineToMemory namespace;
+        // presumably this span mirrors the Inline-to-Memory class state layout - confirm before touching it.
+        public uint LineLengthIn;
+        public uint LineCount;
+        public uint OffsetOutUpper;
+        public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+        public uint OffsetOut;
+        public uint PitchOut;
+        public uint SetDstBlockSize;
+        public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+        public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+        public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstOriginBytesX;
+        public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+        public uint SetDstOriginSamplesY;
+        public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+        public uint LaunchDma;
+        public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+        public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+        public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+        public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+        public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+        public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+        public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+        public uint LoadInlineData;
+        public fixed uint Reserved1B8[9];
+        public uint SetI2mSemaphoreA;
+        public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+        public uint SetI2mSemaphoreB;
+        public uint SetI2mSemaphoreC;
+        public fixed uint Reserved1E8[2];
+        public uint SetI2mSpareNoop00;
+        public uint SetI2mSpareNoop01;
+        public uint SetI2mSpareNoop02;
+        public uint SetI2mSpareNoop03;
+        public uint SetValidSpanOverflowAreaA;
+        public int SetValidSpanOverflowAreaAAddressUpper => (int)((SetValidSpanOverflowAreaA >> 0) & 0xFF);
+        public uint SetValidSpanOverflowAreaB;
+        public uint SetValidSpanOverflowAreaC;
+        public uint SetCoalesceWaitingPeriodUnit;
+        public uint PerfmonTransfer;
+        public uint SetShaderSharedMemoryWindow;
+        public uint SetSelectMaxwellTextureHeaders;
+        public bool SetSelectMaxwellTextureHeadersV => (SetSelectMaxwellTextureHeaders & 0x1) != 0;
+        public uint InvalidateShaderCaches;
+        public bool InvalidateShaderCachesInstruction => (InvalidateShaderCaches & 0x1) != 0;
+        public bool InvalidateShaderCachesData => (InvalidateShaderCaches & 0x10) != 0;
+        public bool InvalidateShaderCachesConstant => (InvalidateShaderCaches & 0x1000) != 0;
+        public bool InvalidateShaderCachesLocks => (InvalidateShaderCaches & 0x2) != 0;
+        public bool InvalidateShaderCachesFlushData => (InvalidateShaderCaches & 0x4) != 0;
+        public uint SetReservedSwMethod00;
+        public uint SetReservedSwMethod01;
+        public uint SetReservedSwMethod02;
+        public uint SetReservedSwMethod03;
+        public uint SetReservedSwMethod04;
+        public uint SetReservedSwMethod05;
+        public uint SetReservedSwMethod06;
+        public uint SetReservedSwMethod07;
+        public uint SetCwdControl;
+        public SetCwdControlSmSelection SetCwdControlSmSelection => (SetCwdControlSmSelection)((SetCwdControl >> 0) & 0x1);
+        public uint InvalidateTextureHeaderCacheNoWfi;
+        public InvalidateCacheLines InvalidateTextureHeaderCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureHeaderCacheNoWfi >> 0) & 0x1);
+        public int InvalidateTextureHeaderCacheNoWfiTag => (int)((InvalidateTextureHeaderCacheNoWfi >> 4) & 0x3FFFFF);
+        public uint SetCwdRefCounter;
+        public int SetCwdRefCounterSelect => (int)((SetCwdRefCounter >> 0) & 0x3F);
+        public int SetCwdRefCounterValue => (int)((SetCwdRefCounter >> 8) & 0xFFFF);
+        public uint SetReservedSwMethod08;
+        public uint SetReservedSwMethod09;
+        public uint SetReservedSwMethod10;
+        public uint SetReservedSwMethod11;
+        public uint SetReservedSwMethod12;
+        public uint SetReservedSwMethod13;
+        public uint SetReservedSwMethod14;
+        public uint SetReservedSwMethod15;
+        public uint SetGwcScgType;
+        public SetGwcScgTypeScgType SetGwcScgTypeScgType => (SetGwcScgTypeScgType)((SetGwcScgType >> 0) & 0x1);
+        public uint SetScgControl;
+        public int SetScgControlCompute1MaxSmCount => (int)((SetScgControl >> 0) & 0x1FF);
+        public uint InvalidateConstantBufferCacheA;
+        public int InvalidateConstantBufferCacheAAddressUpper => (int)((InvalidateConstantBufferCacheA >> 0) & 0xFF);
+        public uint InvalidateConstantBufferCacheB;
+        public uint InvalidateConstantBufferCacheC;
+        public int InvalidateConstantBufferCacheCByteCount => (int)((InvalidateConstantBufferCacheC >> 0) & 0x1FFFF);
+        public bool InvalidateConstantBufferCacheCThruL2 => (InvalidateConstantBufferCacheC & 0x80000000) != 0;
+        public uint SetComputeClassVersion;
+        public int SetComputeClassVersionCurrent => (int)((SetComputeClassVersion >> 0) & 0xFFFF);
+        public int SetComputeClassVersionOldestSupported => (int)((SetComputeClassVersion >> 16) & 0xFFFF);
+        public uint CheckComputeClassVersion;
+        public int CheckComputeClassVersionCurrent => (int)((CheckComputeClassVersion >> 0) & 0xFFFF);
+        public int CheckComputeClassVersionOldestSupported => (int)((CheckComputeClassVersion >> 16) & 0xFFFF);
+        public uint SetQmdVersion;
+        public int SetQmdVersionCurrent => (int)((SetQmdVersion >> 0) & 0xFFFF);
+        public int SetQmdVersionOldestSupported => (int)((SetQmdVersion >> 16) & 0xFFFF);
+        public uint SetWfiConfig;
+        public bool SetWfiConfigEnableScgTypeWfi => (SetWfiConfig & 0x1) != 0;
+        public uint CheckQmdVersion;
+        public int CheckQmdVersionCurrent => (int)((CheckQmdVersion >> 0) & 0xFFFF);
+        public int CheckQmdVersionOldestSupported => (int)((CheckQmdVersion >> 16) & 0xFFFF);
+        public uint WaitForIdleScgType;
+        public uint InvalidateSkedCaches;
+        public bool InvalidateSkedCachesV => (InvalidateSkedCaches & 0x1) != 0;
+        public uint SetScgRenderEnableControl;
+        public bool SetScgRenderEnableControlCompute1UsesRenderEnable => (SetScgRenderEnableControl & 0x1) != 0;
+        public fixed uint Reserved2A0[4];
+        public uint SetCwdSlotCount;
+        public int SetCwdSlotCountV => (int)((SetCwdSlotCount >> 0) & 0xFF);
+        public uint SendPcasA;
+        public uint SendPcasB;
+        public int SendPcasBFrom => (int)((SendPcasB >> 0) & 0xFFFFFF);
+        public int SendPcasBDelta => (int)((SendPcasB >> 24) & 0xFF);
+        public uint SendSignalingPcasB;
+        public bool SendSignalingPcasBInvalidate => (SendSignalingPcasB & 0x1) != 0;
+        public bool SendSignalingPcasBSchedule => (SendSignalingPcasB & 0x2) != 0;
+        public fixed uint Reserved2C0[9];
+        public uint SetShaderLocalMemoryNonThrottledA;
+        public int SetShaderLocalMemoryNonThrottledASizeUpper => (int)((SetShaderLocalMemoryNonThrottledA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryNonThrottledB;
+        public uint SetShaderLocalMemoryNonThrottledC;
+        public int SetShaderLocalMemoryNonThrottledCMaxSmCount => (int)((SetShaderLocalMemoryNonThrottledC >> 0) & 0x1FF);
+        public uint SetShaderLocalMemoryThrottledA;
+        public int SetShaderLocalMemoryThrottledASizeUpper => (int)((SetShaderLocalMemoryThrottledA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryThrottledB;
+        public uint SetShaderLocalMemoryThrottledC;
+        public int SetShaderLocalMemoryThrottledCMaxSmCount => (int)((SetShaderLocalMemoryThrottledC >> 0) & 0x1FF);
+        public fixed uint Reserved2FC[5];
+        public uint SetSpaVersion;
+        public int SetSpaVersionMinor => (int)((SetSpaVersion >> 0) & 0xFF);
+        public int SetSpaVersionMajor => (int)((SetSpaVersion >> 8) & 0xFF);
+        public fixed uint Reserved314[123];
+        public uint SetFalcon00;
+        public uint SetFalcon01;
+        public uint SetFalcon02;
+        public uint SetFalcon03;
+        public uint SetFalcon04;
+        public uint SetFalcon05;
+        public uint SetFalcon06;
+        public uint SetFalcon07;
+        public uint SetFalcon08;
+        public uint SetFalcon09;
+        public uint SetFalcon10;
+        public uint SetFalcon11;
+        public uint SetFalcon12;
+        public uint SetFalcon13;
+        public uint SetFalcon14;
+        public uint SetFalcon15;
+        public uint SetFalcon16;
+        public uint SetFalcon17;
+        public uint SetFalcon18;
+        public uint SetFalcon19;
+        public uint SetFalcon20;
+        public uint SetFalcon21;
+        public uint SetFalcon22;
+        public uint SetFalcon23;
+        public uint SetFalcon24;
+        public uint SetFalcon25;
+        public uint SetFalcon26;
+        public uint SetFalcon27;
+        public uint SetFalcon28;
+        public uint SetFalcon29;
+        public uint SetFalcon30;
+        public uint SetFalcon31;
+        public fixed uint Reserved580[127];
+        public uint SetShaderLocalMemoryWindow;
+        public fixed uint Reserved780[4];
+        public uint SetShaderLocalMemoryA;
+        public int SetShaderLocalMemoryAAddressUpper => (int)((SetShaderLocalMemoryA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryB;
+        public fixed uint Reserved798[383];
+        public uint SetShaderCacheControl;
+        public bool SetShaderCacheControlIcachePrefetchEnable => (SetShaderCacheControl & 0x1) != 0;
+        public fixed uint ReservedD98[19];
+        public uint SetSmTimeoutInterval;
+        public int SetSmTimeoutIntervalCounterBit => (int)((SetSmTimeoutInterval >> 0) & 0x3F);
+        public fixed uint ReservedDE8[87];
+        public uint SetSpareNoop12;
+        public uint SetSpareNoop13;
+        public uint SetSpareNoop14;
+        public uint SetSpareNoop15;
+        public fixed uint ReservedF54[59];
+        public uint SetSpareNoop00;
+        public uint SetSpareNoop01;
+        public uint SetSpareNoop02;
+        public uint SetSpareNoop03;
+        public uint SetSpareNoop04;
+        public uint SetSpareNoop05;
+        public uint SetSpareNoop06;
+        public uint SetSpareNoop07;
+        public uint SetSpareNoop08;
+        public uint SetSpareNoop09;
+        public uint SetSpareNoop10;
+        public uint SetSpareNoop11;
+        public fixed uint Reserved1070[103];
+        public uint InvalidateSamplerCacheAll;
+        public bool InvalidateSamplerCacheAllV => (InvalidateSamplerCacheAll & 0x1) != 0;
+        public uint InvalidateTextureHeaderCacheAll;
+        public bool InvalidateTextureHeaderCacheAllV => (InvalidateTextureHeaderCacheAll & 0x1) != 0;
+        public fixed uint Reserved1214[29];
+        public uint InvalidateTextureDataCacheNoWfi;
+        public InvalidateCacheLines InvalidateTextureDataCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureDataCacheNoWfi >> 0) & 0x1);
+        public int InvalidateTextureDataCacheNoWfiTag => (int)((InvalidateTextureDataCacheNoWfi >> 4) & 0x3FFFFF);
+        public fixed uint Reserved128C[7];
+        public uint ActivatePerfSettingsForComputeContext;
+        public bool ActivatePerfSettingsForComputeContextAll => (ActivatePerfSettingsForComputeContext & 0x1) != 0;
+        public fixed uint Reserved12AC[33];
+        public uint InvalidateSamplerCache;
+        public InvalidateCacheLines InvalidateSamplerCacheLines => (InvalidateCacheLines)((InvalidateSamplerCache >> 0) & 0x1);
+        public int InvalidateSamplerCacheTag => (int)((InvalidateSamplerCache >> 4) & 0x3FFFFF);
+        public uint InvalidateTextureHeaderCache;
+        public InvalidateCacheLines InvalidateTextureHeaderCacheLines => (InvalidateCacheLines)((InvalidateTextureHeaderCache >> 0) & 0x1);
+        public int InvalidateTextureHeaderCacheTag => (int)((InvalidateTextureHeaderCache >> 4) & 0x3FFFFF);
+        public uint InvalidateTextureDataCache;
+        public InvalidateCacheLines InvalidateTextureDataCacheLines => (InvalidateCacheLines)((InvalidateTextureDataCache >> 0) & 0x1);
+        public int InvalidateTextureDataCacheTag => (int)((InvalidateTextureDataCache >> 4) & 0x3FFFFF);
+        public fixed uint Reserved133C[58];
+        public uint InvalidateSamplerCacheNoWfi;
+        public InvalidateCacheLines InvalidateSamplerCacheNoWfiLines => (InvalidateCacheLines)((InvalidateSamplerCacheNoWfi >> 0) & 0x1);
+        public int InvalidateSamplerCacheNoWfiTag => (int)((InvalidateSamplerCacheNoWfi >> 4) & 0x3FFFFF);
+        public fixed uint Reserved1428[64];
+        public uint SetShaderExceptions;
+        public bool SetShaderExceptionsEnable => (SetShaderExceptions & 0x1) != 0;
+        public fixed uint Reserved152C[9];
+        public uint SetRenderEnableA;
+        public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+        public uint SetRenderEnableB;
+        public uint SetRenderEnableC;
+        public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+        public uint SetTexSamplerPoolA;
+        public int SetTexSamplerPoolAOffsetUpper => (int)((SetTexSamplerPoolA >> 0) & 0xFF);
+        public uint SetTexSamplerPoolB;
+        public uint SetTexSamplerPoolC;
+        public int SetTexSamplerPoolCMaximumIndex => (int)((SetTexSamplerPoolC >> 0) & 0xFFFFF);
+        public fixed uint Reserved1568[3];
+        public uint SetTexHeaderPoolA;
+        public int SetTexHeaderPoolAOffsetUpper => (int)((SetTexHeaderPoolA >> 0) & 0xFF);
+        public uint SetTexHeaderPoolB;
+        public uint SetTexHeaderPoolC;
+        public int SetTexHeaderPoolCMaximumIndex => (int)((SetTexHeaderPoolC >> 0) & 0x3FFFFF);
+        public fixed uint Reserved1580[34];
+        public uint SetProgramRegionA;
+        public int SetProgramRegionAAddressUpper => (int)((SetProgramRegionA >> 0) & 0xFF);
+        public uint SetProgramRegionB;
+        public fixed uint Reserved1610[34];
+        public uint InvalidateShaderCachesNoWfi;
+        public bool InvalidateShaderCachesNoWfiInstruction => (InvalidateShaderCachesNoWfi & 0x1) != 0;
+        public bool InvalidateShaderCachesNoWfiGlobalData => (InvalidateShaderCachesNoWfi & 0x10) != 0;
+        public bool InvalidateShaderCachesNoWfiConstant => (InvalidateShaderCachesNoWfi & 0x1000) != 0;
+        public fixed uint Reserved169C[170];
+        public uint SetRenderEnableOverride;
+        public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3);
+        public fixed uint Reserved1948[57];
+        public uint PipeNop;
+        public uint SetSpare00;
+        public uint SetSpare01;
+        public uint SetSpare02;
+        public uint SetSpare03;
+        public fixed uint Reserved1A40[48];
+        public uint SetReportSemaphoreA;
+        public int SetReportSemaphoreAOffsetUpper => (int)((SetReportSemaphoreA >> 0) & 0xFF);
+        public uint SetReportSemaphoreB;
+        public uint SetReportSemaphoreC;
+        public uint SetReportSemaphoreD;
+        public SetReportSemaphoreDOperation SetReportSemaphoreDOperation => (SetReportSemaphoreDOperation)((SetReportSemaphoreD >> 0) & 0x3);
+        public bool SetReportSemaphoreDAwakenEnable => (SetReportSemaphoreD & 0x100000) != 0;
+        public SetReportSemaphoreDStructureSize SetReportSemaphoreDStructureSize => (SetReportSemaphoreDStructureSize)((SetReportSemaphoreD >> 28) & 0x1);
+        public bool SetReportSemaphoreDFlushDisable => (SetReportSemaphoreD & 0x4) != 0;
+        public bool SetReportSemaphoreDReductionEnable => (SetReportSemaphoreD & 0x8) != 0;
+        public SetReportSemaphoreDReductionOp SetReportSemaphoreDReductionOp => (SetReportSemaphoreDReductionOp)((SetReportSemaphoreD >> 9) & 0x7);
+        public SetReportSemaphoreDReductionFormat SetReportSemaphoreDReductionFormat => (SetReportSemaphoreDReductionFormat)((SetReportSemaphoreD >> 17) & 0x3);
+        public fixed uint Reserved1B10[702];
+        public uint SetBindlessTexture;
+        public int SetBindlessTextureConstantBufferSlotSelect => (int)((SetBindlessTexture >> 0) & 0x7);
+        public uint SetTrapHandler;
+        public fixed uint Reserved2610[843];
+        public Array8<uint> SetShaderPerformanceCounterValueUpper;
+        public Array8<uint> SetShaderPerformanceCounterValue;
+        public Array8<uint> SetShaderPerformanceCounterEvent;
+        public int SetShaderPerformanceCounterEventEvent(int i) => (int)((SetShaderPerformanceCounterEvent[i] >> 0) & 0xFF);
+        public Array8<uint> SetShaderPerformanceCounterControlA;
+        public int SetShaderPerformanceCounterControlAEvent0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 0) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 2) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 5) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 7) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 10) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 12) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 15) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 17) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 20) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 22) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 25) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 27) & 0x7);
+        public int SetShaderPerformanceCounterControlASpare(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 30) & 0x3);
+        public Array8<uint> SetShaderPerformanceCounterControlB;
+        public bool SetShaderPerformanceCounterControlBEdge(int i) => (SetShaderPerformanceCounterControlB[i] & 0x1) != 0;
+        public int SetShaderPerformanceCounterControlBMode(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 1) & 0x3);
+        public bool SetShaderPerformanceCounterControlBWindowed(int i) => (SetShaderPerformanceCounterControlB[i] & 0x8) != 0;
+        public int SetShaderPerformanceCounterControlBFunc(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 4) & 0xFFFF);
+        public uint SetShaderPerformanceCounterTrapControl;
+        public int SetShaderPerformanceCounterTrapControlMask => (int)((SetShaderPerformanceCounterTrapControl >> 0) & 0xFF);
+        public uint StartShaderPerformanceCounter;
+        public int StartShaderPerformanceCounterCounterMask => (int)((StartShaderPerformanceCounter >> 0) & 0xFF);
+        public uint StopShaderPerformanceCounter;
+        public int StopShaderPerformanceCounterCounterMask => (int)((StopShaderPerformanceCounter >> 0) & 0xFF);
+        public fixed uint Reserved33E8[6];
+        public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
new file mode 100644
index 00000000..1b20e41c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Type of the dependent
Queue Meta Data. + /// </summary> + enum DependentQmdType + { + Queue, + Grid + } + + /// <summary> + /// Type of the release memory barrier. + /// </summary> + enum ReleaseMembarType + { + FeNone, + FeSysmembar + } + + /// <summary> + /// Type of the CWD memory barrier. + /// </summary> + enum CwdMembarType + { + L1None, + L1Sysmembar, + L1Membar + } + + /// <summary> + /// NaN behavior of 32-bits float operations on the shader. + /// </summary> + enum Fp32NanBehavior + { + Legacy, + Fp64Compatible + } + + /// <summary> + /// NaN behavior of 32-bits float to integer conversion on the shader. + /// </summary> + enum Fp32F2iNanBehavior + { + PassZero, + PassIndefinite + } + + /// <summary> + /// Limit of calls. + /// </summary> + enum ApiVisibleCallLimit + { + _32, + NoCheck + } + + /// <summary> + /// Shared memory bank mapping mode. + /// </summary> + enum SharedMemoryBankMapping + { + FourBytesPerBank, + EightBytesPerBank + } + + /// <summary> + /// Denormal behavior of 32-bits float narrowing instructions. + /// </summary> + enum Fp32NarrowInstruction + { + KeepDenorms, + FlushDenorms + } + + /// <summary> + /// Configuration of the L1 cache. + /// </summary> + enum L1Configuration + { + DirectlyAddressableMemorySize16kb, + DirectlyAddressableMemorySize32kb, + DirectlyAddressableMemorySize48kb + } + + /// <summary> + /// Reduction operation. + /// </summary> + enum ReductionOp + { + RedAdd, + RedMin, + RedMax, + RedInc, + RedDec, + RedAnd, + RedOr, + RedXor + } + + /// <summary> + /// Reduction format. + /// </summary> + enum ReductionFormat + { + Unsigned32, + Signed32 + } + + /// <summary> + /// Size of a structure in words. + /// </summary> + enum StructureSize + { + FourWords, + OneWord + } + + /// <summary> + /// Compute Queue Meta Data. 
+ /// </summary> + unsafe struct ComputeQmd + { + // Raw QMD contents: 64 words of 32 bits each. Every accessor below decodes a bit position or a bit range of this array. + // BitRange only reads the word that contains the lower bit (and only range-checks that bit), so each field is expected to sit inside a single 32-bit word. + private fixed int _words[64]; + + public int OuterPut => BitRange(30, 0); + public bool OuterOverflow => Bit(31); + public int OuterGet => BitRange(62, 32); + public bool OuterStickyOverflow => Bit(63); + public int InnerGet => BitRange(94, 64); + public bool InnerOverflow => Bit(95); + public int InnerPut => BitRange(126, 96); + public bool InnerStickyOverflow => Bit(127); + public int QmdReservedAA => BitRange(159, 128); + public int DependentQmdPointer => BitRange(191, 160); + public int QmdGroupId => BitRange(197, 192); + public bool SmGlobalCachingEnable => Bit(198); + public bool RunCtaInOneSmPartition => Bit(199); + public bool IsQueue => Bit(200); + public bool AddToHeadOfQmdGroupLinkedList => Bit(201); + public bool SemaphoreReleaseEnable0 => Bit(202); + public bool SemaphoreReleaseEnable1 => Bit(203); + public bool RequireSchedulingPcas => Bit(204); + public bool DependentQmdScheduleEnable => Bit(205); + public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206); + public bool DependentQmdFieldCopy => Bit(207); + public int QmdReservedB => BitRange(223, 208); + public int CircularQueueSize => BitRange(248, 224); + public bool QmdReservedC => Bit(249); + public bool InvalidateTextureHeaderCache => Bit(250); + public bool InvalidateTextureSamplerCache => Bit(251); + public bool InvalidateTextureDataCache => Bit(252); + public bool InvalidateShaderDataCache => Bit(253); + public bool InvalidateInstructionCache => Bit(254); + public bool InvalidateShaderConstantCache => Bit(255); + public int ProgramOffset => BitRange(287, 256); + public int CircularQueueAddrLower => BitRange(319, 288); + public int CircularQueueAddrUpper => BitRange(327, 320); + public int QmdReservedD => BitRange(335, 328); + public int CircularQueueEntrySize => BitRange(351, 336); + public int CwdReferenceCountId => BitRange(357, 352); + public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358); + public
ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366); + public bool CwdReferenceCountIncrEnable => Bit(367); + public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368); + public bool SequentiallyRunCtas => Bit(370); + public bool CwdReferenceCountDecrEnable => Bit(371); + public bool Throttled => Bit(372); + public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376); + public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377); + public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378); + public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379); + public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382); + public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383); + public int CtaRasterWidth => BitRange(415, 384); + public int CtaRasterHeight => BitRange(431, 416); + public int CtaRasterDepth => BitRange(447, 432); + public int CtaRasterWidthResume => BitRange(479, 448); + public int CtaRasterHeightResume => BitRange(495, 480); + public int CtaRasterDepthResume => BitRange(511, 496); + public int QueueEntriesPerCtaMinusOne => BitRange(518, 512); + public int CoalesceWaitingPeriod => BitRange(529, 522); + public int SharedMemorySize => BitRange(561, 544); + public int QmdReservedG => BitRange(575, 562); + public int QmdVersion => BitRange(579, 576); + public int QmdMajorVersion => BitRange(583, 580); + public int QmdReservedH => BitRange(591, 584); + public int CtaThreadDimension0 => BitRange(607, 592); + public int CtaThreadDimension1 => BitRange(623, 608); + public int CtaThreadDimension2 => BitRange(639, 624); + public bool ConstantBufferValid(int i) => Bit(640 + i * 1); + public int QmdReservedI => BitRange(668, 648); + public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669); + public int SmDisableMaskLower => BitRange(703, 672);
+ public int SmDisableMaskUpper => BitRange(735, 704); + public int Release0AddressLower => BitRange(767, 736); + public int Release0AddressUpper => BitRange(775, 768); + public int QmdReservedJ => BitRange(783, 776); + public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788); + public bool QmdReservedK => Bit(791); + public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792); + public bool Release0ReductionEnable => Bit(794); + public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799); + public int Release0Payload => BitRange(831, 800); + public int Release1AddressLower => BitRange(863, 832); + public int Release1AddressUpper => BitRange(871, 864); + public int QmdReservedL => BitRange(879, 872); + public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884); + public bool QmdReservedM => Bit(887); + public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888); + public bool Release1ReductionEnable => Bit(890); + public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895); + public int Release1Payload => BitRange(927, 896); + public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64); + public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64); + public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64); + public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64); + public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64); + public int ShaderLocalMemoryLowSize => BitRange(1463, 1440); + public int QmdReservedN => BitRange(1466, 1464); + public int BarrierCount => BitRange(1471, 1467); + public int ShaderLocalMemoryHighSize => BitRange(1495, 1472); + public int RegisterCount => BitRange(1503, 1496); + public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504); + public int SassVersion => BitRange(1535, 1528); + public int HwOnlyInnerGet =>
BitRange(1566, 1536); + public bool HwOnlyRequireSchedulingPcas => Bit(1567); + public int HwOnlyInnerPut => BitRange(1598, 1568); + public bool HwOnlyScgType => Bit(1599); + public int HwOnlySpanListHeadIndex => BitRange(1629, 1600); + public bool QmdReservedQ => Bit(1630); + public bool HwOnlySpanListHeadIndexValid => Bit(1631); + public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632); + public int QmdSpareE => BitRange(1695, 1664); + public int QmdSpareF => BitRange(1727, 1696); + public int QmdSpareG => BitRange(1759, 1728); + public int QmdSpareH => BitRange(1791, 1760); + public int QmdSpareI => BitRange(1823, 1792); + public int QmdSpareJ => BitRange(1855, 1824); + public int QmdSpareK => BitRange(1887, 1856); + public int QmdSpareL => BitRange(1919, 1888); + public int QmdSpareM => BitRange(1951, 1920); + public int QmdSpareN => BitRange(1983, 1952); + public int DebugIdUpper => BitRange(2015, 1984); + public int DebugIdLower => BitRange(2047, 2016); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool Bit(int bit) + { + if ((uint)bit >= 64 * 32) + { + throw new ArgumentOutOfRangeException(nameof(bit)); + } + + return (_words[bit >> 5] & (1 << (bit & 31))) != 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int BitRange(int upper, int lower) + { + if ((uint)lower >= 64 * 32) + { + throw new ArgumentOutOfRangeException(nameof(lower)); + } + + int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1))); + + return (_words[lower >> 5] >> (lower & 31)) & mask; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs new file mode 100644 index 00000000..5581b5cc --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/ConditionalRenderEnabled.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Gpu.Engine +{ + /// <summary> + /// Conditional rendering enable. + /// </summary> + enum ConditionalRenderEnabled + { + False, + True, + Host + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs new file mode 100644 index 00000000..74a9aa04 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/DeviceStateWithShadow.cs @@ -0,0 +1,96 @@ +using Ryujinx.Graphics.Device; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Engine +{ + /// <summary> + /// State interface with a shadow memory control register. + /// </summary> + interface IShadowState + { + /// <summary> + /// MME shadow RAM control mode. + /// </summary> + SetMmeShadowRamControlMode SetMmeShadowRamControlMode { get; } + } + + /// <summary> + /// Represents a device's state, with an additional shadow state. + /// </summary> + /// <typeparam name="TState">Type of the state</typeparam> + class DeviceStateWithShadow<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicFields)] TState> : IDeviceState where TState : unmanaged, IShadowState + { + private readonly DeviceState<TState> _state; + private readonly DeviceState<TState> _shadowState; + + /// <summary> + /// Current device state (the shadow state is not exposed through this property). + /// </summary> + public ref TState State => ref _state.State; + + /// <summary> + /// Creates a new instance of the device state, with shadow state.
+ /// </summary> + /// <param name="callbacks">Optional callbacks that will be called if a register specified by name is read or written</param> + /// <param name="debugLogCallback">Optional callback to be used for debug log messages</param> + public DeviceStateWithShadow(IReadOnlyDictionary<string, RwCallback> callbacks = null, Action<string> debugLogCallback = null) + { + _state = new DeviceState<TState>(callbacks, debugLogCallback); + _shadowState = new DeviceState<TState>(); + } + + /// <summary> + /// Reads a value from a register of the current (non-shadow) state. + /// </summary> + /// <param name="offset">Register offset in bytes</param> + /// <returns>Value stored on the register</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int Read(int offset) + { + return _state.Read(offset); + } + + /// <summary> + /// Writes a value to a register. + /// </summary> + /// <param name="offset">Register offset in bytes</param> + /// <param name="value">Value to be written</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Write(int offset, int value) + { + WriteWithRedundancyCheck(offset, value, out _); + } + + /// <summary> + /// Writes a value to a register, returning a value indicating if <paramref name="value"/>
+ /// is different from the current value on the register. In passthrough mode, or for offsets below 0x200, the value is written directly; in the track modes it is also recorded in the shadow state; in replay mode the value previously recorded in the shadow state is written instead of <paramref name="value"/>. + /// </summary> + /// <param name="offset">Register offset in bytes</param> + /// <param name="value">Value to be written</param> + /// <param name="changed">True if the value was changed, false otherwise (as reported by the write to the current state)</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void WriteWithRedundancyCheck(int offset, int value, out bool changed) + { + var shadowRamControl = _state.State.SetMmeShadowRamControlMode; + if (shadowRamControl == SetMmeShadowRamControlMode.MethodPassthrough || offset < 0x200) + { + _state.WriteWithRedundancyCheck(offset, value, out changed); + } + else if (shadowRamControl == SetMmeShadowRamControlMode.MethodTrack || + shadowRamControl == SetMmeShadowRamControlMode.MethodTrackWithFilter) + { + _shadowState.Write(offset, value); + _state.WriteWithRedundancyCheck(offset, value, out changed); + } + else /* if (shadowRamControl == SetMmeShadowRamControlMode.MethodReplay) */ + { + Debug.Assert(shadowRamControl == SetMmeShadowRamControlMode.MethodReplay); + _state.WriteWithRedundancyCheck(offset, _shadowState.Read(offset), out changed); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs new file mode 100644 index 00000000..fd93cd8b --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs @@ -0,0 +1,635 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Texture; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Graphics.Gpu.Engine.Dma +{ + /// <summary> + /// Represents a DMA copy engine class.
+ /// </summary> + class DmaClass : IDeviceState + { + private readonly GpuContext _context; + private readonly GpuChannel _channel; + private readonly ThreedClass _3dEngine; + private readonly DeviceState<DmaClassState> _state; + + /// <summary> + /// Copy flags passed on DMA launch (bits 7 to 10 of the LaunchDma argument). + /// </summary> + [Flags] + private enum CopyFlags + { + SrcLinear = 1 << 7, + DstLinear = 1 << 8, + MultiLineEnable = 1 << 9, + RemapEnable = 1 << 10 + } + + /// <summary> + /// Texture parameters for copy. + /// </summary> + private struct TextureParams + { + /// <summary> + /// Copy region X coordinate. + /// </summary> + public readonly int RegionX; + + /// <summary> + /// Copy region Y coordinate. + /// </summary> + public readonly int RegionY; + + /// <summary> + /// Offset from the base pointer of the data in memory. + /// </summary> + public readonly int BaseOffset; + + /// <summary> + /// Bytes per pixel. + /// </summary> + public readonly int Bpp; + + /// <summary> + /// Whether the texture is linear. If false, the texture is block linear. + /// </summary> + public readonly bool Linear; + + /// <summary> + /// Pixel offset from XYZ coordinates calculator. + /// </summary> + public readonly OffsetCalculator Calculator; + + /// <summary> + /// Creates texture parameters. + /// </summary> + /// <param name="regionX">Copy region X coordinate</param> + /// <param name="regionY">Copy region Y coordinate</param> + /// <param name="baseOffset">Offset from the base pointer of the data in memory</param> + /// <param name="bpp">Bytes per pixel</param> + /// <param name="linear">Whether the texture is linear.
If false, the texture is block linear</param> + /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param> + public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator) + { + RegionX = regionX; + RegionY = regionY; + BaseOffset = baseOffset; + Bpp = bpp; + Linear = linear; + Calculator = calculator; + } + } + + /// <summary> + /// Opaque 3-byte element (sequential layout, size 3, pack 1), used as the element type when copying 3-byte components. + /// </summary> + [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)] + private struct UInt24 + { + public byte Byte0; + public byte Byte1; + public byte Byte2; + } + + /// <summary> + /// Creates a new instance of the DMA copy engine class. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">GPU channel</param> + /// <param name="threedEngine">3D engine</param> + public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine) + { + _context = context; + _channel = channel; + _3dEngine = threedEngine; + _state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback> + { + { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) } + }); + } + + /// <summary> + /// Reads data from the class registers. + /// </summary> + /// <param name="offset">Register byte offset</param> + /// <returns>Data at the specified offset</returns> + public int Read(int offset) => _state.Read(offset); + + /// <summary> + /// Writes data to the class registers. + /// </summary> + /// <param name="offset">Register byte offset</param> + /// <param name="data">Data to be written</param> + public void Write(int offset, int data) => _state.Write(offset, data); + + /// <summary> + /// Determine if a buffer-to-texture region covers the entirety of a texture.
+ /// </summary> + /// <param name="tex">Texture to compare</param> + /// <param name="linear">True if the texture is linear, false if block linear</param> + /// <param name="bpp">Texture bytes per pixel</param> + /// <param name="stride">Texture stride</param> + /// <param name="xCount">Number of pixels to be copied</param> + /// <param name="yCount">Number of lines to be copied</param> + /// <returns>True if the copy covers the whole texture (linear: the stride in pixels equals the aligned copy width; block linear: the region starts at the origin and the texture size equals the aligned copy width and the line count), false otherwise</returns> + private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount) + { + if (linear) + { + // If the stride is negative, the texture has to be flipped, so + // the fast copy is not trivial, use the slow path (a zero stride is rejected here as well). + if (stride <= 0) + { + return false; + } + + int alignWidth = Constants.StrideAlignment / bpp; + return stride / bpp == BitUtils.AlignUp(xCount, alignWidth); + } + else + { + int alignWidth = Constants.GobAlignment / bpp; + return tex.RegionX == 0 && + tex.RegionY == 0 && + tex.Width == BitUtils.AlignUp(xCount, alignWidth) && + tex.Height == yCount; + } + } + + /// <summary> + /// Releases a semaphore for a given LaunchDma method call. The semaphore type is taken from bits 3 and 4 of the argument; nothing is written when it is None. + /// </summary> + /// <param name="argument">The LaunchDma call argument</param> + private void ReleaseSemaphore(int argument) + { + LaunchDmaSemaphoreType type = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3); + if (type != LaunchDmaSemaphoreType.None) + { + ulong address = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB; + if (type == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore) + { + _channel.MemoryManager.Write(address, _state.State.SetSemaphorePayload); + } + else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */ + { + _channel.MemoryManager.Write(address + 8, _context.GetTimestamp()); + _channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload); + } + } + } + + /// <summary> + /// Performs a buffer to buffer, or buffer to texture copy.
+ /// </summary> + /// <param name="argument">The LaunchDma call argument (the copy flags are taken from it)</param> + private void DmaCopy(int argument) + { + var memoryManager = _channel.MemoryManager; + + CopyFlags copyFlags = (CopyFlags)argument; + + bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear); + bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear); + bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable); + bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable); + + uint size = _state.State.LineLengthIn; + + // A line length of zero means there is nothing to copy. + if (size == 0) + { + return; + } + + ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower; + ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower; + + int xCount = (int)_state.State.LineLengthIn; + int yCount = (int)_state.State.LineCount; + + _3dEngine.CreatePendingSyncs(); + _3dEngine.FlushUboDirty(); + + if (copy2D) + { + // Buffer to texture copy. + int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1; + int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1; + int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1; + int srcBpp = remap ? srcComponents * componentSize : 1; + int dstBpp = remap ?
dstComponents * componentSize : 1; + + var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize); + var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize); + + int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0; + + if (!srcLinear) + { + srcRegionX = src.RegionX; + srcRegionY = src.RegionY; + } + + if (!dstLinear) + { + dstRegionX = dst.RegionX; + dstRegionY = dst.RegionY; + } + + int srcStride = (int)_state.State.PitchIn; + int dstStride = (int)_state.State.PitchOut; + + var srcCalculator = new OffsetCalculator( + src.Width, + src.Height, + srcStride, + srcLinear, + src.MemoryLayout.UnpackGobBlocksInY(), + src.MemoryLayout.UnpackGobBlocksInZ(), + srcBpp); + + var dstCalculator = new OffsetCalculator( + dst.Width, + dst.Height, + dstStride, + dstLinear, + dst.MemoryLayout.UnpackGobBlocksInY(), + dst.MemoryLayout.UnpackGobBlocksInZ(), + dstBpp); + + (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount); + (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount); + + if (srcLinear && srcStride < 0) + { + srcBaseOffset += srcStride * (yCount - 1); + } + + if (dstLinear && dstStride < 0) + { + dstBaseOffset += dstStride * (yCount - 1); + } + + ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true); + + bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount); + bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount); + + if (completeSource && completeDest) + { + var target = memoryManager.Physical.TextureCache.FindTexture( + memoryManager, + dstGpuVa, + dstBpp, + dstStride, + dst.Height, + xCount, + yCount, + dstLinear, + dst.MemoryLayout.UnpackGobBlocksInY(), + dst.MemoryLayout.UnpackGobBlocksInZ()); + + if (target != null) + { + byte[] data; + if (srcLinear) + { + data =
LayoutConverter.ConvertLinearStridedToLinear( + target.Info.Width, + target.Info.Height, + 1, + 1, + xCount * srcBpp, + srcStride, + target.Info.FormatInfo.BytesPerPixel, + srcSpan); + } + else + { + data = LayoutConverter.ConvertBlockLinearToLinear( + src.Width, + src.Height, + src.Depth, + 1, + 1, + 1, + 1, + 1, + srcBpp, + src.MemoryLayout.UnpackGobBlocksInY(), + src.MemoryLayout.UnpackGobBlocksInZ(), + 1, + new SizeInfo((int)target.Size), + srcSpan); + } + + target.SynchronizeMemory(); + target.SetData(data); + target.SignalModified(); + return; + } + else if (srcCalculator.LayoutMatches(dstCalculator)) + { + // No layout conversion has to be performed, just copy the data entirely. + memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan); + return; + } + } + + // OPT: This allocates a (potentially) huge temporary array and then copies an existing + // region of memory into it, data that might get overwritten entirely anyways. Ideally this should + // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides + Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray(); + + TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator); + TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator); + + // If remapping is disabled, we always copy the components directly, in order. + // If it's enabled, but the mapping is just XYZW, we also copy them in order.
+ bool isIdentityRemap = !remap || + (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX && + (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) && + (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) && + (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW)); + + if (isIdentityRemap) + { + // The order of the components doesn't change, so we can just copy directly + // (with layout conversion if necessary). + + switch (srcBpp) + { + case 1: Copy<byte>(dstSpan, srcSpan, dstParams, srcParams); break; + case 2: Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams); break; + case 4: Copy<uint>(dstSpan, srcSpan, dstParams, srcParams); break; + case 8: Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams); break; + case 12: Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams); break; + case 16: Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams); break; + default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format."); + } + } + else + { + // The order or value of the components might change.
+ + switch (componentSize) + { + case 1: CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams); break; + case 2: CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams); break; + case 3: CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams); break; + case 4: CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams); break; + default: throw new NotSupportedException($"Unable to copy {componentSize} component size."); + } + } + + memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan); + } + else + { + if (remap && + _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA && + _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA && + _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA && + _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA && + _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One && + _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One && + _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four) + { + // Fast path for clears when remap is enabled. + memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA); + } + else + { + // TODO: Implement remap functionality. + // Buffer to buffer copy. + + bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch(); + bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch(); + + if (!srcIsPitchKind && dstIsPitchKind) + { + CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size); + } + else if (srcIsPitchKind && !dstIsPitchKind) + { + CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size); + } + else + { + memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size); + } + } + } + } + + /// <summary> + /// Copies data from one texture to another, while performing layout conversion if necessary.
+ /// </summary> + /// <typeparam name="T">Pixel type</typeparam> + /// <param name="dstSpan">Destination texture memory region</param> + /// <param name="srcSpan">Source texture memory region</param> + /// <param name="dst">Destination texture parameters</param> + /// <param name="src">Source texture parameters</param> + /// <remarks>The copy width and height are read from the LineLengthIn and LineCount registers.</remarks> + private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged + { + int xCount = (int)_state.State.LineLengthIn; + int yCount = (int)_state.State.LineCount; + + if (src.Linear && dst.Linear && src.Bpp == dst.Bpp) + { + // Optimized path for purely linear copies - we don't need to calculate every single byte offset, + // and we can make use of Span.CopyTo which is very very fast (even compared to pointers) + for (int y = 0; y < yCount; y++) + { + src.Calculator.SetY(src.RegionY + y); + dst.Calculator.SetY(dst.RegionY + y); + int srcOffset = src.Calculator.GetOffset(src.RegionX); + int dstOffset = dst.Calculator.GetOffset(dst.RegionX); + srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp) + .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp)); + } + } + else + { + // NOTE(review): the pointer accesses in this path are not bounds checked; presumably both spans were sized to cover every offset the calculators return - confirm against the caller. + fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) + { + byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. + byte* srcBase = srcPtr - src.BaseOffset; + + for (int y = 0; y < yCount; y++) + { + src.Calculator.SetY(src.RegionY + y); + dst.Calculator.SetY(dst.RegionY + y); + + for (int x = 0; x < xCount; x++) + { + int srcOffset = src.Calculator.GetOffset(src.RegionX + x); + int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x); + + *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset); + } + } + } + } + } + + /// <summary> + /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
+ /// </summary> + /// <typeparam name="T">Pixel type</typeparam> + /// <param name="dstSpan">Destination texture memory region</param> + /// <param name="dst">Destination texture parameters</param> + /// <param name="fillValue">Constant pixel value to be set</param> + /// <remarks>The fill width and height are read from the LineLengthIn and LineCount registers.</remarks> + private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged + { + int xCount = (int)_state.State.LineLengthIn; + int yCount = (int)_state.State.LineCount; + + // NOTE(review): the pointer writes below are not bounds checked; presumably the span was sized to cover every offset the calculator returns - confirm against the caller. + fixed (byte* dstPtr = dstSpan) + { + byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. + + for (int y = 0; y < yCount; y++) + { + dst.Calculator.SetY(dst.RegionY + y); + + for (int x = 0; x < xCount; x++) + { + int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x); + + *(T*)(dstBase + dstOffset) = fillValue; + } + } + } + } + + /// <summary> + /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
+ /// </summary> + /// <typeparam name="T">Component type (one element per pixel component, not per whole pixel)</typeparam> + /// <param name="dstSpan">Destination texture memory region</param> + /// <param name="srcSpan">Source texture memory region</param> + /// <param name="dst">Destination texture parameters</param> + /// <param name="src">Source texture parameters</param> + private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged + { + int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1; + + // One pass over the whole region per destination component; the spans are shifted by whole components so each pass lands on the right component of every pixel. + for (int i = 0; i < dstComponents; i++) + { + SetRemapComponentsDst componentsDst = i switch + { + 0 => _state.State.SetRemapComponentsDstX, + 1 => _state.State.SetRemapComponentsDstY, + 2 => _state.State.SetRemapComponentsDstZ, + _ => _state.State.SetRemapComponentsDstW + }; + + // NoWrite (and any other value) has no case below: that destination component is left as it was. + switch (componentsDst) + { + case SetRemapComponentsDst.SrcX: + Copy<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), srcSpan, dst, src); + break; + case SetRemapComponentsDst.SrcY: + Copy<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), srcSpan.Slice(Unsafe.SizeOf<T>()), dst, src); + break; + case SetRemapComponentsDst.SrcZ: + Copy<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), srcSpan.Slice(Unsafe.SizeOf<T>() * 2), dst, src); + break; + case SetRemapComponentsDst.SrcW: + Copy<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), srcSpan.Slice(Unsafe.SizeOf<T>() * 3), dst, src); + break; + case SetRemapComponentsDst.ConstA: + Fill<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA)); + break; + case SetRemapComponentsDst.ConstB: + Fill<T>(dstSpan.Slice(Unsafe.SizeOf<T>() * i), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB)); + break; + } + } + } + + /// <summary> + /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
+ /// </summary> + /// <param name="memoryManager">GPU memory manager</param> + /// <param name="srcGpuVa">Source GPU virtual address</param> + /// <param name="dstGpuVa">Destination GPU virtual address</param> + /// <param name="size">Size in bytes of the copy</param> + private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size) + { + // When both addresses and the size are multiples of 16, move 16 bytes at a time; the address conversion leaves the low 4 bits untouched. Otherwise fall back to byte by byte. + if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0) + { + for (ulong offset = 0; offset < size; offset += 16) + { + Vector128<byte> data = memoryManager.Read<Vector128<byte>>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true); + memoryManager.Write(dstGpuVa + offset, data); + } + } + else + { + for (ulong offset = 0; offset < size; offset++) + { + byte data = memoryManager.Read<byte>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true); + memoryManager.Write(dstGpuVa + offset, data); + } + } + } + + /// <summary> + /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
+ /// </summary> + /// <param name="memoryManager">GPU memory manager</param> + /// <param name="srcGpuVa">Source GPU virtual address</param> + /// <param name="dstGpuVa">Destination GPU virtual address</param> + /// <param name="size">Size in bytes of the copy</param> + private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size) + { + // When both addresses and the size are multiples of 16, move 16 bytes at a time; the address conversion leaves the low 4 bits untouched. Otherwise fall back to byte by byte. + if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0) + { + for (ulong offset = 0; offset < size; offset += 16) + { + Vector128<byte> data = memoryManager.Read<Vector128<byte>>(srcGpuVa + offset, true); + memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data); + } + } + else + { + for (ulong offset = 0; offset < size; offset++) + { + byte data = memoryManager.Read<byte>(srcGpuVa + offset, true); + memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data); + } + } + } + + /// <summary> + /// Calculates the GOB block linear address from a linear address. Only address bits 4 to 8 are permuted; every other bit passes through unchanged. + /// </summary> + /// <param name="address">Linear address</param> + /// <returns>Block linear address</returns> + private static ulong ConvertGobLinearToBlockLinearAddress(ulong address) + { + // Bits 8 down to 0: y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0 + return (address & ~0x1f0UL) | + ((address & 0x40) >> 2) | + ((address & 0x10) << 1) | + ((address & 0x180) >> 1) | + ((address & 0x20) << 3); + } + + /// <summary> + /// Performs a buffer to buffer, or buffer to texture copy, then optionally releases a semaphore.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void LaunchDma(int argument)
+        {
+            // The copy is issued first; the semaphore release follows it.
+            DmaCopy(argument);
+            ReleaseSemaphore(argument);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
new file mode 100644
index 00000000..7de4d5f0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
@@ -0,0 +1,271 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+    /// <summary>
+    /// Physical mode target.
+    /// </summary>
+    enum SetPhysModeTarget
+    {
+        LocalFb = 0,
+        CoherentSysmem = 1,
+        NoncoherentSysmem = 2,
+    }
+
+    /// <summary>
+    /// DMA data transfer type.
+    /// </summary>
+    enum LaunchDmaDataTransferType
+    {
+        None = 0,
+        Pipelined = 1,
+        NonPipelined = 2,
+    }
+
+    /// <summary>
+    /// DMA semaphore type.
+    /// </summary>
+    enum LaunchDmaSemaphoreType
+    {
+        None = 0,
+        ReleaseOneWordSemaphore = 1,
+        ReleaseFourWordSemaphore = 2,
+    }
+
+    /// <summary>
+    /// DMA interrupt type.
+    /// </summary>
+    enum LaunchDmaInterruptType
+    {
+        None = 0,
+        Blocking = 1,
+        NonBlocking = 2,
+    }
+
+    /// <summary>
+    /// DMA memory layout. The same enum is returned by both the source and the
+    /// destination layout properties of <see cref="DmaClassState"/>.
+    /// </summary>
+    enum LaunchDmaMemoryLayout
+    {
+        Blocklinear = 0,
+        Pitch = 1,
+    }
+
+    /// <summary>
+    /// DMA type.
+    /// </summary>
+    enum LaunchDmaType
+    {
+        Virtual = 0,
+        Physical = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction operation.
+    /// </summary>
+    /// <remarks>
+    /// Values 8 and 9 have no named member; the numbering jumps from <see cref="Dec"/> to <see cref="Fadd"/>.
+    /// </remarks>
+    enum LaunchDmaSemaphoreReduction
+    {
+        Imin = 0,
+        Imax = 1,
+        Ixor = 2,
+        Iand = 3,
+        Ior = 4,
+        Iadd = 5,
+        Inc = 6,
+        Dec = 7,
+        Fadd = 10,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction signedness.
+    /// </summary>
+    enum LaunchDmaSemaphoreReductionSign
+    {
+        Signed = 0,
+        Unsigned = 1,
+    }
+
+    /// <summary>
+    /// DMA L2 cache bypass.
+    /// </summary>
+    enum LaunchDmaBypassL2
+    {
+        UsePteSetting = 0,
+        ForceVolatile = 1,
+    }
+
+    /// <summary>
+    /// DMA component remapping: what is written to a destination component
+    /// (one of the source components, one of the two constants, or nothing).
+    /// </summary>
+    enum SetRemapComponentsDst
+    {
+        SrcX = 0,
+        SrcY = 1,
+        SrcZ = 2,
+        SrcW = 3,
+        ConstA = 4,
+        ConstB = 5,
+        NoWrite = 6,
+    }
+
+    /// <summary>
+    /// DMA component remapping component size.
+    /// </summary>
+    /// <remarks>
+    /// The encoded value is one less than the size named by the member.
+    /// </remarks>
+    enum SetRemapComponentsComponentSize
+    {
+        One = 0,
+        Two = 1,
+        Three = 2,
+        Four = 3,
+    }
+
+    /// <summary>
+    /// DMA component remapping number of components.
+    /// </summary>
+    /// <remarks>
+    /// The encoded value is one less than the count named by the member.
+    /// </remarks>
+    enum SetRemapComponentsNumComponents
+    {
+        One = 0,
+        Two = 1,
+        Three = 2,
+        Four = 3,
+    }
+
+    /// <summary>
+    /// Block width in GOBs. Decoded from both the destination and the source block size registers.
+    /// </summary>
+    enum SetBlockSizeWidth
+    {
+        QuarterGob = 14,
+        OneGob = 0,
+    }
+
+    /// <summary>
+    /// Block height in GOBs. Decoded from both the destination and the source block size registers.
+    /// </summary>
+    enum SetBlockSizeHeight
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Block depth in GOBs. Decoded from both the destination and the source block size registers.
+    /// </summary>
+    enum SetBlockSizeDepth
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Height of a single GOB in lines.
+    /// </summary>
+    enum SetBlockSizeGobHeight
+    {
+        GobHeightTesla4 = 0,
+        GobHeightFermi8 = 1,
+    }
+
+    /// <summary>
+    /// DMA copy class state.
+    /// </summary>
+    /// <remarks>
+    /// Every field is a 32-bit word (or a fixed array of them). Assuming sequential layout without padding,
+    /// the hexadecimal suffix of each <c>Reserved</c> member equals its byte offset from the start of the
+    /// struct (for example <c>Reserved104</c> follows 64 reserved words plus <see cref="Nop"/>).
+    /// Expression-bodied properties decode bit fields of the register declared immediately before them.
+    /// NOTE(review): the struct is presumably accessed by register offset, so field order should be kept — confirm.
+    /// </remarks>
+    unsafe struct DmaClassState
+    {
+#pragma warning disable CS0649
+        public fixed uint Reserved00[64];
+        public uint Nop;
+        public fixed uint Reserved104[15];
+        public uint PmTrigger;
+        public fixed uint Reserved144[63];
+        public uint SetSemaphoreA;
+        public int SetSemaphoreAUpper => (int)((SetSemaphoreA >> 0) & 0xFF);
+        public uint SetSemaphoreB;
+        public uint SetSemaphorePayload;
+        public fixed uint Reserved24C[2];
+        public uint SetRenderEnableA;
+        public int SetRenderEnableAUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+        public uint SetRenderEnableB;
+        public uint SetRenderEnableC;
+        public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+        public uint SetSrcPhysMode;
+        public SetPhysModeTarget SetSrcPhysModeTarget => (SetPhysModeTarget)((SetSrcPhysMode >> 0) & 0x3);
+        public uint SetDstPhysMode;
+        public SetPhysModeTarget SetDstPhysModeTarget => (SetPhysModeTarget)((SetDstPhysMode >> 0) & 0x3);
+        public fixed uint Reserved268[38];
+        // LaunchDma packs all of the launch flags below into bits 0..20 of a single word.
+        public uint LaunchDma;
+        public LaunchDmaDataTransferType LaunchDmaDataTransferType => (LaunchDmaDataTransferType)((LaunchDma >> 0) & 0x3);
+        public bool LaunchDmaFlushEnable => (LaunchDma & 0x4) != 0;
+        public LaunchDmaSemaphoreType LaunchDmaSemaphoreType => (LaunchDmaSemaphoreType)((LaunchDma >> 3) & 0x3);
+        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 5) & 0x3);
+        public LaunchDmaMemoryLayout LaunchDmaSrcMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 7) & 0x1);
+        public LaunchDmaMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 8) & 0x1);
+        public bool LaunchDmaMultiLineEnable => (LaunchDma & 0x200) != 0;
+        public bool LaunchDmaRemapEnable => (LaunchDma & 0x400) != 0;
+        public bool LaunchDmaForceRmwdisable => (LaunchDma & 0x800) != 0;
+        public LaunchDmaType LaunchDmaSrcType => (LaunchDmaType)((LaunchDma >> 12) & 0x1);
+        public LaunchDmaType LaunchDmaDstType => (LaunchDmaType)((LaunchDma >> 13) & 0x1);
+        public LaunchDmaSemaphoreReduction LaunchDmaSemaphoreReduction => (LaunchDmaSemaphoreReduction)((LaunchDma >> 14) & 0xF);
+        public LaunchDmaSemaphoreReductionSign LaunchDmaSemaphoreReductionSign => (LaunchDmaSemaphoreReductionSign)((LaunchDma >> 18) & 0x1);
+        public bool LaunchDmaSemaphoreReductionEnable => (LaunchDma & 0x80000) != 0;
+        public LaunchDmaBypassL2 LaunchDmaBypassL2 => (LaunchDmaBypassL2)((LaunchDma >> 20) & 0x1);
+        public fixed uint Reserved304[63];
+        public uint OffsetInUpper;
+        public int OffsetInUpperUpper => (int)((OffsetInUpper >> 0) & 0xFF);
+        public uint OffsetInLower;
+        public uint OffsetOutUpper;
+        public int OffsetOutUpperUpper => (int)((OffsetOutUpper >> 0) & 0xFF);
+        public uint OffsetOutLower;
+        public uint PitchIn;
+        public uint PitchOut;
+        public uint LineLengthIn;
+        public uint LineCount;
+        public fixed uint Reserved420[184];
+        public uint SetRemapConstA;
+        public uint SetRemapConstB;
+        // SetRemapComponents: one 3-bit selector per destination component at bits 0, 4, 8 and 12,
+        // followed by 2-bit size/count fields at bits 16, 20 and 24.
+        public uint SetRemapComponents;
+        public SetRemapComponentsDst SetRemapComponentsDstX => (SetRemapComponentsDst)((SetRemapComponents >> 0) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstY => (SetRemapComponentsDst)((SetRemapComponents >> 4) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstZ => (SetRemapComponentsDst)((SetRemapComponents >> 8) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstW => (SetRemapComponentsDst)((SetRemapComponents >> 12) & 0x7);
+        public SetRemapComponentsComponentSize SetRemapComponentsComponentSize => (SetRemapComponentsComponentSize)((SetRemapComponents >> 16) & 0x3);
+        public SetRemapComponentsNumComponents SetRemapComponentsNumSrcComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 20) & 0x3);
+        public SetRemapComponentsNumComponents SetRemapComponentsNumDstComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 24) & 0x3);
+        // Destination surface description: block size nibbles, dimensions, layer and a packed X/Y origin.
+        public uint SetDstBlockSize;
+        public SetBlockSizeWidth SetDstBlockSizeWidth => (SetBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+        public SetBlockSizeHeight SetDstBlockSizeHeight => (SetBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+        public SetBlockSizeDepth SetDstBlockSizeDepth => (SetBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+        public SetBlockSizeGobHeight SetDstBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetDstBlockSize >> 12) & 0xF);
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstOrigin;
+        public int SetDstOriginX => (int)((SetDstOrigin >> 0) & 0xFFFF);
+        public int SetDstOriginY => (int)((SetDstOrigin >> 16) & 0xFFFF);
+        public uint Reserved724;
+        // Source surface description: same field sequence as the destination group above.
+        public uint SetSrcBlockSize;
+        public SetBlockSizeWidth SetSrcBlockSizeWidth => (SetBlockSizeWidth)((SetSrcBlockSize >> 0) & 0xF);
+        public SetBlockSizeHeight SetSrcBlockSizeHeight => (SetBlockSizeHeight)((SetSrcBlockSize >> 4) & 0xF);
+        public SetBlockSizeDepth SetSrcBlockSizeDepth => (SetBlockSizeDepth)((SetSrcBlockSize >> 8) & 0xF);
+        public SetBlockSizeGobHeight SetSrcBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetSrcBlockSize >> 12) & 0xF);
+        public uint SetSrcWidth;
+        public uint SetSrcHeight;
+        public uint SetSrcDepth;
+        public uint SetSrcLayer;
+        public uint SetSrcOrigin;
+        public int SetSrcOriginX => (int)((SetSrcOrigin >> 0) & 0xFFFF);
+        public int SetSrcOriginY => (int)((SetSrcOrigin >> 16) & 0xFFFF);
+        public fixed uint Reserved740[629];
+        public uint PmTriggerEnd;
+        public fixed uint Reserved1118[2490];
+#pragma warning restore CS0649
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
new file mode 100644
index 00000000..6873ff40
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaTexture.cs
@@ -0,0 +1,20 @@
+using Ryujinx.Graphics.Gpu.Engine.Types;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+    /// <summary>
+    /// Buffer to texture copy parameters.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): the field sequence (layout word, width, height, depth, Z, then two 16-bit X/Y values)
+    /// mirrors the block size / width / height / depth / layer / origin register groups of
+    /// <see cref="DmaClassState"/>; presumably this struct is overlaid on those registers — confirm
+    /// before reordering or resizing any field.
+    /// </remarks>
+    struct DmaTexture
+    {
+#pragma warning disable CS0649
+        public MemoryLayout MemoryLayout;
+        public int Width;
+        public int Height;
+        public int Depth;
+        public int RegionZ;
+        // RegionX and RegionY are 16 bits each, so together they occupy one 32-bit word.
+        public ushort RegionX;
+        public ushort RegionY;
+#pragma warning restore CS0649
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
new file mode 100644
index 00000000..458dc8f6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/CompressedMethod.cs
@@ -0,0 +1,41 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Tertiary opcode, decoded from bits 16..17 of <see cref="CompressedMethod.Method"/>.
+    /// </summary>
+    /// <remarks>
+    /// <see cref="Grp2NonIncMethod"/> shares the value 0 with <see cref="Grp0IncMethod"/>;
+    /// the member-name prefixes suggest that which one applies depends on the secondary opcode group.
+    /// </remarks>
+    enum TertOp
+    {
+        Grp0IncMethod = 0,
+        Grp0SetSubDevMask = 1,
+        Grp0StoreSubDevMask = 2,
+        Grp0UseSubDevMask = 3,
+        Grp2NonIncMethod = 0
+    }
+
+    /// <summary>
+    /// Secondary opcode, decoded from bits 29..31 of <see cref="CompressedMethod.Method"/>.
+    /// </summary>
+    enum SecOp
+    {
+        Grp0UseTert = 0,
+        IncMethod = 1,
+        Grp2UseTert = 2,
+        NonIncMethod = 3,
+        ImmdDataMethod = 4,
+        OneInc = 5,
+        Reserved6 = 6,
+        EndPbSegment = 7
+    }
+
+    /// <summary>
+    /// A single packed 32-bit word. Each property extracts one bit field of <see cref="Method"/>;
+    /// several of the fields overlap, so only some of them are meaningful for a given word.
+    /// </summary>
+    struct CompressedMethod
+    {
+#pragma warning disable CS0649
+        public uint Method;
+#pragma warning restore CS0649
+        public int MethodAddressOld => (int)((Method >> 2) & 0x7FF);
+        public int MethodAddress => (int)((Method >> 0) & 0xFFF);
+        public int SubdeviceMask => (int)((Method >> 4) & 0xFFF);
+        public int MethodSubchannel => (int)((Method >> 13) & 0x7);
+        public TertOp TertOp => (TertOp)((Method >> 16) & 0x3);
+        public int MethodCountOld => (int)((Method >> 18) & 0x7FF);
+        // MethodCount and ImmdData read the same 13 bits (16..28).
+        public int MethodCount => (int)((Method >> 16) & 0x1FFF);
+        public int ImmdData => (int)((Method >> 16) & 0x1FFF);
+        public SecOp SecOp => (SecOp)((Method >> 29) & 0x7);
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
new file mode 100644
index 00000000..b1b236e7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPEntry.cs
@@ -0,0 +1,55 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Fetch mode, decoded from bit 0 of <see cref="GPEntry.Entry0"/>.
+    /// </summary>
+    enum Entry0Fetch
+    {
+        Unconditional = 0,
+        Conditional = 1,
+    }
+
+    /// <summary>
+    /// Privilege level, decoded from bit 8 of <see cref="GPEntry.Entry1"/>.
+    /// </summary>
+    enum Entry1Priv
+    {
+        User = 0,
+        Kernel = 1,
+    }
+
+    /// <summary>
+    /// Level, decoded from bit 9 of <see cref="GPEntry.Entry1"/>.
+    /// </summary>
+    enum Entry1Level
+    {
+        Main = 0,
+        Subroutine = 1,
+    }
+
+    /// <summary>
+    /// Sync mode, decoded from bit 31 of <see cref="GPEntry.Entry1"/>.
+    /// </summary>
+    enum Entry1Sync
+    {
+        Proceed = 0,
+        Wait = 1,
+    }
+
+    /// <summary>
+    /// Opcode, decoded from bits 0..7 of <see cref="GPEntry.Entry1"/>.
+    /// </summary>
+    enum Entry1Opcode
+    {
+        Nop = 0,
+        Illegal = 1,
+        Crc = 2,
+        PbCrc = 3,
+    }
+
+    /// <summary>
+    /// A GPFIFO entry made of two 32-bit words. Each property extracts one bit field
+    /// of <see cref="Entry0"/> or <see cref="Entry1"/>.
+    /// </summary>
+    struct GPEntry
+    {
+#pragma warning disable CS0649
+        public uint Entry0;
+#pragma warning restore CS0649
+        public Entry0Fetch Entry0Fetch => (Entry0Fetch)((Entry0 >> 0) & 0x1);
+        public int Entry0Get => (int)((Entry0 >> 2) & 0x3FFFFFFF);
+        public int Entry0Operand => (int)(Entry0);
+#pragma warning disable CS0649
+        public uint Entry1;
+#pragma warning restore CS0649
+        // Entry1GetHi and Entry1Opcode read the same 8 bits (0..7).
+        public int Entry1GetHi => (int)((Entry1 >> 0) & 0xFF);
+        public Entry1Priv Entry1Priv => (Entry1Priv)((Entry1 >> 8) & 0x1);
+        public Entry1Level Entry1Level => (Entry1Level)((Entry1 >> 9) & 0x1);
+        public int Entry1Length => (int)((Entry1 >> 10) & 0x1FFFFF);
+        public Entry1Sync Entry1Sync => (Entry1Sync)((Entry1 >> 31) & 0x1);
+        public Entry1Opcode Entry1Opcode => (Entry1Opcode)((Entry1 >> 0) & 0xFF);
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
new file mode 100644
index 00000000..e80d98a1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
@@ -0,0 +1,248 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.MME;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Represents a GPU General Purpose FIFO class.
+    /// </summary>
+    class GPFifoClass : IDeviceState
+    {
+        private readonly GpuContext _context;
+        private readonly GPFifoProcessor _parent;
+        private readonly DeviceState<GPFifoClassState> _state;
+
+        // Set by WaitForIdle and consumed (then cleared) by CreatePendingSyncs.
+        private bool _createSyncPending;
+
+        private const int MacrosCount = 0x80;
+
+        // Note: The size of the macro memory is unknown, we just make
+        // a guess here and use 65536 32-bit words (256 KiB) as the size. Increase if needed.
+        private const int MacroCodeSize = 256 * 256;
+
+        private readonly Macro[] _macros;
+        private readonly int[] _macroCode;
+
+        /// <summary>
+        /// Creates a new instance of the GPU General Purpose FIFO class.
+        /// </summary>
+        /// <remarks>
+        /// Registers one write callback per handled register and allocates the macro table
+        /// and the macro code memory.
+        /// </remarks>
+        /// <param name="context">GPU context</param>
+        /// <param name="parent">Parent GPU General Purpose FIFO processor</param>
+        public GPFifoClass(GpuContext context, GPFifoProcessor parent)
+        {
+            _context = context;
+            _parent = parent;
+            _state = new DeviceState<GPFifoClassState>(new Dictionary<string, RwCallback>
+            {
+                { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) },
+                { nameof(GPFifoClassState.Syncpointb), new RwCallback(Syncpointb, null) },
+                { nameof(GPFifoClassState.WaitForIdle), new RwCallback(WaitForIdle, null) },
+                { nameof(GPFifoClassState.SetReference), new RwCallback(SetReference, null) },
+                { nameof(GPFifoClassState.LoadMmeInstructionRam), new RwCallback(LoadMmeInstructionRam, null) },
+                { nameof(GPFifoClassState.LoadMmeStartAddressRam), new RwCallback(LoadMmeStartAddressRam, null) },
+                { nameof(GPFifoClassState.SetMmeShadowRamControl), new RwCallback(SetMmeShadowRamControl, null) }
+            });
+
+            _macros = new Macro[MacrosCount];
+            _macroCode = new int[MacroCodeSize];
+        }
+
+        /// <summary>
+        /// Create any syncs from WaitForIdle command that are currently pending.
+        /// </summary>
+        public void CreatePendingSyncs()
+        {
+            if (!_createSyncPending)
+            {
+                return;
+            }
+
+            _createSyncPending = false;
+            _context.CreateHostSyncIfNeeded(false, false);
+        }
+
+        /// <summary>
+        /// Reads data from the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public int Read(int offset)
+        {
+            return _state.Read(offset);
+        }
+
+        /// <summary>
+        /// Writes data to the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public void Write(int offset, int data)
+        {
+            _state.Write(offset, data);
+        }
+
+        /// <summary>
+        /// Performs the semaphore operation selected by the Semaphored register: a release writes the
+        /// payload to guest memory, a reduction combines the payload with the value currently in memory
+        /// and writes the result back. Other operations are not handled.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        public void Semaphored(int argument)
+        {
+            ulong addressLow = (ulong)_state.State.SemaphorebOffsetLower << 2;
+            ulong addressHigh = (ulong)_state.State.SemaphoreaOffsetUpper << 32;
+            ulong address = addressLow | addressHigh;
+
+            int payload = _state.State.SemaphorecPayload;
+
+            SemaphoredOperation operation = _state.State.SemaphoredOperation;
+
+            // The sixteen byte structure additionally carries a zero word and a timestamp.
+            if (_state.State.SemaphoredReleaseSize == SemaphoredReleaseSize.SixteenBytes)
+            {
+                _parent.MemoryManager.Write(address + 4, 0);
+                _parent.MemoryManager.Write(address + 8, _context.GetTimestamp());
+            }
+
+            // TODO: Acquire operations (Wait), interrupts for invalid combinations.
+            if (operation == SemaphoredOperation.Release)
+            {
+                _parent.MemoryManager.Write(address, payload);
+
+                return;
+            }
+
+            if (operation != SemaphoredOperation.Reduction)
+            {
+                return;
+            }
+
+            bool isSigned = _state.State.SemaphoredFormat == SemaphoredFormat.Signed;
+            int current = _parent.MemoryManager.Read<int>(address);
+            int result = ReduceSemaphoreValue(_state.State.SemaphoredReduction, isSigned, current, payload);
+
+            _parent.MemoryManager.Write(address, result);
+        }
+
+        /// <summary>
+        /// Combines the value currently stored in memory with the semaphore payload.
+        /// </summary>
+        /// <param name="reduction">Reduction operation to apply</param>
+        /// <param name="isSigned">True to compare as signed integers for Min and Max, false for unsigned</param>
+        /// <param name="mem">Value currently stored in memory</param>
+        /// <param name="value">Semaphore payload</param>
+        /// <returns>Value to be written back; the payload itself for an unrecognized reduction</returns>
+        private static int ReduceSemaphoreValue(SemaphoredReduction reduction, bool isSigned, int mem, int value)
+        {
+            switch (reduction)
+            {
+                case SemaphoredReduction.Min:
+                    return isSigned ? Math.Min(mem, value) : (int)Math.Min((uint)mem, (uint)value);
+                case SemaphoredReduction.Max:
+                    return isSigned ? Math.Max(mem, value) : (int)Math.Max((uint)mem, (uint)value);
+                case SemaphoredReduction.Xor:
+                    return value ^ mem;
+                case SemaphoredReduction.And:
+                    return value & mem;
+                case SemaphoredReduction.Or:
+                    return value | mem;
+                case SemaphoredReduction.Add:
+                    return value + mem;
+                case SemaphoredReduction.Inc:
+                    // Wraps to zero once the memory value reaches the payload.
+                    return (uint)mem < (uint)value ? mem + 1 : 0;
+                case SemaphoredReduction.Dec:
+                    // Reloads the payload when memory is zero or above the payload.
+                    return (uint)mem > 0 && (uint)mem <= (uint)value ? mem - 1 : value;
+                default:
+                    return value;
+            }
+        }
+
+        /// <summary>
+        /// Apply a fence operation on a syncpoint.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        public void Syncpointb(int argument)
+        {
+            SyncpointbOperation operation = _state.State.SyncpointbOperation;
+
+            uint syncpointId = (uint)_state.State.SyncpointbSyncptIndex;
+
+            switch (operation)
+            {
+                case SyncpointbOperation.Wait:
+                    uint threshold = (uint)_state.State.SyncpointaPayload;
+
+                    _context.Synchronization.WaitOnSyncpoint(syncpointId, threshold, Timeout.InfiniteTimeSpan);
+                    break;
+                case SyncpointbOperation.Incr:
+                    _context.CreateHostSyncIfNeeded(true, true);
+                    _context.Synchronization.IncrementSyncpoint(syncpointId);
+                    break;
+            }
+
+            _context.AdvanceSequence();
+        }
+
+        /// <summary>
+        /// Waits for the GPU to be idle.
+        /// </summary>
+        /// <remarks>
+        /// Performs deferred draws, inserts a pipeline barrier and marks a sync as pending;
+        /// the pending sync is created later by <see cref="CreatePendingSyncs"/>.
+        /// </remarks>
+        /// <param name="argument">Method call argument</param>
+        public void WaitForIdle(int argument)
+        {
+            _parent.PerformDeferredDraws();
+            _context.Renderer.Pipeline.Barrier();
+
+            _createSyncPending = true;
+        }
+
+        /// <summary>
+        /// Used as an indirect data barrier on NVN. When used, access to previously written data must be coherent.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        public void SetReference(int argument)
+        {
+            _context.Renderer.Pipeline.CommandBufferBarrier();
+
+            _context.CreateHostSyncIfNeeded(false, true);
+        }
+
+        /// <summary>
+        /// Sends macro code/data to the MME.
+        /// </summary>
+        /// <remarks>
+        /// Stores the word at the current instruction RAM pointer and advances the pointer by one.
+        /// NOTE(review): the pointer is used as an array index without a range check — confirm this is intended.
+        /// </remarks>
+        /// <param name="argument">Method call argument</param>
+        public void LoadMmeInstructionRam(int argument)
+        {
+            uint codeIndex = _state.State.LoadMmeInstructionRamPointer++;
+
+            _macroCode[codeIndex] = argument;
+        }
+
+        /// <summary>
+        /// Binds a macro index to a position for the MME
+        /// </summary>
+        /// <remarks>
+        /// Creates the macro at the current start address RAM pointer and advances the pointer by one.
+        /// NOTE(review): the pointer is used as an array index without a range check — confirm this is intended.
+        /// </remarks>
+        /// <param name="argument">Method call argument</param>
+        public void LoadMmeStartAddressRam(int argument)
+        {
+            uint macroIndex = _state.State.LoadMmeStartAddressRamPointer++;
+
+            _macros[macroIndex] = new Macro(argument);
+        }
+
+        /// <summary>
+        /// Changes the shadow RAM control.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        public void SetMmeShadowRamControl(int argument) => _parent.SetShadowRamControl(argument);
+
+        /// <summary>
+        /// Pushes an argument to a macro.
+        /// </summary>
+        /// <param name="index">Index of the macro</param>
+        /// <param name="gpuVa">GPU virtual address where the command word is located</param>
+        /// <param name="argument">Argument to be pushed to the macro</param>
+        public void MmePushArgument(int index, ulong gpuVa, int argument) => _macros[index].PushArgument(gpuVa, argument);
+
+        /// <summary>
+        /// Prepares a macro for execution.
+        /// </summary>
+        /// <param name="index">Index of the macro</param>
+        /// <param name="argument">Initial argument passed to the macro</param>
+        public void MmeStart(int index, int argument) => _macros[index].StartExecution(_context, _parent, _macroCode, argument);
+
+        /// <summary>
+        /// Executes a macro.
+        /// </summary>
+        /// <param name="index">Index of the macro</param>
+        /// <param name="state">Current GPU state</param>
+        public void CallMme(int index, IDeviceState state)
+        {
+            // The macro runs against the shared macro code memory of this class.
+            _macros[index].Execute(_macroCode, state);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
new file mode 100644
index 00000000..07d062eb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
@@ -0,0 +1,233 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Semaphore operation.
+    /// </summary>
+    /// <remarks>
+    /// Each member uses a distinct single bit.
+    /// </remarks>
+    enum SemaphoredOperation
+    {
+        Acquire = 1,
+        Release = 2,
+        AcqGeq = 4,
+        AcqAnd = 8,
+        Reduction = 16
+    }
+
+    /// <summary>
+    /// Semaphore acquire switch enable.
+    /// </summary>
+    enum SemaphoredAcquireSwitch
+    {
+        Disabled = 0,
+        Enabled = 1
+    }
+
+    /// <summary>
+    /// Semaphore release interrupt wait enable.
+    /// </summary>
+    /// <remarks>
+    /// Note the polarity: 0 means enabled, 1 means disabled.
+    /// </remarks>
+    enum SemaphoredReleaseWfi
+    {
+        En = 0,
+        Dis = 1
+    }
+
+    /// <summary>
+    /// Semaphore release structure size.
+    /// </summary>
+    enum SemaphoredReleaseSize
+    {
+        SixteenBytes = 0,
+        FourBytes = 1
+    }
+
+    /// <summary>
+    /// Semaphore reduction operation.
+    /// </summary>
+    enum SemaphoredReduction
+    {
+        Min = 0,
+        Max = 1,
+        Xor = 2,
+        And = 3,
+        Or = 4,
+        Add = 5,
+        Inc = 6,
+        Dec = 7
+    }
+
+    /// <summary>
+    /// Semaphore format.
+    /// </summary>
+    enum SemaphoredFormat
+    {
+        Signed = 0,
+        Unsigned = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer Page Directory Buffer invalidation.
+    /// </summary>
+    enum MemOpCTlbInvalidatePdb
+    {
+        One = 0,
+        All = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer GPC invalidation enable.
+    /// </summary>
+    /// <remarks>
+    /// Note the polarity: 0 means enabled, 1 means disabled.
+    /// </remarks>
+    enum MemOpCTlbInvalidateGpc
+    {
+        Enable = 0,
+        Disable = 1
+    }
+
+    /// <summary>
+    /// Memory Translation Lookaside Buffer invalidation target.
+    /// </summary>
+    /// <remarks>
+    /// Value 1 has no named member.
+    /// </remarks>
+    enum MemOpCTlbInvalidateTarget
+    {
+        VidMem = 0,
+        SysMemCoherent = 2,
+        SysMemNoncoherent = 3
+    }
+
+    /// <summary>
+    /// Memory operation.
+    /// </summary>
+    /// <remarks>
+    /// The values are sparse; only the listed codes have named members.
+    /// </remarks>
+    enum MemOpDOperation
+    {
+        Membar = 5,
+        MmuTlbInvalidate = 9,
+        L2PeermemInvalidate = 13,
+        L2SysmemInvalidate = 14,
+        L2CleanComptags = 15,
+        L2FlushDirty = 16
+    }
+
+    /// <summary>
+    /// Syncpoint operation.
+    /// </summary>
+    enum SyncpointbOperation
+    {
+        Wait = 0,
+        Incr = 1
+    }
+
+    /// <summary>
+    /// Syncpoint wait switch enable.
+    /// </summary>
+    enum SyncpointbWaitSwitch
+    {
+        Dis = 0,
+        En = 1
+    }
+
+    /// <summary>
+    /// Wait for interrupt scope.
+    /// </summary>
+    enum WfiScope
+    {
+        CurrentScgType = 0,
+        All = 1
+    }
+
+    /// <summary>
+    /// Yield operation.
+    /// </summary>
+    enum YieldOp
+    {
+        Nop = 0,
+        PbdmaTimeslice = 1,
+        RunlistTimeslice = 2,
+        Tsg = 3
+    }
+
+    /// <summary>
+    /// General Purpose FIFO class state.
+    /// </summary>
+    /// <remarks>
+    /// Every field is a 32-bit word. Assuming sequential layout without padding, the hexadecimal suffix of
+    /// each <c>Reserved</c> member equals its byte offset from the start of the struct.
+    /// Expression-bodied properties decode bit fields of the register declared immediately before them.
+    /// NOTE(review): the struct is presumably accessed by register offset, so field order should be kept — confirm.
+    /// </remarks>
+    struct GPFifoClassState
+    {
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectNvclass => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngine => (int)((SetObject >> 16) & 0x1F);
+        public uint Illegal;
+        public int IllegalHandle => (int)(Illegal);
+        public uint Nop;
+        public int NopHandle => (int)(Nop);
+        public uint Reserved0C;
+        // Semaphore registers: upper address bits (a), lower address bits (b), payload (c) and control word (d).
+        public uint Semaphorea;
+        public int SemaphoreaOffsetUpper => (int)((Semaphorea >> 0) & 0xFF);
+        public uint Semaphoreb;
+        // The lower offset is stored without its two least significant bits.
+        public int SemaphorebOffsetLower => (int)((Semaphoreb >> 2) & 0x3FFFFFFF);
+        public uint Semaphorec;
+        public int SemaphorecPayload => (int)(Semaphorec);
+        public uint Semaphored;
+        public SemaphoredOperation SemaphoredOperation => (SemaphoredOperation)((Semaphored >> 0) & 0x1F);
+        public SemaphoredAcquireSwitch SemaphoredAcquireSwitch => (SemaphoredAcquireSwitch)((Semaphored >> 12) & 0x1);
+        public SemaphoredReleaseWfi SemaphoredReleaseWfi => (SemaphoredReleaseWfi)((Semaphored >> 20) & 0x1);
+        public SemaphoredReleaseSize SemaphoredReleaseSize => (SemaphoredReleaseSize)((Semaphored >> 24) & 0x1);
+        public SemaphoredReduction SemaphoredReduction => (SemaphoredReduction)((Semaphored >> 27) & 0xF);
+        public SemaphoredFormat SemaphoredFormat => (SemaphoredFormat)((Semaphored >> 31) & 0x1);
+        public uint NonStallInterrupt;
+        public int NonStallInterruptHandle => (int)(NonStallInterrupt);
+        public uint FbFlush;
+        public int FbFlushHandle => (int)(FbFlush);
+        public uint Reserved28;
+        public uint Reserved2C;
+        // MemOpC is decoded in several overlapping ways (generic operand vs. TLB invalidate fields).
+        public uint MemOpC;
+        public int MemOpCOperandLow => (int)((MemOpC >> 2) & 0x3FFFFFFF);
+        public MemOpCTlbInvalidatePdb MemOpCTlbInvalidatePdb => (MemOpCTlbInvalidatePdb)((MemOpC >> 0) & 0x1);
+        public MemOpCTlbInvalidateGpc MemOpCTlbInvalidateGpc => (MemOpCTlbInvalidateGpc)((MemOpC >> 1) & 0x1);
+        public MemOpCTlbInvalidateTarget MemOpCTlbInvalidateTarget => (MemOpCTlbInvalidateTarget)((MemOpC >> 10) & 0x3);
+        public int MemOpCTlbInvalidateAddrLo => (int)((MemOpC >> 12) & 0xFFFFF);
+        public uint MemOpD;
+        // MemOpDOperandHigh and MemOpDTlbInvalidateAddrHi read the same 8 bits (0..7).
+        public int MemOpDOperandHigh => (int)((MemOpD >> 0) & 0xFF);
+        public MemOpDOperation MemOpDOperation => (MemOpDOperation)((MemOpD >> 27) & 0x1F);
+        public int MemOpDTlbInvalidateAddrHi => (int)((MemOpD >> 0) & 0xFF);
+        public uint Reserved38;
+        public uint Reserved3C;
+        public uint Reserved40;
+        public uint Reserved44;
+        public uint Reserved48;
+        public uint Reserved4C;
+        public uint SetReference;
+        public int SetReferenceCount => (int)(SetReference);
+        public uint Reserved54;
+        public uint Reserved58;
+        public uint Reserved5C;
+        public uint Reserved60;
+        public uint Reserved64;
+        public uint Reserved68;
+        public uint Reserved6C;
+        public uint Syncpointa;
+        public int SyncpointaPayload => (int)(Syncpointa);
+        public uint Syncpointb;
+        public SyncpointbOperation SyncpointbOperation => (SyncpointbOperation)((Syncpointb >> 0) & 0x1);
+        public SyncpointbWaitSwitch SyncpointbWaitSwitch => (SyncpointbWaitSwitch)((Syncpointb >> 4) & 0x1);
+        public int SyncpointbSyncptIndex => (int)((Syncpointb >> 8) & 0xFFF);
+        public uint Wfi;
+        public WfiScope WfiScope => (WfiScope)((Wfi >> 0) & 0x1);
+        public uint CrcCheck;
+        public int CrcCheckValue => (int)(CrcCheck);
+        public uint Yield;
+        public YieldOp YieldOp => (YieldOp)((Yield >> 0) & 0x3);
+        // TODO: Eventually move this to per-engine state.
+        public Array31<uint> Reserved84;
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public uint SetNotifyB;
+        public uint Notify;
+        public uint WaitForIdle;
+        // Each MME RAM register is preceded by the pointer register that selects where the next word goes.
+        public uint LoadMmeInstructionRamPointer;
+        public uint LoadMmeInstructionRam;
+        public uint LoadMmeStartAddressRamPointer;
+        public uint LoadMmeStartAddressRam;
+        public uint SetMmeShadowRamControl;
+#pragma warning restore CS0649
+    }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
new file mode 100644
index 00000000..cd29a9da
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -0,0 +1,262 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Collections.Concurrent;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+{
+    /// <summary>
+    /// Represents a GPU General Purpose FIFO device.
+    /// </summary>
+    public sealed class GPFifoDevice : IDisposable
+    {
+        /// <summary>
+        /// Indicates if the command buffer has pre-fetch enabled.
+        /// </summary>
+        private enum CommandBufferType
+        {
+            Prefetch,
+            NoPrefetch
+        }
+
+        /// <summary>
+        /// Command buffer data.
+        /// </summary>
+        private struct CommandBuffer
+        {
+            /// <summary>
+            /// Processor used to process the command buffer. Contains channel state.
+            /// </summary>
+            public GPFifoProcessor Processor;
+
+            /// <summary>
+            /// The type of the command buffer.
+            /// </summary>
+            public CommandBufferType Type;
+
+            /// <summary>
+            /// Fetched data. Null until the command buffer has been prefetched or supplied by the host.
+            /// </summary>
+            public int[] Words;
+
+            /// <summary>
+            /// The GPFIFO entry address (used in <see cref="CommandBufferType.NoPrefetch"/> mode).
+            /// </summary>
+            public ulong EntryAddress;
+
+            /// <summary>
+            /// The count of entries inside this GPFIFO entry.
+            /// </summary>
+            public uint EntryCount;
+
+            /// <summary>
+            /// Get the entries for the command buffer from memory.
+            /// </summary>
+            /// <param name="memoryManager">The memory manager used to fetch the data</param>
+            /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+            /// <returns>The fetched data</returns>
+            private ReadOnlySpan<int> GetWords(MemoryManager memoryManager, bool flush)
+            {
+                // Each entry is one 32-bit word.
+                int sizeInBytes = (int)EntryCount * 4;
+
+                return MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, sizeInBytes, flush));
+            }
+
+            /// <summary>
+            /// Prefetch the command buffer.
+            /// </summary>
+            /// <remarks>
+            /// Reads the words with flushing enabled and keeps a private copy of them.
+            /// </remarks>
+            /// <param name="memoryManager">The memory manager used to fetch the data</param>
+            public void Prefetch(MemoryManager memoryManager)
+            {
+                ReadOnlySpan<int> fetched = GetWords(memoryManager, true);
+
+                Words = fetched.ToArray();
+            }
+
+            /// <summary>
+            /// Fetch the command buffer.
+            /// </summary>
+            /// <remarks>
+            /// Returns the already stored words when there are any; memory is only read otherwise.
+            /// </remarks>
+            /// <param name="memoryManager">The memory manager used to fetch the data</param>
+            /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+            /// <returns>The command buffer words</returns>
+            public ReadOnlySpan<int> Fetch(MemoryManager memoryManager, bool flush)
+            {
+                if (Words != null)
+                {
+                    return Words;
+                }
+
+                return GetWords(memoryManager, flush);
+            }
+        }
+
+        private readonly ConcurrentQueue<CommandBuffer> _commandBufferQueue;
+
+        private CommandBuffer _currentCommandBuffer;
+        private GPFifoProcessor _prevChannelProcessor;
+
+        private readonly bool _ibEnable;
+        private readonly GpuContext _context;
+        private readonly AutoResetEvent _event;
+
+        private bool _interrupt;
+        private int _flushSkips;
+
+        /// <summary>
+        /// Creates a new instance of the GPU General Purpose FIFO device.
+        /// </summary>
+        /// <param name="context">GPU context that the GPFIFO belongs to</param>
+        internal GPFifoDevice(GpuContext context)
+        {
+            _context = context;
+            _ibEnable = true;
+            _commandBufferQueue = new ConcurrentQueue<CommandBuffer>();
+            _event = new AutoResetEvent(false);
+        }
+
+        /// <summary>
+        /// Signal the FIFO that there are new entries to process.
+        /// </summary>
+        public void SignalNewEntries() => _event.Set();
+
+        /// <summary>
+        /// Push a GPFIFO entry in the form of a prefetched command buffer.
+        /// It is intended to be used by nvservices to handle special cases.
+        /// </summary>
+        /// <param name="processor">Processor used to process <paramref name="commandBuffer"/></param>
+        /// <param name="commandBuffer">The command buffer containing the prefetched commands</param>
+        internal void PushHostCommandBuffer(GPFifoProcessor processor, int[] commandBuffer)
+        {
+            CommandBuffer hostBuffer = new CommandBuffer();
+
+            hostBuffer.Processor = processor;
+            hostBuffer.Type = CommandBufferType.Prefetch;
+            hostBuffer.Words = commandBuffer;
+            // There is no guest address for a host supplied buffer.
+            hostBuffer.EntryAddress = ulong.MaxValue;
+            hostBuffer.EntryCount = (uint)commandBuffer.Length;
+
+            _commandBufferQueue.Enqueue(hostBuffer);
+        }
+
+        /// <summary>
+        /// Create a CommandBuffer from a GPFIFO entry.
+        /// </summary>
+        /// <remarks>
+        /// The words are not read here; the returned command buffer only records where they are.
+        /// </remarks>
+        /// <param name="processor">Processor used to process the command buffer pointed to by <paramref name="entry"/></param>
+        /// <param name="entry">The GPFIFO entry</param>
+        /// <returns>A new CommandBuffer based on the GPFIFO entry</returns>
+        private static CommandBuffer CreateCommandBuffer(GPFifoProcessor processor, GPEntry entry)
+        {
+            // Entries that request a wait are not prefetched.
+            CommandBufferType type = entry.Entry1Sync == Entry1Sync.Wait
+                ? CommandBufferType.NoPrefetch
+                : CommandBufferType.Prefetch;
+
+            ulong addressLow = (ulong)entry.Entry0Get << 2;
+            ulong addressHigh = (ulong)entry.Entry1GetHi << 32;
+
+            CommandBuffer result = new CommandBuffer();
+
+            result.Processor = processor;
+            result.Type = type;
+            result.Words = null;
+            result.EntryAddress = addressLow | addressHigh;
+            result.EntryCount = (uint)entry.Entry1Length;
+
+            return result;
+        }
+
+        /// <summary>
+        /// Pushes GPFIFO entries.
+ /// </summary> + /// <param name="processor">Processor used to process the command buffers pointed to by <paramref name="entries"/></param> + /// <param name="entries">GPFIFO entries</param> + internal void PushEntries(GPFifoProcessor processor, ReadOnlySpan<ulong> entries) + { + bool beforeBarrier = true; + + for (int index = 0; index < entries.Length; index++) + { + ulong entry = entries[index]; + + CommandBuffer commandBuffer = CreateCommandBuffer(processor, Unsafe.As<ulong, GPEntry>(ref entry)); + + if (beforeBarrier && commandBuffer.Type == CommandBufferType.Prefetch) + { + commandBuffer.Prefetch(processor.MemoryManager); + } + + if (commandBuffer.Type == CommandBufferType.NoPrefetch) + { + beforeBarrier = false; + } + + _commandBufferQueue.Enqueue(commandBuffer); + } + } + + /// <summary> + /// Waits until commands are pushed to the FIFO. + /// </summary> + /// <returns>True if commands were received, false if wait timed out</returns> + public bool WaitForCommands() + { + return !_commandBufferQueue.IsEmpty || (_event.WaitOne(8) && !_commandBufferQueue.IsEmpty); + } + + /// <summary> + /// Processes commands pushed to the FIFO. + /// </summary> + public void DispatchCalls() + { + // Use this opportunity to also dispose any pending channels that were closed. + _context.RunDeferredActions(); + + // Process command buffers. + while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry)) + { + bool flushCommandBuffer = true; + + if (_flushSkips != 0) + { + _flushSkips--; + flushCommandBuffer = false; + } + + _currentCommandBuffer = entry; + ReadOnlySpan<int> words = entry.Fetch(entry.Processor.MemoryManager, flushCommandBuffer); + + // If we are changing the current channel, + // we need to force all the host state to be updated. 
+ if (_prevChannelProcessor != entry.Processor) + { + _prevChannelProcessor = entry.Processor; + entry.Processor.ForceAllDirty(); + } + + entry.Processor.Process(entry.EntryAddress, words); + } + + // Clear the interrupt request, so that the next call processes commands again. + _interrupt = false; + } + + /// <summary> + /// Sets the number of flushes that should be skipped for subsequent command buffers. + /// </summary> + /// <remarks> + /// This can improve performance when command buffer data only needs to be consumed by the GPU. + /// </remarks> + /// <param name="count">The amount of flushes that should be skipped</param> + internal void SetFlushSkips(int count) + { + _flushSkips = count; + } + + /// <summary> + /// Interrupts command processing. This will break out of the DispatchCalls loop. + /// </summary> + /// <remarks> + /// The request is cleared when DispatchCalls returns. + /// </remarks> + public void Interrupt() + { + _interrupt = true; + } + + /// <summary> + /// Disposes of resources used for GPFifo command processing. + /// </summary> + public void Dispose() => _event.Dispose(); + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs new file mode 100644 index 00000000..3fb3feee --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs @@ -0,0 +1,331 @@ +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Engine.Compute; +using Ryujinx.Graphics.Gpu.Engine.Dma; +using Ryujinx.Graphics.Gpu.Engine.InlineToMemory; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Engine.Twod; +using Ryujinx.Graphics.Gpu.Memory; +using System; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Engine.GPFifo +{ + /// <summary> + /// Represents a GPU General Purpose FIFO command processor.
+ /// </summary> + class GPFifoProcessor + { + // Number of macro slots; the mask is used to extract the macro index from a method offset. + private const int MacrosCount = 0x80; + private const int MacroIndexMask = MacrosCount - 1; + + // Method offsets (in words) that are matched by the fast data upload paths. + private const int LoadInlineDataMethodOffset = 0x6d; + private const int UniformBufferUpdateDataMethodOffset = 0x8e4; + + private readonly GpuChannel _channel; + + /// <summary> + /// Channel memory manager. + /// </summary> + public MemoryManager MemoryManager => _channel.MemoryManager; + + /// <summary> + /// 3D Engine. + /// </summary> + public ThreedClass ThreedClass => _3dClass; + + /// <summary> + /// Internal GPFIFO state. + /// </summary> + private struct DmaState + { + // Offset of the method that receives the next argument. + public int Method; + // Sub-channel that the current method was sent to. + public int SubChannel; + // Number of arguments still expected for the current method, 0 when a new header is expected. + public int MethodCount; + // When set, the method offset is not advanced after each argument. + public bool NonIncrementing; + // When set, the method offset is advanced only after the first argument. + public bool IncrementOnce; + } + + private DmaState _state; + + private readonly ThreedClass _3dClass; + private readonly ComputeClass _computeClass; + private readonly InlineToMemoryClass _i2mClass; + private readonly TwodClass _2dClass; + private readonly DmaClass _dmaClass; + + private readonly GPFifoClass _fifoClass; + + /// <summary> + /// Creates a new instance of the GPU General Purpose FIFO command processor. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">Channel that the GPFIFO processor belongs to</param> + public GPFifoProcessor(GpuContext context, GpuChannel channel) + { + _channel = channel; + + _fifoClass = new GPFifoClass(context, this); + _3dClass = new ThreedClass(context, channel, _fifoClass); + _computeClass = new ComputeClass(context, channel, _3dClass); + _i2mClass = new InlineToMemoryClass(context, channel); + _2dClass = new TwodClass(channel); + _dmaClass = new DmaClass(context, channel, _3dClass); + } + + /// <summary> + /// Processes a command buffer.
+ /// </summary> + /// <param name="baseGpuVa">Base GPU virtual address of the command buffer</param> + /// <param name="commandBuffer">Command buffer</param> + public void Process(ulong baseGpuVa, ReadOnlySpan<int> commandBuffer) + { + for (int index = 0; index < commandBuffer.Length; index++) + { + int command = commandBuffer[index]; + + ulong gpuVa = baseGpuVa + (ulong)index * 4; + + // Arguments are still pending for the current method, so this word is an argument. + if (_state.MethodCount != 0) + { + if (TryFastI2mBufferUpdate(commandBuffer, ref index)) + { + continue; + } + + // The call is flagged as the last one when this is the final pending argument. + Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1); + + if (!_state.NonIncrementing) + { + _state.Method++; + } + + if (_state.IncrementOnce) + { + _state.NonIncrementing = true; + } + + _state.MethodCount--; + } + else + { + // No arguments are pending, so this word is a compressed method header. + CompressedMethod meth = Unsafe.As<int, CompressedMethod>(ref command); + + if (TryFastUniformBufferUpdate(meth, commandBuffer, index)) + { + // Skip the arguments that were consumed by the fast path. + index += meth.MethodCount; + continue; + } + + switch (meth.SecOp) + { + case SecOp.IncMethod: + case SecOp.NonIncMethod: + case SecOp.OneInc: + _state.Method = meth.MethodAddress; + _state.SubChannel = meth.MethodSubchannel; + _state.MethodCount = meth.MethodCount; + _state.IncrementOnce = meth.SecOp == SecOp.OneInc; + _state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod; + break; + case SecOp.ImmdDataMethod: + Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true); + break; + } + } + } + + _3dClass.FlushUboDirty(); + } + + /// <summary> + /// Tries to perform a fast Inline-to-Memory data update. + /// If successful, all available data will be copied at once, and up to <see cref="DmaState.MethodCount"/> + /// command buffer entries will be consumed.
+ /// </summary> + /// <param name="commandBuffer">Command buffer where the data is contained</param> + /// <param name="offset">Offset at <paramref name="commandBuffer"/> where the data is located, auto-incremented on success</param> + /// <returns>True if the fast copy was successful, false otherwise</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryFastI2mBufferUpdate(ReadOnlySpan<int> commandBuffer, ref int offset) + { + if (_state.Method == LoadInlineDataMethodOffset && _state.NonIncrementing && _state.SubChannel <= 2) + { + // Only the words present on this command buffer can be consumed; the remaining count is kept on the state. + int availableCount = commandBuffer.Length - offset; + int consumeCount = Math.Min(_state.MethodCount, availableCount); + + var data = commandBuffer.Slice(offset, consumeCount); + + if (_state.SubChannel == 0) + { + _3dClass.LoadInlineData(data); + } + else if (_state.SubChannel == 1) + { + _computeClass.LoadInlineData(data); + } + else /* if (_state.SubChannel == 2) */ + { + _i2mClass.LoadInlineData(data); + } + + // The caller's loop increments the index once more, so advance by one less than what was consumed. + offset += consumeCount - 1; + _state.MethodCount -= consumeCount; + + return true; + } + + return false; + } + + /// <summary> + /// Tries to perform a fast constant buffer data update. + /// If successful, all data will be copied at once, and <see cref="CompressedMethod.MethodCount"/> + 1 + /// command buffer entries will be consumed.
+ /// </summary> + /// <param name="meth">Compressed method to be checked</param> + /// <param name="commandBuffer">Command buffer where <paramref name="meth"/> is contained</param> + /// <param name="offset">Offset at <paramref name="commandBuffer"/> where <paramref name="meth"/> is located</param> + /// <returns>True if the fast copy was successful, false otherwise</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryFastUniformBufferUpdate(CompressedMethod meth, ReadOnlySpan<int> commandBuffer, int offset) + { + // This count includes the method header word itself, so all arguments only fit if the count is strictly lower. + int availableCount = commandBuffer.Length - offset; + + if (meth.MethodAddress == UniformBufferUpdateDataMethodOffset && + meth.MethodCount < availableCount && + meth.SecOp == SecOp.NonIncMethod) + { + _3dClass.ConstantBufferUpdate(commandBuffer.Slice(offset + 1, meth.MethodCount)); + + return true; + } + + return false; + } + + /// <summary> + /// Sends an uncompressed method for processing by the graphics pipeline. + /// </summary> + /// <param name="gpuVa">GPU virtual address where the command word is located</param> + /// <param name="offset">Offset of the method to be processed, in words</param> + /// <param name="argument">Method call argument</param> + /// <param name="subChannel">Sub-channel that the method was sent to</param> + /// <param name="isLastCall">True if this is the last argument of the current method, false otherwise</param> + private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall) + { + // Offsets below 0x60 are written to the GPFIFO class, regardless of the sub-channel. + if (offset < 0x60) + { + _fifoClass.Write(offset * 4, argument); + } + else if (offset < 0xe00) + { + // The engine classes take byte offsets. + offset *= 4; + + switch (subChannel) + { + case 0: + _3dClass.Write(offset, argument); + break; + case 1: + _computeClass.Write(offset, argument); + break; + case 2: + _i2mClass.Write(offset, argument); + break; + case 3: + _2dClass.Write(offset, argument); + break; + case 4: + _dmaClass.Write(offset, argument); + break; + } + } + else + { + // Offsets from 0xe00 onwards are macro calls, which are only handled for the 3D and 2D sub-channels. + IDeviceState state = subChannel switch + { + 0 => _3dClass, + 3 => _2dClass, + _ => null + }; + + if (state != null) + { + int macroIndex = (offset >> 1) & MacroIndexMask; + + // Odd offsets push an argument to the macro, even offsets start it. + if ((offset & 1) != 0) + { + _fifoClass.MmePushArgument(macroIndex, gpuVa, argument); + } + else + { + _fifoClass.MmeStart(macroIndex, argument); + }
+ + // The macro is only called once the last argument of the method was received. + if (isLastCall) + { + _fifoClass.CallMme(macroIndex, state); + + _3dClass.PerformDeferredDraws(); + } + } + } + } + + /// <summary> + /// Writes data directly to the state of the specified class. + /// </summary> + /// <remarks> + /// Class IDs other than the ones handled below are ignored. + /// </remarks> + /// <param name="classId">ID of the class to write the data into</param> + /// <param name="offset">State offset in bytes</param> + /// <param name="value">Value to be written</param> + public void Write(ClassId classId, int offset, int value) + { + switch (classId) + { + case ClassId.Threed: + _3dClass.Write(offset, value); + break; + case ClassId.Compute: + _computeClass.Write(offset, value); + break; + case ClassId.InlineToMemory: + _i2mClass.Write(offset, value); + break; + case ClassId.Twod: + _2dClass.Write(offset, value); + break; + case ClassId.Dma: + _dmaClass.Write(offset, value); + break; + case ClassId.GPFifo: + _fifoClass.Write(offset, value); + break; + } + } + + /// <summary> + /// Sets the shadow ram control value of all sub-channels. + /// </summary> + /// <remarks> + /// The value is forwarded to the 3D class. + /// </remarks> + /// <param name="control">New shadow ram control value</param> + public void SetShadowRamControl(int control) + { + _3dClass.SetShadowRamControl(control); + } + + /// <summary> + /// Forces a full host state update by marking all state as modified, + /// and also requests all GPU resources in use to be rebound. + /// </summary> + public void ForceAllDirty() + { + _3dClass.ForceStateDirty(); + _channel.BufferManager.Rebind(); + _channel.TextureManager.Rebind(); + } + + /// <summary> + /// Performs any deferred draws.
+ /// </summary> + public void PerformDeferredDraws() + { + _3dClass.PerformDeferredDraws(); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs new file mode 100644 index 00000000..e1d7e940 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs @@ -0,0 +1,273 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Texture; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory +{ + /// <summary> + /// Represents an Inline-to-Memory engine class. + /// </summary> + class InlineToMemoryClass : IDeviceState + { + private readonly GpuContext _context; + private readonly GpuChannel _channel; + private readonly DeviceState<InlineToMemoryClassState> _state; + + private bool _isLinear; + + // Number of words received so far for the current transfer. + private int _offset; + // Total size of the current transfer, in bytes. + private int _size; + + private ulong _dstGpuVa; + private int _dstX; + private int _dstY; + private int _dstWidth; + private int _dstHeight; + private int _dstStride; + private int _dstGobBlocksInY; + private int _dstGobBlocksInZ; + private int _lineLengthIn; + private int _lineCount; + + // Set once the received data was written out; inline data is ignored while this is set. + private bool _finished; + + // Staging buffer for the inline data, allocated by LaunchDma and reused across transfers when large enough. + private int[] _buffer; + + /// <summary> + /// Creates a new instance of the Inline-to-Memory engine class. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">GPU channel</param> + /// <param name="initializeState">Indicates if the internal state should be initialized.
Set to false if part of another engine</param> + public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState) + { + _context = context; + _channel = channel; + + // When the state is not initialized, _state stays null; Read and Write rely on it being set. + if (initializeState) + { + _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback> + { + { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) }, + { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) } + }); + } + } + + /// <summary> + /// Creates a new instance of the Inline-to-Memory engine class, with its internal state initialized. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">GPU channel</param> + public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true) + { + } + + /// <summary> + /// Reads data from the class registers. + /// </summary> + /// <param name="offset">Register byte offset</param> + /// <returns>Data at the specified offset</returns> + public int Read(int offset) => _state.Read(offset); + + /// <summary> + /// Writes data to the class registers. + /// </summary> + /// <param name="offset">Register byte offset</param> + /// <param name="data">Data to be written</param> + public void Write(int offset, int data) => _state.Write(offset, data); + + /// <summary> + /// Launches Inline-to-Memory engine DMA copy, using the internal class state. + /// </summary> + /// <param name="argument">Method call argument</param> + private void LaunchDma(int argument) + { + LaunchDma(ref _state.State, argument); + } + + /// <summary> + /// Launches Inline-to-Memory engine DMA copy.
+ /// </summary> + /// <remarks> + /// This captures the transfer parameters and resets the receive buffer. The data itself is written + /// once enough words were pushed with LoadInlineData. + /// </remarks> + /// <param name="state">Current class state</param> + /// <param name="argument">Method call argument</param> + public void LaunchDma(ref InlineToMemoryClassState state, int argument) + { + _isLinear = (argument & 1) != 0; + + _offset = 0; + // Each line is padded to a multiple of 4 bytes, as the data is received one word at a time. + _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount); + + int count = _size / 4; + + // The buffer is reused between transfers, and only grows. + if (_buffer == null || _buffer.Length < count) + { + _buffer = new int[count]; + } + + ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut; + + _dstGpuVa = dstGpuVa; + _dstX = state.SetDstOriginBytesXV; + _dstY = state.SetDstOriginSamplesYV; + _dstWidth = (int)state.SetDstWidth; + _dstHeight = (int)state.SetDstHeight; + _dstStride = (int)state.PitchOut; + _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight; + _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth; + _lineLengthIn = (int)state.LineLengthIn; + _lineCount = (int)state.LineCount; + + _finished = false; + } + + /// <summary> + /// Pushes a block of data to the Inline-to-Memory engine. + /// </summary> + /// <remarks> + /// Data is ignored if the current transfer is already finished, or if no transfer was launched yet. + /// </remarks> + /// <param name="data">Data to push</param> + public void LoadInlineData(ReadOnlySpan<int> data) + { + // The buffer only exists after the first LaunchDma, data pushed before that has no destination. + if (_finished || _buffer == null) + { + return; + } + + int copySize = Math.Min(data.Length, _buffer.Length - _offset); + data.Slice(0, copySize).CopyTo(new Span<int>(_buffer).Slice(_offset, copySize)); + + _offset += copySize; + + if (_offset * 4 >= _size) + { + FinishTransfer(); + } + } + + /// <summary> + /// Pushes a word of data to the Inline-to-Memory engine. + /// </summary> + /// <remarks> + /// Data is ignored if the current transfer is already finished, or if no transfer was launched yet. + /// </remarks> + /// <param name="argument">Method call argument</param> + public void LoadInlineData(int argument) + { + // The buffer only exists after the first LaunchDma, data pushed before that has no destination. + if (_finished || _buffer == null) + { + return; + } + + // A zero-sized transfer may have no room for the word, but it still has to be completed below, + // which matches what the span overload does. + if (_offset < _buffer.Length) + { + _buffer[_offset++] = argument; + } + + if (_offset * 4 >= _size) + { + FinishTransfer(); + } + } + + /// <summary> + /// Performs actual copy of the inline data after the transfer is finished.
+ /// </summary> + private void FinishTransfer() + { + var memoryManager = _channel.MemoryManager; + + var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size); + + // A linear transfer of a single line is written with a single call. + if (_isLinear && _lineCount == 1) + { + memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn)); + _context.AdvanceSequence(); + } + else + { + // TODO: Verify if the destination X/Y and width/height are taken into account + // for linear texture transfers. If not, we can use the fast path for that as well. + // Right now the copy code at the bottom assumes that it is used on both which might be incorrect. + if (!_isLinear) + { + var target = memoryManager.Physical.TextureCache.FindTexture( + memoryManager, + _dstGpuVa, + 1, + _dstStride, + _dstHeight, + _lineLengthIn, + _lineCount, + _isLinear, + _dstGobBlocksInY, + _dstGobBlocksInZ); + + if (target != null) + { + target.SynchronizeMemory(); + target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount)); + target.SignalModified(); + + // The transfer is complete on this path too. Without this, any extra data word + // received before the next LaunchDma would run the transfer again. + _finished = true; + + return; + } + } + + var dstCalculator = new OffsetCalculator( + _dstWidth, + _dstHeight, + _dstStride, + _isLinear, + _dstGobBlocksInY, + 1); + + int srcOffset = 0; + + // Each line is copied one byte at a time until X is aligned to 16, + // then 16 bytes at a time, and then one byte at a time for what is left. + for (int y = _dstY; y < _dstY + _lineCount; y++) + { + int x1 = _dstX; + int x2 = _dstX + _lineLengthIn; + int x1Round = BitUtils.AlignUp(_dstX, 16); + int x2Trunc = BitUtils.AlignDown(x2, 16); + + int x = x1; + + if (x1Round <= x2) + { + for (; x < x1Round; x++, srcOffset++) + { + int dstOffset = dstCalculator.GetOffset(x, y); + + ulong dstAddress = _dstGpuVa + (uint)dstOffset; + + memoryManager.Write(dstAddress, data[srcOffset]); + } + } + + for (; x < x2Trunc; x += 16, srcOffset += 16) + { + int dstOffset = dstCalculator.GetOffset(x, y); + + ulong dstAddress = _dstGpuVa + (uint)dstOffset; + + memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]); + } + + for (; x < x2; x++, srcOffset++) + { + int dstOffset
= dstCalculator.GetOffset(x, y); + + ulong dstAddress = _dstGpuVa + (uint)dstOffset; + + memoryManager.Write(dstAddress, data[srcOffset]); + } + + // All lines must be aligned to 4 bytes, as the data is pushed one word at a time. + // If our copy length is not a multiple of 4, then we need to skip the padding bytes here. + int misalignment = _lineLengthIn & 3; + + if (misalignment != 0) + { + srcOffset += 4 - misalignment; + } + } + + _context.AdvanceSequence(); + } + + // Inline data is ignored from now on, until the next LaunchDma. + _finished = true; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs new file mode 100644 index 00000000..d0c82a5e --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs @@ -0,0 +1,181 @@ +// This file was auto-generated from NVIDIA official Maxwell definitions. + +namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory +{ + /// <summary> + /// Notify type. + /// </summary> + enum NotifyType + { + WriteOnly = 0, + WriteThenAwaken = 1, + } + + /// <summary> + /// Width in GOBs of the destination texture. + /// </summary> + enum SetDstBlockSizeWidth + { + OneGob = 0, + } + + /// <summary> + /// Height in GOBs of the destination texture, encoded as a power of two exponent. + /// </summary> + enum SetDstBlockSizeHeight + { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtytwoGobs = 5, + } + + /// <summary> + /// Depth in GOBs of the destination texture, encoded as a power of two exponent. + /// </summary> + enum SetDstBlockSizeDepth + { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtytwoGobs = 5, + } + + /// <summary> + /// Memory layout of the destination texture. + /// </summary> + enum LaunchDmaDstMemoryLayout + { + Blocklinear = 0, + Pitch = 1, + } + + /// <summary> + /// DMA completion type.
+ /// </summary> + enum LaunchDmaCompletionType + { + FlushDisable = 0, + FlushOnly = 1, + ReleaseSemaphore = 2, + } + + /// <summary> + /// DMA interrupt type. + /// </summary> + enum LaunchDmaInterruptType + { + None = 0, + Interrupt = 1, + } + + /// <summary> + /// DMA semaphore structure size. + /// </summary> + enum LaunchDmaSemaphoreStructSize + { + FourWords = 0, + OneWord = 1, + } + + /// <summary> + /// DMA semaphore reduction operation. + /// </summary> + enum LaunchDmaReductionOp + { + RedAdd = 0, + RedMin = 1, + RedMax = 2, + RedInc = 3, + RedDec = 4, + RedAnd = 5, + RedOr = 6, + RedXor = 7, + } + + /// <summary> + /// DMA semaphore reduction format. + /// </summary> + enum LaunchDmaReductionFormat + { + Unsigned32 = 0, + Signed32 = 1, + } + + /// <summary> + /// Inline-to-Memory class state. + /// </summary> + /// <remarks> + /// Fields are declared in register order, with one 32-bit field per register. The reserved arrays fill + /// the gaps between registers, and are named after the byte offset where they start. + /// </remarks> + unsafe struct InlineToMemoryClassState + { + // NOTE(review): CS0649 is the "field is never assigned to" warning; presumably the fields are only written through the register state mapping - confirm. +#pragma warning disable CS0649 + public uint SetObject; + public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF); + public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F); + public fixed uint Reserved04[63]; + public uint NoOperation; + public uint SetNotifyA; + public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF); + public uint SetNotifyB; + public uint Notify; + public NotifyType NotifyType => (NotifyType)(Notify); + public uint WaitForIdle; + public fixed uint Reserved114[7]; + public uint SetGlobalRenderEnableA; + public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF); + public uint SetGlobalRenderEnableB; + public uint SetGlobalRenderEnableC; + public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7); + public uint SendGoIdle; + public uint PmTrigger; + public uint PmTriggerWfi; + public fixed uint Reserved148[2]; + public uint SetInstrumentationMethodHeader; + public uint SetInstrumentationMethodData; + public fixed uint Reserved158[10]; + public uint LineLengthIn; + public uint LineCount; + public
uint OffsetOutUpper; + public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF); + public uint OffsetOut; + public uint PitchOut; + public uint SetDstBlockSize; + public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF); + public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF); + public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF); + public uint SetDstWidth; + public uint SetDstHeight; + public uint SetDstDepth; + public uint SetDstLayer; + public uint SetDstOriginBytesX; + public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF); + public uint SetDstOriginSamplesY; + public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF); + // Bit layout of LaunchDma, as decoded by the accessors below: bit 0 is the destination memory layout, bit 1 the reduction enable, + // bits 2-3 the reduction format, bits 4-5 the completion type, bit 6 the sysmembar disable, bits 8-9 the interrupt type, + // bit 12 the semaphore structure size, and bits 13-15 the reduction operation. + public uint LaunchDma; + public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1); + public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3); + public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3); + public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1); + public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0; + public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7); + public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3); + public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0; + public uint LoadInlineData; + public fixed uint Reserved1B8[9]; + public uint SetI2mSemaphoreA; + public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF); + public uint SetI2mSemaphoreB; + public uint SetI2mSemaphoreC; + public fixed uint Reserved1E8[2]; + public uint SetI2mSpareNoop00; +
public uint SetI2mSpareNoop01; + public uint SetI2mSpareNoop02; + public uint SetI2mSpareNoop03; + public fixed uint Reserved200[3200]; + public MmeShadowScratch SetMmeShadowScratch; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs new file mode 100644 index 00000000..eeef9c67 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs @@ -0,0 +1,15 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// GPU Macro Arithmetic and Logic unit operation. + /// </summary> + enum AluOperation + { + AluReg = 0, + AddImmediate = 1, + BitfieldReplace = 2, + BitfieldExtractLslImm = 3, + BitfieldExtractLslReg = 4, + ReadImmediate = 5 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs new file mode 100644 index 00000000..f3e05d38 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// GPU Macro Arithmetic and Logic unit binary register-to-register operation. + /// </summary> + /// <remarks> + /// Values 4 to 7 are not defined by this enumeration. + /// </remarks> + enum AluRegOperation + { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + BitwiseExclusiveOr = 8, + BitwiseOr = 9, + BitwiseAnd = 10, + BitwiseAndNot = 11, + BitwiseNotAnd = 12 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs new file mode 100644 index 00000000..dc336026 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// GPU Macro assignment operation.
+ /// </summary> + enum AssignmentOperation + { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMaddr = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMaddr = 5, + MoveAndSetMaddrThenFetchAndSend = 6, + MoveAndSetMaddrThenSendHigh = 7 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs new file mode 100644 index 00000000..117961db --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs @@ -0,0 +1,52 @@ +using Ryujinx.Graphics.Device; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// FIFO word, which pairs a word value with the GPU virtual address where it is located. + /// </summary> + readonly struct FifoWord + { + /// <summary> + /// GPU virtual address where the word is located in memory. + /// </summary> + public ulong GpuVa { get; } + + /// <summary> + /// Word value. + /// </summary> + public int Word { get; } + + /// <summary> + /// Creates a new FIFO word. + /// </summary> + /// <param name="gpuVa">GPU virtual address where the word is located in memory</param> + /// <param name="word">Word value</param> + public FifoWord(ulong gpuVa, int word) + { + GpuVa = gpuVa; + Word = word; + } + } + + /// <summary> + /// Macro Execution Engine interface. + /// </summary> + interface IMacroEE + { + /// <summary> + /// Arguments FIFO. + /// </summary> + Queue<FifoWord> Fifo { get; } + + /// <summary> + /// Executes the GPU Macro code being passed.
+ /// </summary> + /// <param name="code">Code to be executed</param> + /// <param name="state">GPU state at the time of the call</param> + /// <param name="arg0">First argument to be passed to the GPU Macro</param> + void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0); + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs new file mode 100644 index 00000000..12a3ac02 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs @@ -0,0 +1,101 @@ +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Engine.GPFifo; +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// GPU macro program. + /// </summary> + struct Macro + { + /// <summary> + /// Word offset of the code on the code memory. + /// </summary> + public int Position { get; } + + // Engine used to run the macro, created by the first call to StartExecution. + private IMacroEE _executionEngine; + // Set by StartExecution and cleared by Execute, so that each start runs the macro at most once. + private bool _executionPending; + // First argument of the pending macro call. + private int _argument; + // High-level function matched for this macro, if any. + private MacroHLEFunctionName _hleFunction; + + /// <summary> + /// Creates a new instance of the GPU cached macro program. + /// </summary> + /// <param name="position">Macro code start position</param> + public Macro(int position) + { + Position = position; + + _executionEngine = null; + _executionPending = false; + _argument = 0; + _hleFunction = MacroHLEFunctionName.None; + } + + /// <summary> + /// Sets the first argument for the macro call, and marks the execution as pending.
+ /// </summary> + /// <param name="context">GPU context where the macro code is being executed</param> + /// <param name="processor">GPU GP FIFO command processor</param> + /// <param name="code">Code to be executed</param> + /// <param name="argument">First argument</param> + public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument) + { + _argument = argument; + + _executionPending = true; + + // The execution engine is selected on the first call only, and reused after that. + if (_executionEngine == null) + { + if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction)) + { + _executionEngine = new MacroHLE(processor, _hleFunction); + } + else if (GraphicsConfig.EnableMacroJit) + { + _executionEngine = new MacroJit(); + } + else + { + _executionEngine = new MacroInterpreter(); + } + } + + // We don't consume the parameter buffer value, so we don't need to flush it. + // Skipping the flush improves performance if the value was written by a GPU shader. + if (_hleFunction == MacroHLEFunctionName.DrawElementsIndirect) + { + context.GPFifo.SetFlushSkips(1); + } + else if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount) + { + context.GPFifo.SetFlushSkips(2); + } + } + + /// <summary> + /// Starts executing the macro program code. + /// </summary> + /// <remarks> + /// This does nothing unless an execution is pending, that is, unless StartExecution was called since the last execution. + /// </remarks> + /// <param name="code">Program code</param> + /// <param name="state">Current GPU state</param> + public void Execute(ReadOnlySpan<int> code, IDeviceState state) + { + if (_executionPending) + { + _executionPending = false; + _executionEngine?.Execute(code.Slice(Position), state, _argument); + } + } + + /// <summary> + /// Pushes an argument to the macro call argument FIFO.
+ /// </summary> + /// <remarks> + /// The argument is dropped if no execution engine was created yet. + /// </remarks> + /// <param name="gpuVa">GPU virtual address where the command word is located</param> + /// <param name="argument">Argument to be pushed</param> + public void PushArgument(ulong gpuVa, int argument) + { + _executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument)); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs new file mode 100644 index 00000000..8630bbc4 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs @@ -0,0 +1,341 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.GPFifo; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// Macro High-level emulation. + /// </summary> + class MacroHLE : IMacroEE + { + // Byte offsets on the state that are read to get the layer count of the color and depth-stencil targets. + private const int ColorLayerCountOffset = 0x818; + private const int ColorStructSize = 0x40; + private const int ZetaLayerCountOffset = 0x1230; + + // Sizes in bytes of one entry of indirect draw data, 0x14 being five 32-bit values. + private const int IndirectDataEntrySize = 0x10; + private const int IndirectIndexedDataEntrySize = 0x14; + + private readonly GPFifoProcessor _processor; + private readonly MacroHLEFunctionName _functionName; + + /// <summary> + /// Arguments FIFO. + /// </summary> + public Queue<FifoWord> Fifo { get; } + + /// <summary> + /// Creates a new instance of the HLE macro handler. + /// </summary> + /// <param name="processor">GPU GP FIFO command processor</param> + /// <param name="functionName">Name of the HLE macro function to be called</param> + public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName) + { + _processor = processor; + _functionName = functionName; + + Fifo = new Queue<FifoWord>(); + } + + /// <summary> + /// Executes the high-level implementation of the macro function selected at construction.
+ /// </summary> + /// <param name="code">Code of the program to execute (not used by the high-level implementation)</param> + /// <param name="state">GPU state at the time of the call</param> + /// <param name="arg0">Optional argument passed to the program, 0 if not used</param> + public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0) + { + switch (_functionName) + { + case MacroHLEFunctionName.ClearColor: + ClearColor(state, arg0); + break; + case MacroHLEFunctionName.ClearDepthStencil: + ClearDepthStencil(state, arg0); + break; + case MacroHLEFunctionName.DrawArraysInstanced: + DrawArraysInstanced(state, arg0); + break; + case MacroHLEFunctionName.DrawElementsInstanced: + DrawElementsInstanced(state, arg0); + break; + case MacroHLEFunctionName.DrawElementsIndirect: + DrawElementsIndirect(state, arg0); + break; + case MacroHLEFunctionName.MultiDrawElementsIndirectCount: + MultiDrawElementsIndirectCount(state, arg0); + break; + default: + throw new NotImplementedException(_functionName.ToString()); + } + + // It should be empty at this point, but clear it just to be safe. + Fifo.Clear(); + } + + /// <summary> + /// Clears one bound color target. + /// </summary> + /// <param name="state">GPU state at the time of the call</param> + /// <param name="arg0">First argument of the call</param> + private void ClearColor(IDeviceState state, int arg0) + { + // The color target index is taken from bits 6-9 of the argument, and selects which layer count is read. + int index = (arg0 >> 6) & 0xf; + int layerCount = state.Read(ColorLayerCountOffset + index * ColorStructSize); + + _processor.ThreedClass.Clear(arg0, layerCount); + } + + /// <summary> + /// Clears the current depth-stencil target. + /// </summary> + /// <param name="state">GPU state at the time of the call</param> + /// <param name="arg0">First argument of the call</param> + private void ClearDepthStencil(IDeviceState state, int arg0) + { + int layerCount = state.Read(ZetaLayerCountOffset); + + _processor.ThreedClass.Clear(arg0, layerCount); + } + + /// <summary> + /// Performs an instanced, non-indexed draw.
/// </summary>
        /// <param name="state">GPU state at the time of the call</param>
        /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
        private void DrawArraysInstanced(IDeviceState state, int arg0)
        {
            var topology = (PrimitiveTopology)arg0;

            // Arguments are dequeued in the order the macro expects them.
            int vertexCount = FetchParam().Word;
            int instances = FetchParam().Word;
            int baseVertex = FetchParam().Word;
            int baseInstance = FetchParam().Word;

            if (!ShouldSkipDraw(state, instances))
            {
                _processor.ThreedClass.Draw(
                    topology,
                    vertexCount,
                    instances,
                    0,
                    baseVertex,
                    baseInstance,
                    indexed: false);
            }
        }

        /// <summary>
        /// Performs an indexed draw.
        /// </summary>
        /// <param name="state">GPU state at the time of the call</param>
        /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
        private void DrawElementsInstanced(IDeviceState state, int arg0)
        {
            var topology = (PrimitiveTopology)arg0;

            // Arguments are dequeued in the order the macro expects them.
            int indexCount = FetchParam().Word;
            int instances = FetchParam().Word;
            int baseIndex = FetchParam().Word;
            int baseVertex = FetchParam().Word;
            int baseInstance = FetchParam().Word;

            if (!ShouldSkipDraw(state, instances))
            {
                _processor.ThreedClass.Draw(
                    topology,
                    indexCount,
                    instances,
                    baseIndex,
                    baseVertex,
                    baseInstance,
                    indexed: true);
            }
        }

        /// <summary>
        /// Performs an indirect indexed draw, with parameters from a GPU buffer.
/// </summary>
        /// <param name="state">GPU state at the time of the call</param>
        /// <param name="arg0">First argument of the call, interpreted as the primitive topology</param>
        private void DrawElementsIndirect(IDeviceState state, int arg0)
        {
            var topology = (PrimitiveTopology)arg0;

            FifoWord count = FetchParam();
            FifoWord instanceCount = FetchParam();
            FifoWord firstIndex = FetchParam();
            FifoWord firstVertex = FetchParam();
            FifoWord firstInstance = FetchParam();

            // The GPU address the first argument was fetched from is used as the start of the indirect data.
            ulong indirectBufferGpuVa = count.GpuVa;

            var bufferCache = _processor.MemoryManager.Physical.BufferCache;

            bool useBuffer = bufferCache.CheckModified(
                _processor.MemoryManager,
                indirectBufferGpuVa,
                IndirectIndexedDataEntrySize,
                out ulong indirectBufferAddress);

            if (!useBuffer)
            {
                // Draw directly with the values that came through the FIFO.
                if (ShouldSkipDraw(state, instanceCount.Word))
                {
                    return;
                }

                _processor.ThreedClass.Draw(
                    topology,
                    count.Word,
                    instanceCount.Word,
                    firstIndex.Word,
                    firstVertex.Word,
                    firstInstance.Word,
                    indexed: true);

                return;
            }

            int indexCount = firstIndex.Word + count.Word;

            _processor.ThreedClass.DrawIndirect(
                topology,
                indirectBufferAddress,
                0,
                1,
                IndirectIndexedDataEntrySize,
                indexCount,
                Threed.IndirectDrawType.DrawIndexedIndirect);
        }

        /// <summary>
        /// Performs an indirect indexed multi-draw, with parameters from a GPU buffer.
/// </summary>
        /// <param name="state">GPU state at the time of the call</param>
        /// <param name="arg0">First argument of the call, used as the index of the first draw</param>
        private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
        {
            int arg1 = FetchParam().Word;
            int arg2 = FetchParam().Word;
            int arg3 = FetchParam().Word;

            int startDraw = arg0;
            int endDraw = arg1;
            var topology = (PrimitiveTopology)arg2;
            int paddingWords = arg3;

            // Each entry is one indexed indirect draw record followed by the padding words.
            int stride = paddingWords * 4 + IndirectIndexedDataEntrySize;

            ulong parameterBufferGpuVa = FetchParam().GpuVa;

            int maxDrawCount = endDraw - startDraw;

            if (startDraw != 0)
            {
                int drawCount = _processor.MemoryManager.Read<int>(parameterBufferGpuVa, tracked: true);

                // Calculate maximum draw count based on the previous draw count and current draw count.
                if ((uint)drawCount <= (uint)startDraw)
                {
                    // The start draw is past our total draw count, so all draws were already performed.
                    maxDrawCount = 0;
                }
                else
                {
                    // Perform just the missing number of draws.
                    maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw));
                }
            }

            // A non-positive count means there is nothing to draw. Negative values must be rejected
            // here as well: they would skip the loop below and then be converted to a huge unsigned
            // buffer size at GPU address 0.
            if (maxDrawCount <= 0)
            {
                Fifo.Clear();
                return;
            }

            ulong indirectBufferGpuVa = 0;
            int indexCount = 0;

            for (int i = 0; i < maxDrawCount; i++)
            {
                var count = FetchParam();
                _ = FetchParam(); // Instance count, not needed on the host side.
                var firstIndex = FetchParam();
                _ = FetchParam(); // First vertex, not needed on the host side.
                _ = FetchParam(); // First instance, not needed on the host side.

                if (i == 0)
                {
                    // The first entry gives the GPU address where the indirect data starts.
                    indirectBufferGpuVa = count.GpuVa;
                }

                indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);

                // No padding is consumed after the last entry.
                if (i != maxDrawCount - 1)
                {
                    for (int j = 0; j < paddingWords; j++)
                    {
                        FetchParam();
                    }
                }
            }

            var bufferCache = _processor.MemoryManager.Physical.BufferCache;

            ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;

            ulong indirectBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize);
            ulong parameterBufferAddress = bufferCache.TranslateAndCreateBuffer(_processor.MemoryManager, parameterBufferGpuVa, 4);

            _processor.ThreedClass.DrawIndirect(
                topology,
                indirectBufferAddress,
                parameterBufferAddress,
                maxDrawCount,
                stride,
                indexCount,
                Threed.IndirectDrawType.DrawIndexedIndirectCount);
        }

        /// <summary>
        /// Checks if the draw should be skipped, because the masked instance count is zero.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="instanceCount">Draw instance count</param>
        /// <returns>True if the draw should be skipped, false otherwise</returns>
        private static bool ShouldSkipDraw(IDeviceState state, int instanceCount)
        {
            // Register 0xd1b is used as a mask over the instance count.
            return (Read(state, 0xd1b) & instanceCount) == 0;
        }

        /// <summary>
        /// Fetches an argument from the arguments FIFO.
+ /// </summary> + /// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns> + private FifoWord FetchParam() + { + if (!Fifo.TryDequeue(out var value)) + { + Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); + + return new FifoWord(0UL, 0); + } + + return value; + } + + /// <summary> + /// Reads data from a GPU register. + /// </summary> + /// <param name="state">Current GPU state</param> + /// <param name="reg">Register offset to read, in 32-bit words (it is multiplied by 4 before the state read)</param> + /// <returns>GPU register value</returns> + private static int Read(IDeviceState state, int reg) + { + return state.Read(reg * 4); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs new file mode 100644 index 00000000..751867fc --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs @@ -0,0 +1,16 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// Name of the High-level implementation of a Macro function. + /// </summary> + enum MacroHLEFunctionName + { + None, + ClearColor, + ClearDepthStencil, + DrawArraysInstanced, + DrawElementsInstanced, + DrawElementsIndirect, + MultiDrawElementsIndirectCount + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs new file mode 100644 index 00000000..719e170f --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs @@ -0,0 +1,113 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// Table with information about High-level implementations of GPU Macro code. + /// </summary> + static class MacroHLETable + { + /// <summary> + /// Macro High-level implementation table entry.
+ /// </summary> + readonly struct TableEntry + { + /// <summary> + /// Name of the Macro function. + /// </summary> + public MacroHLEFunctionName Name { get; } + + /// <summary> + /// Hash of the original binary Macro function code. + /// </summary> + public Hash128 Hash { get; } + + /// <summary> + /// Size (in bytes) of the original binary Macro function code. + /// </summary> + public int Length { get; } + + /// <summary> + /// Creates a new table entry. + /// </summary> + /// <param name="name">Name of the Macro function</param> + /// <param name="hash">Hash of the original binary Macro function code</param> + /// <param name="length">Size (in bytes) of the original binary Macro function code</param> + public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length) + { + Name = name; + Hash = hash; + Length = length; + } + } + + /// <summary> + /// Macro functions that have a High-level implementation, each identified by the hash and size (in bytes) of its original binary code. + /// </summary> + private static readonly TableEntry[] _table = new TableEntry[] + { + new TableEntry(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28), + new TableEntry(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24), + new TableEntry(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48), + new TableEntry(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5c), + new TableEntry(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7c), + new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C) + }; + + /// <summary> + /// Checks if the host supports all features required by the HLE macro.
/// </summary>
        /// <param name="caps">Host capabilities</param>
        /// <param name="name">Name of the HLE macro to be checked</param>
        /// <returns>True if the host supports the HLE macro, false otherwise</returns>
        private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
        {
            switch (name)
            {
                case MacroHLEFunctionName.ClearColor:
                case MacroHLEFunctionName.ClearDepthStencil:
                case MacroHLEFunctionName.DrawArraysInstanced:
                case MacroHLEFunctionName.DrawElementsInstanced:
                case MacroHLEFunctionName.DrawElementsIndirect:
                    return true;

                case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
                    return caps.SupportsIndirectParameters;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Checks if there's a fast, High-level implementation of the specified Macro code available.
        /// </summary>
        /// <param name="code">Macro code to be checked</param>
        /// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
        /// <param name="name">Name of the function if an implementation is available and supported, otherwise <see cref="MacroHLEFunctionName.None"/></param>
        /// <returns>True if there is an implementation available and supported, false otherwise</returns>
        public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name)
        {
            var mc = MemoryMarshal.Cast<int, byte>(code);

            for (int i = 0; i < _table.Length; i++)
            {
                ref var entry = ref _table[i];

                // Code shorter than the entry can never match it, and slicing past the end
                // of the span would throw instead of reporting "no match".
                if (mc.Length < entry.Length)
                {
                    continue;
                }

                var hash = XXHash128.ComputeHash(mc.Slice(0, entry.Length));
                if (hash == entry.Hash)
                {
                    if (IsMacroHLESupported(caps, entry.Name))
                    {
                        name = entry.Name;
                        return true;
                    }

                    // The macro was identified but the host cannot run its HLE version; stop searching.
                    break;
                }
            }

            name = MacroHLEFunctionName.None;
            return false;
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs new file mode 100644 index 00000000..df6ee040 --- /dev/null +++ 
b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs @@ -0,0 +1,400 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Gpu.Engine.MME
{
    /// <summary>
    /// Macro code interpreter.
    /// </summary>
    class MacroInterpreter : IMacroEE
    {
        /// <summary>
        /// Arguments FIFO.
        /// </summary>
        public Queue<FifoWord> Fifo { get; }

        // General purpose registers; reads of index 0 always yield 0 (see GetGprValue).
        private int[] _gprs;

        // Current method address and the increment applied to it after each send.
        private int _methAddr;
        private int _methIncr;

        private bool _carry;

        // Instruction being executed and the prefetched next instruction.
        private int _opCode;
        private int _pipeOp;

        private bool _ignoreExitFlag;

        private int _pc;

        /// <summary>
        /// Creates a new instance of the macro code interpreter.
        /// </summary>
        public MacroInterpreter()
        {
            _gprs = new int[8];

            Fifo = new Queue<FifoWord>();
        }

        /// <summary>
        /// Executes a macro program until it exits.
        /// </summary>
        /// <param name="code">Code of the program to execute</param>
        /// <param name="state">Current GPU state</param>
        /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
        public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
        {
            Reset();

            // The first call argument is delivered in register 1.
            _gprs[1] = arg0;

            _pc = 0;

            // Prime the one-instruction pipeline before stepping.
            FetchOpCode(code);

            bool running;

            do
            {
                running = Step(code, state);
            }
            while (running);

            // Due to the delay slot, we still need to execute
            // one more instruction before we actually exit.
            Step(code, state);
        }

        /// <summary>
        /// Resets the internal interpreter state.
        /// Call each time you run a new program.
        /// </summary>
        private void Reset()
        {
            Array.Clear(_gprs, 0, _gprs.Length);

            _methAddr = 0;
            _methIncr = 0;

            _carry = false;
        }

        /// <summary>
        /// Executes a single instruction of the program.
/// </summary>
        /// <param name="code">Program code to execute</param>
        /// <param name="state">Current GPU state</param>
        /// <returns>True to continue execution, false if the program exited</returns>
        private bool Step(ReadOnlySpan<int> code, IDeviceState state)
        {
            // Address of the instruction about to be executed (the PC already points past the prefetched word).
            int baseAddr = _pc - 1;

            FetchOpCode(code);

            if ((_opCode & 7) < 7)
            {
                // Operation produces a value.
                AssignmentOperation asgOp = (AssignmentOperation)((_opCode >> 4) & 7);

                int result = GetAluResult(state);

                switch (asgOp)
                {
                    // Fetch parameter and ignore result.
                    case AssignmentOperation.IgnoreAndFetch:
                        SetDstGpr(FetchParam());
                        break;
                    // Move result.
                    case AssignmentOperation.Move:
                        SetDstGpr(result);
                        break;
                    // Move result and use as Method Address.
                    case AssignmentOperation.MoveAndSetMaddr:
                        SetDstGpr(result);
                        SetMethAddr(result);
                        break;
                    // Fetch parameter and send result.
                    case AssignmentOperation.FetchAndSend:
                        SetDstGpr(FetchParam());
                        Send(state, result);
                        break;
                    // Move and send result.
                    case AssignmentOperation.MoveAndSend:
                        SetDstGpr(result);
                        Send(state, result);
                        break;
                    // Fetch parameter and use result as Method Address.
                    case AssignmentOperation.FetchAndSetMaddr:
                        SetDstGpr(FetchParam());
                        SetMethAddr(result);
                        break;
                    // Move result and use as Method Address, then fetch and send parameter.
                    case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
                        SetDstGpr(result);
                        SetMethAddr(result);
                        Send(state, FetchParam());
                        break;
                    // Move result and use as Method Address, then send bits 17:12 of result.
                    case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
                        SetDstGpr(result);
                        SetMethAddr(result);
                        Send(state, (result >> 12) & 0x3f);
                        break;
                }
            }
            else
            {
                // Branch.
                bool onNotZero = ((_opCode >> 4) & 1) != 0;

                bool taken = onNotZero
                    ? GetGprA() != 0
                    : GetGprA() == 0;

                if (taken)
                {
                    _pc = baseAddr + GetImm();

                    bool noDelays = (_opCode & 0x20) != 0;

                    if (noDelays)
                    {
                        // Discard the prefetched instruction and refill the pipeline from the target.
                        FetchOpCode(code);
                    }
                    else
                    {
                        // The delay slot instruction exit flag should be ignored.
                        _ignoreExitFlag = true;
                    }

                    return true;
                }
            }

            bool exit = (_opCode & 0x80) != 0 && !_ignoreExitFlag;

            _ignoreExitFlag = false;

            return !exit;
        }

        /// <summary>
        /// Fetches a single operation code from the program code.
        /// </summary>
        /// <param name="code">Program code</param>
        private void FetchOpCode(ReadOnlySpan<int> code)
        {
            // One-deep pipeline: the previously prefetched word becomes current, and the next one is prefetched.
            _opCode = _pipeOp;
            _pipeOp = code[_pc++];
        }

        /// <summary>
        /// Gets the result of the current Arithmetic and Logic unit operation.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <returns>Operation result</returns>
        /// <exception cref="InvalidOperationException">Thrown when the instruction encoding is invalid</exception>
        private int GetAluResult(IDeviceState state)
        {
            AluOperation op = (AluOperation)(_opCode & 7);

            switch (op)
            {
                case AluOperation.AluReg:
                    return GetAluResult((AluRegOperation)((_opCode >> 17) & 0x1f), GetGprA(), GetGprB());

                case AluOperation.AddImmediate:
                    return GetGprA() + GetImm();

                case AluOperation.BitfieldReplace:
                case AluOperation.BitfieldExtractLslImm:
                case AluOperation.BitfieldExtractLslReg:
                    int bfSrcBit = (_opCode >> 17) & 0x1f;
                    int bfSize = (_opCode >> 22) & 0x1f;
                    int bfDstBit = (_opCode >> 27) & 0x1f;

                    int bfMask = (1 << bfSize) - 1;

                    int dst = GetGprA();
                    int src = GetGprB();

                    switch (op)
                    {
                        case AluOperation.BitfieldReplace:
                            src = (int)((uint)src >> bfSrcBit) & bfMask;

                            dst &= ~(bfMask << bfDstBit);

                            dst |= src << bfDstBit;

                            return dst;

                        case AluOperation.BitfieldExtractLslImm:
                            src = (int)((uint)src >> dst) & bfMask;

                            return src << bfDstBit;

                        case AluOperation.BitfieldExtractLslReg:
                            src = (int)((uint)src >> bfSrcBit) & bfMask;

                            return src << dst;
                    }

                    break;

                case AluOperation.ReadImmediate:
                    return Read(state, GetGprA() + GetImm());
            }

            throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}.");
        }

        /// <summary>
        /// Gets the result of an Arithmetic and Logic operation using registers.
        /// </summary>
        /// <remarks>
        /// Add and subtract operations also update the carry flag.
        /// </remarks>
        /// <param name="aluOp">Arithmetic and Logic unit operation with registers</param>
        /// <param name="a">First operand value</param>
        /// <param name="b">Second operand value</param>
        /// <returns>Operation result</returns>
        /// <exception cref="InvalidOperationException">Thrown when the operation is not recognized</exception>
        private int GetAluResult(AluRegOperation aluOp, int a, int b)
        {
            // The operands must be zero-extended to 64 bits. Casting a negative int straight to ulong
            // sign-extends it, which sets the upper 32 bits and makes the carry/borrow checks below wrong.
            ulong ua = (uint)a;
            ulong ub = (uint)b;

            ulong result;

            switch (aluOp)
            {
                case AluRegOperation.Add:
                    result = ua + ub;

                    _carry = result > 0xffffffff;

                    return (int)result;

                case AluRegOperation.AddWithCarry:
                    result = ua + ub + (_carry ? 1UL : 0UL);

                    _carry = result > 0xffffffff;

                    return (int)result;

                case AluRegOperation.Subtract:
                    result = ua - ub;

                    // Carry set means "no borrow": the 64-bit difference did not wrap around.
                    _carry = result < 0x100000000;

                    return (int)result;

                case AluRegOperation.SubtractWithBorrow:
                    result = ua - ub - (_carry ? 0UL : 1UL);

                    _carry = result < 0x100000000;

                    return (int)result;

                case AluRegOperation.BitwiseExclusiveOr: return a ^ b;
                case AluRegOperation.BitwiseOr: return a | b;
                case AluRegOperation.BitwiseAnd: return a & b;
                case AluRegOperation.BitwiseAndNot: return a & ~b;
                case AluRegOperation.BitwiseNotAnd: return ~(a & b);
            }

            throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}.");
        }

        /// <summary>
        /// Extracts a 32-bits signed integer constant from the current operation code.
        /// </summary>
        /// <returns>The 32-bits immediate value encoded at the current operation code</returns>
        private int GetImm()
        {
            // Note: The immediate is signed, the sign-extension is intended here.
            return _opCode >> 14;
        }

        /// <summary>
        /// Sets the current method address, for method calls.
/// </summary>
        /// <param name="value">Packed address (bits 11:0) and increment (bits 17:12) value</param>
        private void SetMethAddr(int value)
        {
            _methIncr = (value >> 12) & 0x3f;
            _methAddr = value & 0xfff;
        }

        /// <summary>
        /// Writes a value to the destination register encoded in the current instruction.
        /// </summary>
        /// <param name="value">Value to set (usually the operation result)</param>
        private void SetDstGpr(int value)
        {
            int dstIndex = (_opCode >> 8) & 7;

            _gprs[dstIndex] = value;
        }

        /// <summary>
        /// Reads the register selected as first operand by the current instruction.
        /// </summary>
        /// <returns>Operand value</returns>
        private int GetGprA() => GetGprValue((_opCode >> 11) & 7);

        /// <summary>
        /// Reads the register selected as second operand by the current instruction.
        /// </summary>
        /// <returns>Operand value</returns>
        private int GetGprB() => GetGprValue((_opCode >> 14) & 7);

        /// <summary>
        /// Gets the value from a register, or 0 if the R0 register is specified.
        /// </summary>
        /// <param name="index">Index of the register</param>
        /// <returns>Register value</returns>
        private int GetGprValue(int index)
        {
            if (index == 0)
            {
                return 0;
            }

            return _gprs[index];
        }

        /// <summary>
        /// Fetches a call argument from the call argument FIFO.
        /// </summary>
        /// <returns>The call argument, or 0 if the FIFO is empty</returns>
        private int FetchParam()
        {
            if (Fifo.TryDequeue(out var word))
            {
                return word.Word;
            }

            Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");

            return 0;
        }

        /// <summary>
        /// Reads data from a GPU register.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="reg">Register offset to read, in 32-bit words</param>
        /// <returns>GPU register value</returns>
        private int Read(IDeviceState state, int reg) => state.Read(reg * 4);

        /// <summary>
        /// Performs a GPU method call.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="value">Call argument</param>
        private void Send(IDeviceState state, int value)
        {
            int byteOffset = _methAddr * 4;

            state.Write(byteOffset, value);

            // Advance to the next method for the following send.
            _methAddr += _methIncr;
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs new file mode 100644 index 00000000..4077f74e --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs @@ -0,0 +1,39 @@
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Gpu.Engine.MME
{
    /// <summary>
    /// Represents an execution engine that uses a Just-in-Time compiler for fast execution.
    /// </summary>
    class MacroJit : IMacroEE
    {
        private readonly MacroJitContext _context = new MacroJitContext();

        /// <summary>
        /// Arguments FIFO.
        /// </summary>
        public Queue<FifoWord> Fifo => _context.Fifo;

        // Compiled program; created on the first call to Execute and reused afterwards.
        private MacroJitCompiler.MacroExecute _execute;

        /// <summary>
        /// Executes a macro program until it exits.
        /// </summary>
        /// <param name="code">Code of the program to execute</param>
        /// <param name="state">Current GPU state</param>
        /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
        public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
        {
            // The code is only compiled once; later calls run the cached delegate.
            _execute ??= new MacroJitCompiler().Compile(code);

            _execute(_context, state, arg0);
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs new file mode 100644 index 00000000..f8d839fa --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs @@ -0,0 +1,517 @@
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;

namespace Ryujinx.Graphics.Gpu.Engine.MME
{
    /// <summary>
    /// Represents a Macro Just-in-Time compiler.
/// </summary>
    class MacroJitCompiler
    {
        private readonly DynamicMethod _meth;
        private readonly ILGenerator _ilGen;
        private readonly LocalBuilder[] _gprs;
        private readonly LocalBuilder _carry;
        private readonly LocalBuilder _methAddr;
        private readonly LocalBuilder _methIncr;

        /// <summary>
        /// Creates a new instance of the Macro Just-in-Time compiler.
        /// </summary>
        public MacroJitCompiler()
        {
            Type[] parameterTypes = { typeof(MacroJitContext), typeof(IDeviceState), typeof(int) };

            _meth = new DynamicMethod("Macro", typeof(void), parameterTypes);
            _ilGen = _meth.GetILGenerator();
            _gprs = new LocalBuilder[8];

            // Register 0 gets no local: loads emit a constant 0 and stores are discarded.
            for (int index = 1; index < _gprs.Length; index++)
            {
                _gprs[index] = _ilGen.DeclareLocal(typeof(int));
            }

            _carry = _ilGen.DeclareLocal(typeof(int));
            _methAddr = _ilGen.DeclareLocal(typeof(int));
            _methIncr = _ilGen.DeclareLocal(typeof(int));

            // The first call argument (method argument 2) is copied into register 1.
            _ilGen.Emit(OpCodes.Ldarg_2);
            _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
        }

        public delegate void MacroExecute(MacroJitContext context, IDeviceState state, int arg0);

        /// <summary>
        /// Translates a new piece of GPU Macro code into host executable code.
        /// </summary>
        /// <param name="code">Code to be translated</param>
        /// <returns>Delegate of the host compiled code</returns>
        public MacroExecute Compile(ReadOnlySpan<int> code)
        {
            var labels = new Dictionary<int, Label>();

            int lastTarget = 0;
            int i;

            // First pass: define a label for every branch target, and find where the program ends.
            for (i = 0; i < code.Length; i++)
            {
                int opCode = code[i];

                bool isBranch = (opCode & 7) == 7;

                if (isBranch)
                {
                    int target = i + (opCode >> 14);

                    if (!labels.ContainsKey(target))
                    {
                        labels.Add(target, _ilGen.DefineLabel());
                    }

                    lastTarget = Math.Max(lastTarget, target);
                }

                // An exit only ends the program once no branch targets a later instruction.
                if ((opCode & 0x80) != 0 && i >= lastTarget)
                {
                    break;
                }
            }

            // Second pass: code generation.
            for (i = 0; i < code.Length; i++)
            {
                if (labels.TryGetValue(i, out Label label))
                {
                    _ilGen.MarkLabel(label);
                }

                Emit(code, i, labels);

                bool exit = (code[i] & 0x80) != 0;

                if (!exit)
                {
                    continue;
                }

                // The instruction after an exit (delay slot) still runs before returning.
                Emit(code, i + 1, labels);
                _ilGen.Emit(OpCodes.Ret);

                if (i >= lastTarget)
                {
                    break;
                }
            }

            // No final exit was reached: terminate the method explicitly.
            if (i == code.Length)
            {
                _ilGen.Emit(OpCodes.Ret);
            }

            return _meth.CreateDelegate<MacroExecute>();
        }

        /// <summary>
        /// Emits IL equivalent to the Macro instruction at a given offset.
        /// </summary>
        /// <param name="code">GPU Macro code</param>
        /// <param name="offset">Offset, in words, where the instruction is located</param>
        /// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
        private void Emit(ReadOnlySpan<int> code, int offset, Dictionary<int, Label> labels)
        {
            int opCode = code[offset];

            bool isBranch = (opCode & 7) == 7;

            if (isBranch)
            {
                bool onNotZero = ((opCode >> 4) & 1) != 0;

                EmitLoadGprA(opCode);

                Label lblSkip = _ilGen.DefineLabel();

                // Jump over the taken path when the branch condition does not hold.
                _ilGen.Emit(onNotZero ? OpCodes.Brfalse : OpCodes.Brtrue, lblSkip);

                bool noDelays = (opCode & 0x20) != 0;

                if (!noDelays)
                {
                    // The delay slot instruction is executed before the jump is taken.
                    Emit(code, offset + 1, labels);
                }

                int target = offset + (opCode >> 14);

                _ilGen.Emit(OpCodes.Br, labels[target]);

                _ilGen.MarkLabel(lblSkip);

                return;
            }

            // Operation produces a value, left on the IL evaluation stack by EmitAluOp.
            AssignmentOperation asgOp = (AssignmentOperation)((opCode >> 4) & 7);

            EmitAluOp(opCode);

            switch (asgOp)
            {
                // Fetch parameter and ignore result.
                case AssignmentOperation.IgnoreAndFetch:
                    _ilGen.Emit(OpCodes.Pop);
                    EmitFetchParam();
                    EmitStoreDstGpr(opCode);
                    break;
                // Move result.
                case AssignmentOperation.Move:
                    EmitStoreDstGpr(opCode);
                    break;
                // Move result and use as Method Address.
                case AssignmentOperation.MoveAndSetMaddr:
                    _ilGen.Emit(OpCodes.Dup);
                    EmitStoreDstGpr(opCode);
                    EmitStoreMethAddr();
                    break;
                // Fetch parameter and send result.
                case AssignmentOperation.FetchAndSend:
                    EmitFetchParam();
                    EmitStoreDstGpr(opCode);
                    EmitSend();
                    break;
                // Move and send result.
                case AssignmentOperation.MoveAndSend:
                    _ilGen.Emit(OpCodes.Dup);
                    EmitStoreDstGpr(opCode);
                    EmitSend();
                    break;
                // Fetch parameter and use result as Method Address.
                case AssignmentOperation.FetchAndSetMaddr:
                    EmitFetchParam();
                    EmitStoreDstGpr(opCode);
                    EmitStoreMethAddr();
                    break;
                // Move result and use as Method Address, then fetch and send parameter.
                case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
                    _ilGen.Emit(OpCodes.Dup);
                    EmitStoreDstGpr(opCode);
                    EmitStoreMethAddr();
                    EmitFetchParam();
                    EmitSend();
                    break;
                // Move result and use as Method Address, then send bits 17:12 of result.
                case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
                    _ilGen.Emit(OpCodes.Dup);
                    _ilGen.Emit(OpCodes.Dup);
                    EmitStoreDstGpr(opCode);
                    EmitStoreMethAddr();
                    _ilGen.Emit(OpCodes.Ldc_I4, 12);
                    _ilGen.Emit(OpCodes.Shr_Un);
                    _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
                    _ilGen.Emit(OpCodes.And);
                    EmitSend();
                    break;
            }
        }

        /// <summary>
        /// Emits IL for an Arithmetic and Logic Unit instruction.
/// </summary>
        /// <remarks>
        /// The emitted code leaves the operation result on the IL evaluation stack.
        /// </remarks>
        /// <param name="opCode">Instruction to be translated</param>
        /// <exception cref="InvalidOperationException">Throw when the instruction encoding is invalid</exception>
        private void EmitAluOp(int opCode)
        {
            AluOperation op = (AluOperation)(opCode & 7);

            switch (op)
            {
                case AluOperation.AluReg:
                    EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode);
                    break;

                case AluOperation.AddImmediate:
                    EmitLoadGprA(opCode);
                    EmitLoadImm(opCode);
                    _ilGen.Emit(OpCodes.Add);
                    break;

                case AluOperation.BitfieldReplace:
                case AluOperation.BitfieldExtractLslImm:
                case AluOperation.BitfieldExtractLslReg:
                    int bfSrcBit = (opCode >> 17) & 0x1f;
                    int bfSize = (opCode >> 22) & 0x1f;
                    int bfDstBit = (opCode >> 27) & 0x1f;

                    int bfMask = (1 << bfSize) - 1;

                    switch (op)
                    {
                        case AluOperation.BitfieldReplace:
                            // ((B >> srcBit) & mask) << dstBit, merged into A with the destination field cleared.
                            EmitLoadGprB(opCode);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
                            _ilGen.Emit(OpCodes.Shr_Un);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
                            _ilGen.Emit(OpCodes.And);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
                            _ilGen.Emit(OpCodes.Shl);
                            EmitLoadGprA(opCode);
                            _ilGen.Emit(OpCodes.Ldc_I4, ~(bfMask << bfDstBit));
                            _ilGen.Emit(OpCodes.And);
                            _ilGen.Emit(OpCodes.Or);
                            break;

                        case AluOperation.BitfieldExtractLslImm:
                            // ((B >> A) & mask) << dstBit.
                            EmitLoadGprB(opCode);
                            EmitLoadGprA(opCode);
                            _ilGen.Emit(OpCodes.Shr_Un);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
                            _ilGen.Emit(OpCodes.And);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit);
                            _ilGen.Emit(OpCodes.Shl);
                            break;

                        case AluOperation.BitfieldExtractLslReg:
                            // ((B >> srcBit) & mask) << A.
                            EmitLoadGprB(opCode);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit);
                            _ilGen.Emit(OpCodes.Shr_Un);
                            _ilGen.Emit(OpCodes.Ldc_I4, bfMask);
                            _ilGen.Emit(OpCodes.And);
                            EmitLoadGprA(opCode);
                            _ilGen.Emit(OpCodes.Shl);
                            break;
                    }
                    break;

                case AluOperation.ReadImmediate:
                    _ilGen.Emit(OpCodes.Ldarg_1);
                    EmitLoadGprA(opCode);
                    EmitLoadImm(opCode);
                    _ilGen.Emit(OpCodes.Add);
                    _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read)));
                    break;

                default:
                    throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}.");
            }
        }

        /// <summary>
        /// Emits IL for a binary Arithmetic and Logic Unit instruction.
        /// </summary>
        /// <remarks>
        /// The emitted code leaves the operation result on the IL evaluation stack.
        /// Add and subtract operations also update the carry local.
        /// </remarks>
        /// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
        /// <param name="opCode">Raw instruction</param>
        /// <exception cref="InvalidOperationException">Throw when the instruction encoding is invalid</exception>
        private void EmitAluOp(AluRegOperation aluOp, int opCode)
        {
            switch (aluOp)
            {
                case AluRegOperation.Add:
                    // carry = (A + B) > 0xffffffff, computed on 64-bit operands.
                    EmitLoadGprA(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    _ilGen.Emit(OpCodes.Add);
                    _ilGen.Emit(OpCodes.Dup);
                    _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
                    _ilGen.Emit(OpCodes.Cgt_Un);
                    _ilGen.Emit(OpCodes.Stloc, _carry);
                    _ilGen.Emit(OpCodes.Conv_U4);
                    break;
                case AluRegOperation.AddWithCarry:
                    // A + (B + carry).
                    EmitLoadGprA(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    _ilGen.Emit(OpCodes.Ldloc_S, _carry);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    _ilGen.Emit(OpCodes.Add);
                    _ilGen.Emit(OpCodes.Add);
                    _ilGen.Emit(OpCodes.Dup);
                    _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL);
                    _ilGen.Emit(OpCodes.Cgt_Un);
                    _ilGen.Emit(OpCodes.Stloc, _carry);
                    _ilGen.Emit(OpCodes.Conv_U4);
                    break;
                case AluRegOperation.Subtract:
                    // carry = (A - B) < 0x100000000, i.e. set when no borrow happened.
                    EmitLoadGprA(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    _ilGen.Emit(OpCodes.Sub);
                    _ilGen.Emit(OpCodes.Dup);
                    _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
                    _ilGen.Emit(OpCodes.Clt_Un);
                    _ilGen.Emit(OpCodes.Stloc, _carry);
                    _ilGen.Emit(OpCodes.Conv_U4);
                    break;
                case AluRegOperation.SubtractWithBorrow:
                    // A - (B + (1 - carry)), which equals A - B - borrow.
                    EmitLoadGprA(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    _ilGen.Emit(OpCodes.Ldc_I4_1);
                    _ilGen.Emit(OpCodes.Ldloc_S, _carry);
                    _ilGen.Emit(OpCodes.Sub);
                    _ilGen.Emit(OpCodes.Conv_U8);
                    // The borrow must be added to B before the final subtraction. Subtracting it from B
                    // here would compute A - B + borrow instead.
                    _ilGen.Emit(OpCodes.Add);
                    _ilGen.Emit(OpCodes.Sub);
                    _ilGen.Emit(OpCodes.Dup);
                    _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L);
                    _ilGen.Emit(OpCodes.Clt_Un);
                    _ilGen.Emit(OpCodes.Stloc, _carry);
                    _ilGen.Emit(OpCodes.Conv_U4);
                    break;
                case AluRegOperation.BitwiseExclusiveOr:
                    EmitLoadGprA(opCode);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Xor);
                    break;
                case AluRegOperation.BitwiseOr:
                    EmitLoadGprA(opCode);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Or);
                    break;
                case AluRegOperation.BitwiseAnd:
                    EmitLoadGprA(opCode);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.And);
                    break;
                case AluRegOperation.BitwiseAndNot:
                    // A & ~B.
                    EmitLoadGprA(opCode);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.Not);
                    _ilGen.Emit(OpCodes.And);
                    break;
                case AluRegOperation.BitwiseNotAnd:
                    // ~(A & B).
                    EmitLoadGprA(opCode);
                    EmitLoadGprB(opCode);
                    _ilGen.Emit(OpCodes.And);
                    _ilGen.Emit(OpCodes.Not);
                    break;
                default:
                    throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
            }
        }

        /// <summary>
        /// Loads an immediate value on the IL evaluation stack.
        /// </summary>
        /// <param name="opCode">Instruction from where the immediate should be extracted</param>
        private void EmitLoadImm(int opCode)
        {
            // Note: The immediate is signed, the sign-extension is intended here.
            _ilGen.Emit(OpCodes.Ldc_I4, opCode >> 14);
        }

        /// <summary>
        /// Loads a value from the General Purpose register specified as first operand on the IL evaluation stack.
        /// </summary>
        /// <param name="opCode">Instruction from where the register number should be extracted</param>
        private void EmitLoadGprA(int opCode)
        {
            EmitLoadGpr((opCode >> 11) & 7);
        }

        /// <summary>
        /// Loads a value from the General Purpose register specified as second operand on the IL evaluation stack.
+ /// </summary> + /// <param name="opCode">Instruction from where the register number should be extracted</param> + private void EmitLoadGprB(int opCode) + { + EmitLoadGpr((opCode >> 14) & 7); + } + + /// <summary> + /// Loads a value from a General Purpose register on the IL evaluation stack. + /// </summary> + /// <remarks> + /// Register number 0 has a hardcoded value of 0. + /// </remarks> + /// <param name="index">Register number</param> + private void EmitLoadGpr(int index) + { + if (index == 0) + { + _ilGen.Emit(OpCodes.Ldc_I4_0); + } + else + { + _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]); + } + } + + /// <summary> + /// Emits a call to the method that fetches an argument from the arguments FIFO. + /// The argument is pushed into the IL evaluation stack. + /// </summary> + private void EmitFetchParam() + { + // Argument 0 is pushed as the receiver of the FetchParam call. + _ilGen.Emit(OpCodes.Ldarg_0); + _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam))); + } + + /// <summary> + /// Stores the value on the top of the IL evaluation stack into a General Purpose register. + /// </summary> + /// <remarks> + /// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded. + /// </remarks> + /// <param name="opCode">Instruction from where the register number should be extracted</param> + private void EmitStoreDstGpr(int opCode) + { + // The destination register number is held in bits 8 to 10 of the instruction. + int index = (opCode >> 8) & 7; + + if (index == 0) + { + // The value still has to be removed from the stack, even though it is not stored. + _ilGen.Emit(OpCodes.Pop); + } + else + { + _ilGen.Emit(OpCodes.Stloc_S, _gprs[index]); + } + } + + /// <summary> + /// Stores the value on the top of the IL evaluation stack as method address. + /// This will be used on subsequent send calls as the destination method address. + /// Additionally, the 6 bits starting at bit 12 will be used as increment value, + /// added to the method address after each sent value.
+ /// </summary> + private void EmitStoreMethAddr() + { + // Duplicate the value: one copy yields the address, the other yields the increment. + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I4, 0xfff); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Stloc_S, _methAddr); + // Increment is (value >> 12) & 0x3f. + _ilGen.Emit(OpCodes.Ldc_I4, 12); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, 0x3f); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Stloc_S, _methIncr); + } + + /// <summary> + /// Sends the value on the top of the IL evaluation stack to the GPU, + /// using the current method address. + /// </summary> + private void EmitSend() + { + // The value is already on the stack; push argument 1 and the method address as the remaining Send arguments. + _ilGen.Emit(OpCodes.Ldarg_1); + _ilGen.Emit(OpCodes.Ldloc_S, _methAddr); + _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send))); + // Advance the method address by the stored increment. + _ilGen.Emit(OpCodes.Ldloc_S, _methAddr); + _ilGen.Emit(OpCodes.Ldloc_S, _methIncr); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Stloc_S, _methAddr); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs new file mode 100644 index 00000000..52c2a11b --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs @@ -0,0 +1,55 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// <summary> + /// Represents a Macro Just-in-Time compiler execution context. + /// </summary> + class MacroJitContext + { + /// <summary> + /// Arguments FIFO. + /// </summary> + public Queue<FifoWord> Fifo { get; } = new Queue<FifoWord>(); + + /// <summary> + /// Fetches an argument from the arguments FIFO. + /// </summary> + /// <returns>The call argument, or 0 if the FIFO is empty</returns> + public int FetchParam() + { + if (!Fifo.TryDequeue(out var value)) + { + Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); + + return 0; + } + + return value.Word; + } + + /// <summary> + /// Reads data from a GPU register.
+ /// </summary> + /// <param name="state">Current GPU state</param> + /// <param name="reg">Register offset to read (multiplied by 4 before being passed to the device state)</param> + /// <returns>GPU register value</returns> + public static int Read(IDeviceState state, int reg) + { + return state.Read(reg * 4); + } + + /// <summary> + /// Performs a GPU method call. + /// </summary> + /// <param name="value">Call argument</param> + /// <param name="state">Current GPU state</param> + /// <param name="methAddr">Address, in words, of the method</param> + public static void Send(int value, IDeviceState state, int methAddr) + { + state.Write(methAddr * 4, value); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs new file mode 100644 index 00000000..44cd8213 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs @@ -0,0 +1,18 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine +{ + /// <summary> + /// Represents temporary storage used by macros. + /// </summary> + [StructLayout(LayoutKind.Sequential, Size = 1024)] + struct MmeShadowScratch + { + // Only the first word is declared as a field; the 1024 byte size comes from the StructLayout attribute above. +#pragma warning disable CS0169 + private uint _e0; +#pragma warning restore CS0169 + /// <summary> + /// Gets a reference to the scratch word at the given index. + /// </summary> + /// <param name="index">Index of the word inside the span returned by AsSpan</param> + public ref uint this[int index] => ref AsSpan()[index]; + /// <summary> + /// Gets a span of 256 unsigned 32-bit words starting at the first field. + /// </summary> + /// <returns>Span over the scratch words</returns> + public Span<uint> AsSpan() => MemoryMarshal.CreateSpan(ref _e0, 256); + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs new file mode 100644 index 00000000..060d35ca --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/SetMmeShadowRamControlMode.cs @@ -0,0 +1,13 @@ +namespace Ryujinx.Graphics.Gpu.Engine +{ + /// <summary> + /// MME shadow RAM control mode. + /// </summary> + enum SetMmeShadowRamControlMode + { + MethodTrack = 0, + MethodTrackWithFilter = 1, + MethodPassthrough = 2, + MethodReplay = 3, + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs new file mode 100644 index 00000000..e1e3085b --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs @@ -0,0 +1,111 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Engine +{ + /// <summary> + /// Shader texture properties conversion methods. + /// </summary> + static class ShaderTexture + { + /// <summary> + /// Gets a texture target from a sampler type. + /// </summary> + /// <param name="type">Sampler type</param> + /// <returns>Texture target value, or Texture2D when the sampler type matches no known combination</returns> + public static Target GetTarget(SamplerType type) + { + // Remove the Indexed and Shadow flags; the cases below only match the base type combined with Array and Multisample. + type &= ~(SamplerType.Indexed | SamplerType.Shadow); + + switch (type) + { + case SamplerType.Texture1D: + return Target.Texture1D; + + case SamplerType.TextureBuffer: + return Target.TextureBuffer; + + case SamplerType.Texture1D | SamplerType.Array: + return Target.Texture1DArray; + + case SamplerType.Texture2D: + return Target.Texture2D; + + case SamplerType.Texture2D | SamplerType.Array: + return Target.Texture2DArray; + + case SamplerType.Texture2D | SamplerType.Multisample: + return Target.Texture2DMultisample; + + case SamplerType.Texture2D | SamplerType.Multisample | SamplerType.Array: + return Target.Texture2DMultisampleArray; + + case SamplerType.Texture3D: + return Target.Texture3D; + + case SamplerType.TextureCube: + return Target.Cubemap; + + case SamplerType.TextureCube | SamplerType.Array: + return Target.CubemapArray; + } + + // No case matched: log the unexpected type and fall back to Texture2D. + Logger.Warning?.Print(LogClass.Gpu, $"Invalid sampler type \"{type}\"."); + + return Target.Texture2D; + } + + /// <summary> + /// Gets a texture format from a shader image format.
+ /// </summary> + /// <param name="format">Shader image format</param> + /// <returns>Texture format, or the zero-valued format when the shader image format has no mapping</returns> + public static Format GetFormat(TextureFormat format) + { + return format switch + { + TextureFormat.R8Unorm => Format.R8Unorm, + TextureFormat.R8Snorm => Format.R8Snorm, + TextureFormat.R8Uint => Format.R8Uint, + TextureFormat.R8Sint => Format.R8Sint, + TextureFormat.R16Float => Format.R16Float, + TextureFormat.R16Unorm => Format.R16Unorm, + TextureFormat.R16Snorm => Format.R16Snorm, + TextureFormat.R16Uint => Format.R16Uint, + TextureFormat.R16Sint => Format.R16Sint, + TextureFormat.R32Float => Format.R32Float, + TextureFormat.R32Uint => Format.R32Uint, + TextureFormat.R32Sint => Format.R32Sint, + TextureFormat.R8G8Unorm => Format.R8G8Unorm, + TextureFormat.R8G8Snorm => Format.R8G8Snorm, + TextureFormat.R8G8Uint => Format.R8G8Uint, + TextureFormat.R8G8Sint => Format.R8G8Sint, + TextureFormat.R16G16Float => Format.R16G16Float, + TextureFormat.R16G16Unorm => Format.R16G16Unorm, + TextureFormat.R16G16Snorm => Format.R16G16Snorm, + TextureFormat.R16G16Uint => Format.R16G16Uint, + TextureFormat.R16G16Sint => Format.R16G16Sint, + TextureFormat.R32G32Float => Format.R32G32Float, + TextureFormat.R32G32Uint => Format.R32G32Uint, + TextureFormat.R32G32Sint => Format.R32G32Sint, + TextureFormat.R8G8B8A8Unorm => Format.R8G8B8A8Unorm, + TextureFormat.R8G8B8A8Snorm => Format.R8G8B8A8Snorm, + TextureFormat.R8G8B8A8Uint => Format.R8G8B8A8Uint, + TextureFormat.R8G8B8A8Sint => Format.R8G8B8A8Sint, + TextureFormat.R16G16B16A16Float => Format.R16G16B16A16Float, + TextureFormat.R16G16B16A16Unorm => Format.R16G16B16A16Unorm, + TextureFormat.R16G16B16A16Snorm => Format.R16G16B16A16Snorm, + TextureFormat.R16G16B16A16Uint => Format.R16G16B16A16Uint, + TextureFormat.R16G16B16A16Sint => Format.R16G16B16A16Sint, + TextureFormat.R32G32B32A32Float => Format.R32G32B32A32Float, + TextureFormat.R32G32B32A32Uint => Format.R32G32B32A32Uint, + TextureFormat.R32G32B32A32Sint =>
Format.R32G32B32A32Sint, + TextureFormat.R10G10B10A2Unorm => Format.R10G10B10A2Unorm, + TextureFormat.R10G10B10A2Uint => Format.R10G10B10A2Uint, + TextureFormat.R11G11B10Float => Format.R11G11B10Float, + // Any other value maps to the zero-valued format. + _ => 0 + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs new file mode 100644 index 00000000..a40b9cc4 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendFunctions.cs @@ -0,0 +1,4226 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System.Globalization; +using System.Runtime.InteropServices; +using System.Text; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender +{ + static class AdvancedBlendFunctions + { + public static readonly AdvancedBlendUcode[] Table = new AdvancedBlendUcode[] + { + new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedPremul), + new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusClampedAlphaPremul), + new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPlusDarkerPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMultiplyPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedScreenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedOverlayPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDarkenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLightenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true,
GenUncorrelatedColorDodgePremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedColorBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedSoftLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedDifferencePremul), + new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusPremul), + new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedMinusClampedPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedExclusionPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedContrastPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertPremul), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertRGBPremul), + new AdvancedBlendUcode(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedInvertOvgPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearDodgePremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedVividLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedLinearLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, 
AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedPinLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHardMixPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedRedPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedGreenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedBluePremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslHuePremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslSaturationPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslColorPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, true, GenUncorrelatedHslLuminosityPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOverPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOverPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcInPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstInPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcOutPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstOutPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, 
AdvancedBlendOverlap.Disjoint, true, GenDisjointSrcAtopPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, GenDisjointDstAtopPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, GenDisjointXorPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, GenDisjointPlusPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, GenDisjointMultiplyPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, GenDisjointScreenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, GenDisjointOverlayPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, GenDisjointDarkenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, GenDisjointLightenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorDodgePremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointColorBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointSoftLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, GenDisjointDifferencePremul), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, true, GenDisjointExclusionPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertPremul), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, GenDisjointInvertRGBPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, 
GenDisjointLinearDodgePremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointVividLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointLinearLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, GenDisjointPinLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, GenDisjointHardMixPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslHuePremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslSaturationPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslColorPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, GenDisjointHslLuminosityPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, GenConjointDstPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOverPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOverPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcInPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, GenConjointDstInPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcOutPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, GenConjointDstOutPremul), + new 
AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointSrcAtopPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, GenConjointDstAtopPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, GenConjointXorPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, GenConjointMultiplyPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, GenConjointScreenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, GenConjointOverlayPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, GenConjointDarkenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, GenConjointLightenPremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointColorDodgePremul), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointColorBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, GenConjointHardLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, GenConjointSoftLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, GenConjointDifferencePremul), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, GenConjointExclusionPremul), + new AdvancedBlendUcode(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertPremul), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, GenConjointInvertRGBPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearDodgePremul), + new 
AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearBurnPremul), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, GenConjointVividLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, GenConjointLinearLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, GenConjointPinLightPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, GenConjointHardMixPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, GenConjointHslHuePremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, GenConjointHslSaturationPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, GenConjointHslColorPremul), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, GenConjointHslLuminosityPremul), + new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstOver), + new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcIn), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcOut), + new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSrcAtop), + new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDstAtop), + new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedXor), + new AdvancedBlendUcode(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusClamped), + new AdvancedBlendUcode(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, 
GenUncorrelatedPlusClampedAlpha), + new AdvancedBlendUcode(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPlusDarker), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMultiply), + new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedScreen), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedOverlay), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDarken), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLighten), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorDodge), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedColorBurn), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardLight), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedSoftLight), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedDifference), + new AdvancedBlendUcode(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinus), + new AdvancedBlendUcode(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedMinusClamped), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedExclusion), + new AdvancedBlendUcode(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedContrast), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedInvertRGB), + new 
AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearDodge), + new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearBurn), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedVividLight), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedLinearLight), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedPinLight), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHardMix), + new AdvancedBlendUcode(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedRed), + new AdvancedBlendUcode(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedGreen), + new AdvancedBlendUcode(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedBlue), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslHue), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslSaturation), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslColor), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, GenUncorrelatedHslLuminosity), + new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrc), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOver), + new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstOver), + new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcIn), + new 
AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcOut), + new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointSrcAtop), + new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, GenDisjointDstAtop), + new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, GenDisjointXor), + new AdvancedBlendUcode(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, GenDisjointPlus), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, GenDisjointMultiply), + new AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, GenDisjointScreen), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, GenDisjointOverlay), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, GenDisjointDarken), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, GenDisjointLighten), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorDodge), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointColorBurn), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardLight), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointSoftLight), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, GenDisjointDifference), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, GenDisjointExclusion), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, GenDisjointInvertRGB), + new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearDodge), + new 
AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearBurn), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointVividLight), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointLinearLight), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, GenDisjointPinLight), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, GenDisjointHardMix), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslHue), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslSaturation), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslColor), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, GenDisjointHslLuminosity), + new AdvancedBlendUcode(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, GenConjointSrc), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOver), + new AdvancedBlendUcode(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, GenConjointDstOver), + new AdvancedBlendUcode(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcIn), + new AdvancedBlendUcode(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcOut), + new AdvancedBlendUcode(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointSrcAtop), + new AdvancedBlendUcode(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, false, GenConjointDstAtop), + new AdvancedBlendUcode(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, false, GenConjointXor), + new AdvancedBlendUcode(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, false, GenConjointMultiply), + new 
AdvancedBlendUcode(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, false, GenConjointScreen), + new AdvancedBlendUcode(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, false, GenConjointOverlay), + new AdvancedBlendUcode(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, false, GenConjointDarken), + new AdvancedBlendUcode(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, false, GenConjointLighten), + new AdvancedBlendUcode(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointColorDodge), + new AdvancedBlendUcode(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointColorBurn), + new AdvancedBlendUcode(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, false, GenConjointHardLight), + new AdvancedBlendUcode(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, false, GenConjointSoftLight), + new AdvancedBlendUcode(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, false, GenConjointDifference), + new AdvancedBlendUcode(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, false, GenConjointExclusion), + new AdvancedBlendUcode(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, false, GenConjointInvertRGB), + new AdvancedBlendUcode(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearDodge), + new AdvancedBlendUcode(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearBurn), + new AdvancedBlendUcode(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, false, GenConjointVividLight), + new AdvancedBlendUcode(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, false, GenConjointLinearLight), + new AdvancedBlendUcode(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, false, GenConjointPinLight), + new AdvancedBlendUcode(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, false, GenConjointHardMix), + new AdvancedBlendUcode(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, false, 
GenConjointHslHue), + new AdvancedBlendUcode(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, false, GenConjointHslSaturation), + new AdvancedBlendUcode(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, false, GenConjointHslColor), + new AdvancedBlendUcode(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, false, GenConjointHslLuminosity) + }; + + public static string GenTable() + { + // This can be used to generate the table on AdvancedBlendPreGenTable. + + StringBuilder sb = new StringBuilder(); + + sb.AppendLine($"private static Dictionary<Hash128, AdvancedBlendEntry> _entries = new()"); + sb.AppendLine("{"); + + foreach (var entry in Table) + { + Hash128 hash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(entry.Code)); + + string[] constants = new string[entry.Constants != null ? entry.Constants.Length : 0]; + + for (int i = 0; i < constants.Length; i++) + { + RgbFloat rgb = entry.Constants[i]; + + constants[i] = string.Format(CultureInfo.InvariantCulture, "new " + nameof(RgbFloat) + "({0}f, {1}f, {2}f)", rgb.R, rgb.G, rgb.B); + } + + string constantList = constants.Length > 0 ? $"new[] {{ {string.Join(", ", constants)} }}" : $"Array.Empty<{nameof(RgbFloat)}>()"; + + static string EnumValue(string name, object value) + { + if (value.ToString() == "0") + { + return "0"; + } + + return $"{name}.{value}"; + } + + string alpha = $"new {nameof(FixedFunctionAlpha)}({EnumValue(nameof(BlendUcodeEnable), entry.Alpha.Enable)}, {EnumValue(nameof(BlendOp), entry.Alpha.AlphaOp)}, {EnumValue(nameof(BlendFactor), entry.Alpha.AlphaSrcFactor)}, {EnumValue(nameof(BlendFactor), entry.Alpha.AlphaDstFactor)})"; + + sb.AppendLine($" {{ new Hash128(0x{hash.Low:X16}, 0x{hash.High:X16}), new AdvancedBlendEntry({nameof(AdvancedBlendOp)}.{entry.Op}, {nameof(AdvancedBlendOverlap)}.{entry.Overlap}, {(entry.SrcPreMultiplied ? 
"true" : "false")}, {constantList}, {alpha}) }},"); + } + + sb.AppendLine("};"); + + return sb.ToString(); + } + + private static FixedFunctionAlpha GenUncorrelatedPlusClampedPremul(ref UcodeAssembler asm) + { + asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlphaPremul(ref UcodeAssembler asm) + { + asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenUncorrelatedPlusDarkerPremul(ref UcodeAssembler asm) + { + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcRGB); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenUncorrelatedMultiplyPremul(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + 
return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedScreenPremul(ref UcodeAssembler asm) + { + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedOverlayPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedDarkenPremul(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + 
asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedLightenPremul(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedColorDodgePremul(ref UcodeAssembler asm) + { + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.SrcRGB); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB); + asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR); + asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedColorBurnPremul(ref UcodeAssembler asm) + { + asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcRGB); + asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, 
OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHardLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedSoftLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, 
OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); + asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedDifferencePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); 
+ } + + private static FixedFunctionAlpha GenUncorrelatedMinusPremul(ref UcodeAssembler asm) + { + asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); + return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedMinusClampedPremul(ref UcodeAssembler asm) + { + asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.SrcRGB); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenUncorrelatedExclusionPremul(ref UcodeAssembler asm) + { + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedContrastPremul(ref UcodeAssembler asm) + { + asm.SetConstant(0, 2f, 2f, 2f); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); + asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); + asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA); + asm.SetConstant(1, 0.5f, 0.5f, 0.5f); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedInvertPremul(ref UcodeAssembler asm) + { + asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcAAA, 
OpBD.DstAAA, OpAC.SrcAAA, OpBD.DstRGB); + asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedInvertRGBPremul(ref UcodeAssembler asm) + { + asm.Mmsub(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.DstRGB); + asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedInvertOvgPremul(ref UcodeAssembler asm) + { + asm.Sub(CC.T, Dest.PBR, OpBD.ConstantOne, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.PBR, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedLinearDodgePremul(ref UcodeAssembler asm) + { + asm.Mmadd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedLinearBurnPremul(ref UcodeAssembler asm) + { + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Mmadd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + 
} + + private static FixedFunctionAlpha GenUncorrelatedVividLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedLinearLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, 
OpAC.PBR, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedPinLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHardMixPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new 
FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedRedPremul(ref UcodeAssembler asm) + { + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.R, OpBD.SrcRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedGreenPremul(ref UcodeAssembler asm) + { + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.G, OpBD.SrcRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedBluePremul(ref UcodeAssembler asm) + { + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.B, OpBD.SrcRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHslHuePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); 
+ asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, 
OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHslSaturationPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, 
OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHslColorPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, 
OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHslLuminosityPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, 
OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.SrcRGB, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenDisjointSrcPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, 
OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenDisjointDstPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointSrcOverPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp2); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDstOverPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + 
asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcInPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDstInPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcOutPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha 
GenDisjointDstOutPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcAtopPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointDstAtopPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenDisjointXorPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); 
+ asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointPlusPremul(ref UcodeAssembler asm) + { + asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.SrcRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointMultiplyPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointScreenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + 
asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointOverlayPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDarkenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, 
OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLightenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointColorDodgePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); + asm.Mov(CC.LE, 
Dest.Temp0, OpBD.ConstantZero); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointColorBurnPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2); + asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHardLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + 
asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSoftLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); + asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + 
asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDifferencePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + 
private static FixedFunctionAlpha GenDisjointExclusionPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointInvertPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointInvertRGBPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, 
OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointLinearDodgePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLinearBurnPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); 
+ asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointVividLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLinearLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); 
+ asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointPinLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, 
Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHardMixPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslHuePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, 
Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, 
Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslSaturationPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, 
OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, 
OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslColorPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, 
OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslLuminosityPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, 
Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.Temp2, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointSrcPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, 
BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenConjointDstPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSrcOverPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp2, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDstOverPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSrcInPremul(ref 
UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDstInPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSrcOutPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointDstOutPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointSrcAtopPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + 
asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDstAtopPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenConjointXorPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); + asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointMultiplyPremul(ref UcodeAssembler asm) + { 
+ asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointScreenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointOverlayPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, 
OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDarkenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLightenPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointColorDodgePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, 
OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointColorBurnPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp2); + asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHardLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, 
OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp2, OpBD.Temp1, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSoftLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); + asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, 
OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDifferencePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.Temp0, OpBD.Temp2, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointExclusionPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, 
OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointInvertPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointInvertRGBPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.ConstantOne, OpAC.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearDodgePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, 
OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearBurnPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointVividLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.Temp2); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, 
OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointPinLightPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + 
asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.Temp2, OpBD.Temp2); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHardMixPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHslHuePremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, 
OpBD.Temp2); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp2, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + 
asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHslSaturationPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + 
asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + 
asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + /* GenConjointHslColorPremul: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): opcode, register (PBR/Temp0-2) and condition-code (CC.*) semantics are defined by UcodeAssembler, outside this view - confirm there. */ private static FixedFunctionAlpha GenConjointHslColorPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + /* NOTE(review): 0.3 / 0.59 / 0.11 look like per-channel luminance weights - confirm. */ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp2, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); +
asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + /* GenConjointHslLuminosityPremul: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(MaximumGl, OneGl, OneGl). NOTE(review): opcode, register and condition-code semantics are defined by UcodeAssembler, outside this view - confirm there. */ private static FixedFunctionAlpha GenConjointHslLuminosityPremul(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + /* NOTE(review): 0.3 / 0.59 / 0.11 look like per-channel luminance weights - confirm. */ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp2, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.Temp2, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR,
OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); + asm.Rcp(CC.T, Dest.PBR, OpAC.SrcAAA); + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.PBR); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp2, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedDstOver: issues the four asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). The last call targets Dest.Temp0 - presumably the output register; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedDstOver(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedSrcIn: issues two Mul calls and a Mov into Dest.Temp0 on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, DstAlphaGl, ZeroGl). */ private static FixedFunctionAlpha GenUncorrelatedSrcIn(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); +
asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl); + } + + /* GenUncorrelatedSrcOut: issues the two Mul calls that follow on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneMinusDstAlphaGl, ZeroGl). NOTE(review): opcode/register semantics are defined by UcodeAssembler, outside this view. */ private static FixedFunctionAlpha GenUncorrelatedSrcOut(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.OneMinusDstAAA); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl); + } + + /* GenUncorrelatedSrcAtop: issues Mul, Mul, Madd (the Madd targets Dest.Temp0) on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedSrcAtop(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedDstAtop: issues Mul, Mul, Madd (the Madd targets Dest.Temp0) on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, ZeroGl). */ private static FixedFunctionAlpha GenUncorrelatedDstAtop(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + /* GenUncorrelatedXor: issues Mul, Mul, Madd (the Madd targets Dest.Temp0) on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneMinusDstAlphaGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedXor(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.PBR, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedPlusClamped: issues the asm.* calls that follow on the assembler passed by ref and returns FixedFunctionAlpha.Disabled. It writes Dest.Temp1.RToA from a Min against ConstantOne - presumably the alpha result is produced in ucode because fixed-function alpha is disabled; confirm. */ private static FixedFunctionAlpha GenUncorrelatedPlusClamped(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Add(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0,
OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + /* GenUncorrelatedPlusClampedAlpha: issues the asm.* calls that follow, in order, on the assembler passed by ref and returns FixedFunctionAlpha.Disabled. Differs from the plain PlusClamped sequence by an extra Min of Temp0 against the clamped SrcAAA+DstAAA sum held in PBR. */ private static FixedFunctionAlpha GenUncorrelatedPlusClampedAlpha(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + /* GenUncorrelatedPlusDarker: issues the asm.* calls that follow, in order, on the assembler passed by ref and returns FixedFunctionAlpha.Disabled. NOTE(review): opcode/register semantics are defined by UcodeAssembler, outside this view. */ private static FixedFunctionAlpha GenUncorrelatedPlusDarker(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstRGB); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.SrcAAA); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + /* GenUncorrelatedMultiply: issues the four asm.* calls that follow on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). The second Mul reads OpAC.PBR right after the first Mul wrote Dest.Temp2 - presumably PBR mirrors the previous result; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedMultiply(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedScreen: issues Mul, Mmadd, Mmsub, Mmadd, Add on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedScreen(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); +
asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedOverlay: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). A Sub with a .CC destination against the 0.5 constant is followed by CC.LE / CC.GT conditioned calls - presumably a per-channel branch on that comparison; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedOverlay(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedDarken: issues the asm.* calls that follow on the assembler passed by ref (the combining step is a Min into Dest.Temp0), then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedDarken(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedLighten: same call sequence as the Darken generator except that the combining step is a Max instead of a Min; returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedLighten(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB,
OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.SrcAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedColorDodge: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). Two Sub calls with .CC destinations are each followed by CC.GT / CC.LE conditioned calls - presumably special-casing by comparison result; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedColorDodge(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.PBR); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.SrcAAA); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.DstRGB); + asm.Min(CC.GT, Dest.PBR, OpAC.DstAAA, OpBD.PBR); + asm.Mul(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.SrcAAA); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstRGB, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedColorBurn: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). NOTE(review): opcode/register/condition-code semantics are defined by UcodeAssembler, outside this view. */ private static FixedFunctionAlpha GenUncorrelatedColorBurn(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstAAA, OpBD.SrcAAA, OpAC.SrcAAA, OpBD.DstRGB); + asm.Rcp(CC.T, Dest.PBR, OpAC.Temp2); + asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA, OpAC.SrcAAA, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp2, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.DstAAA, OpBD.DstRGB); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.T,
Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedHardLight: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). The sequence matches the Overlay generator except that the .CC-setting Sub compares OpBD.SrcRGB (not OpBD.PBR) against the 0.5 constant. */ private static FixedFunctionAlpha GenUncorrelatedHardLight(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedSoftLight: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). It loads constant slots 0-7 via SetConstant between instructions. NOTE(review): which slot each OpBD.ConstantRGB operand reads is decided by UcodeAssembler (not visible here) - do not reorder the SetConstant calls. */ private static FixedFunctionAlpha GenUncorrelatedSoftLight(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); +
asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedDifference: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). The first Sub writes Dest.Temp0 with .CC and the CC.LT Sub overwrites Temp0 with the operands swapped - presumably yielding an absolute difference; confirm. */ private static FixedFunctionAlpha GenUncorrelatedDifference(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); + asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedMinus: issues a Mul then a Sub into Dest.Temp0 on the assembler passed by ref, then returns FixedFunctionAlpha(ReverseSubtractGl, OneGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedMinus(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Sub(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.ReverseSubtractGl,
BlendFactor.OneGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedMinusClamped: issues the asm.* calls that follow on the assembler passed by ref and returns FixedFunctionAlpha.Disabled. Both Max calls clamp against ConstantZero; the second writes Dest.Temp1.RToA - presumably the alpha result; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedMinusClamped(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Sub(CC.T, Dest.PBR, OpBD.DstRGB, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + /* GenUncorrelatedExclusion: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). Note the two consecutive Mmsub calls with identical operands (first into PBR, second into Temp0). */ private static FixedFunctionAlpha GenUncorrelatedExclusion(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.Temp2, OpBD.DstRGB); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedContrast: issues the asm.* calls that follow on the assembler passed by ref, loading constant slot 0 with 2.0 and slot 1 with 0.5 along the way, then returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). NOTE(review): which slot each OpBD.ConstantRGB operand reads is decided by UcodeAssembler - confirm. */ private static FixedFunctionAlpha GenUncorrelatedContrast(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.ConstantRGB, OpAC.DstAAA, OpBD.ConstantOne); + asm.Mmsub(CC.T, Dest.PBR, OpAC.Temp2, OpBD.ConstantRGB, OpAC.SrcAAA, OpBD.ConstantOne); + asm.Mul(CC.T, Dest.PBR, OpAC.Temp0, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.PBR, OpBD.DstAAA); + asm.SetConstant(1, 0.5f, 0.5f, 0.5f); + asm.Mul(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantRGB); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedInvertRGB: issues Mul, Mmsub, Madd (the Madd targets Dest.Temp0) on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedInvertRGB(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.PBR,
OpBD.DstRGB); + asm.Madd(CC.T, Dest.Temp0, OpAC.DstRGB, OpBD.OneMinusSrcAAA, OpAC.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedLinearDodge: issues the asm.* calls that follow on the assembler passed by ref (a Min against SrcAAA*DstAAA bounds Temp0), then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedLinearDodge(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedLinearBurn: issues the asm.* calls that follow on the assembler passed by ref (a Max against ConstantZero floors Temp0), then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedLinearBurn(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.PBR, OpAC.PBR, OpBD.DstAAA, OpAC.DstRGB, OpBD.SrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Mmadd(CC.T, Dest.PBR, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.Temp0, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedVividLight: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). After a .CC-setting Sub against the 0.5 constant, separate CC.GE and CC.LT call groups follow - presumably the two halves of the mode; confirm in UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedVividLight(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR,
OpBD.SrcRGB, OpBD.SrcRGB); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedLinearLight: issues the asm.* calls that follow, in order, on the assembler passed by ref (constant slot 0 is loaded with 2.0; a Max against ConstantZero and a Min against ConstantOne bound the intermediate value), then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). */ private static FixedFunctionAlpha GenUncorrelatedLinearLight(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedPinLight: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). NOTE(review): opcode/register/condition-code semantics are defined by UcodeAssembler, outside this view. */ private static FixedFunctionAlpha GenUncorrelatedPinLight(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T,
Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedHardMix: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). After a .CC-setting Sub against ConstantOne, Temp0 is written from SrcAAA*ConstantZero under CC.LT and from ConstantOne under CC.GE. */ private static FixedFunctionAlpha GenUncorrelatedHardMix(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.Temp2, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedRed: moves OpBD.DstRGB into Dest.Temp0, then moves Temp2 (SrcRGB * SrcAAA) into Dest.Temp0.R only; returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedRed(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.R, OpBD.Temp2); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedGreen: same sequence as the Red generator except the final Mov targets Dest.Temp0.G; returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedGreen(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.G, OpBD.Temp2); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedBlue: same sequence as the Red and Green generators except the final Mov targets Dest.Temp0.B; returns FixedFunctionAlpha(AddGl, ZeroGl, OneGl). */ private static FixedFunctionAlpha GenUncorrelatedBlue(ref
UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.Temp2, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.DstRGB); + asm.Mov(CC.T, Dest.Temp0.B, OpBD.Temp2); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + /* GenUncorrelatedHslHue: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). The Mov/Min/Min and Mov/Max/Max triples with .GBR destinations presumably rotate channels to reduce min/max across R, G and B - confirm against UcodeAssembler. */ private static FixedFunctionAlpha GenUncorrelatedHslHue(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + /* NOTE(review): 0.3 / 0.59 / 0.11 look like per-channel luminance weights - confirm. */ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); +
asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedHslSaturation: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). NOTE(review): opcode, register and condition-code semantics are defined by UcodeAssembler, outside this view - confirm there before changing the order of any call. */ private static FixedFunctionAlpha GenUncorrelatedHslSaturation(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); +
asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + /* NOTE(review): 0.3 / 0.59 / 0.11 look like per-channel luminance weights - confirm. */ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR,
OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + /* GenUncorrelatedHslColor: issues the asm.* calls that follow, in order, on the assembler passed by ref, then returns FixedFunctionAlpha(AddGl, OneGl, OneMinusSrcAlphaGl). NOTE(review): opcode, register and condition-code semantics are defined by UcodeAssembler, outside this view - confirm there. */ private static FixedFunctionAlpha GenUncorrelatedHslColor(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + /* NOTE(review): 0.3 / 0.59 / 0.11 look like per-channel luminance weights - confirm. */ asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT,
Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenUncorrelatedHslLuminosity(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + 
asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Mmadd(CC.T, Dest.Temp1, OpAC.PBR, OpBD.OneMinusDstAAA, OpAC.DstRGB, OpBD.OneMinusSrcAAA); + asm.Mul(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.DstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl); + } + + private static FixedFunctionAlpha GenDisjointSrc(ref UcodeAssembler asm) + { + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenDisjointSrcOver(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.SrcRGB); + asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, 
OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDstOver(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp1); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcIn(ref UcodeAssembler asm) + { + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Sub(CC.T, Dest.Temp1.RToA, OpBD.DstAAA, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcOut(ref UcodeAssembler asm) + { + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSrcAtop(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, 
OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointDstAtop(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.Temp1, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenDisjointXor(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + asm.Min(CC.T, Dest.Temp1, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Add(CC.T, Dest.Temp1.RToA, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointPlus(ref UcodeAssembler asm) + { + asm.Mul(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.SrcAAA); + asm.Add(CC.T, Dest.Temp0, OpBD.DstRGB, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointMultiply(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, 
OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointScreen(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointOverlay(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, 
OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDarken(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLighten(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointColorDodge(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, 
OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointColorBurn(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); + asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB); + asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return 
FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHardLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointSoftLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); + asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, 
OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointDifference(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); + asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static 
FixedFunctionAlpha GenDisjointExclusion(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointInvertRGB(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.Temp0, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenDisjointLinearDodge(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, 
OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLinearBurn(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointVividLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, 
OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointLinearLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointPinLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + 
asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHardMix(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslHue(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + 
asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, 
Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslSaturation(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, 
OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, 
OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslColor(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + 
asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenDisjointHslLuminosity(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + 
asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.OneMinusSrcAAA); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.Temp1, OpAC.PBR, OpBD.Temp0); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.SrcAAA, OpBD.OneMinusDstAAA); + asm.Madd(CC.T, Dest.Temp0, OpAC.PBR, OpBD.SrcRGB, OpAC.Temp0); + asm.Add(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Min(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointSrc(ref UcodeAssembler asm) + { + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenConjointSrcOver(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, 
Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.SrcRGB, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDstOver(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp1, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSrcIn(ref UcodeAssembler asm) + { + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSrcOut(ref UcodeAssembler asm) + { + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.Temp1.RToA, OpAC.PBR, OpBD.ConstantZero); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointSrcAtop(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, 
Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDstAtop(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl); + } + + private static FixedFunctionAlpha GenConjointXor(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.SrcAAA, OpBD.DstAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + asm.Sub(CC.T, Dest.Temp1.CC, OpBD.DstAAA, OpBD.SrcAAA); + asm.Sub(CC.LT, Dest.Temp1, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mov(CC.T, Dest.Temp1.RToA, OpBD.Temp1); + asm.Mov(CC.T, Dest.Temp0, OpBD.Temp0); + return FixedFunctionAlpha.Disabled; + } + + private static FixedFunctionAlpha GenConjointMultiply(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mul(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, 
OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointScreen(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointOverlay(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDarken(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, 
OpAC.DstRGB, OpBD.PBR); + asm.Min(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLighten(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointColorDodge(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Rcp(CC.GT, Dest.PBR, OpAC.Temp0); + asm.Mul(CC.GT, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.Temp1, OpBD.ConstantZero); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha 
GenConjointColorBurn(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantZero); + asm.Rcp(CC.GT, Dest.PBR, OpAC.SrcRGB); + asm.Mmsub(CC.GT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Max(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.ConstantOne, OpBD.Temp1); + asm.Mov(CC.LE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHardLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.SrcRGB, OpBD.Temp1, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.GT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointSoftLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, 
Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(4, 0.25f, 0.25f, 0.25f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(0, 0.2605f, 0.2605f, 0.2605f); + asm.Mul(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(1, -0.7817f, -0.7817f, -0.7817f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(2, 0.3022f, 0.3022f, 0.3022f); + asm.Mmadd(CC.GT, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(3, 0.2192f, 0.2192f, 0.2192f); + asm.Add(CC.GT, Dest.Temp0, OpBD.PBR, OpBD.ConstantRGB); + asm.SetConstant(5, 16f, 16f, 16f); + asm.Mul(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(6, 12f, 12f, 12f); + asm.Mmsub(CC.LE, Dest.PBR, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.SetConstant(7, 3f, 3f, 3f); + asm.Mmadd(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp1, OpBD.ConstantRGB); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mmsub(CC.LE, Dest.Temp0, OpAC.Temp1, OpBD.ConstantOne, OpAC.Temp1, OpBD.Temp1); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointDifference(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.SrcRGB); + asm.Sub(CC.LT, Dest.Temp0, OpBD.SrcRGB, 
OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointExclusion(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointInvertRGB(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mmsub(CC.T, Dest.Temp0, OpAC.SrcRGB, OpBD.ConstantOne, OpAC.SrcRGB, OpBD.PBR); + asm.Min(CC.T, Dest.PBR, OpAC.DstAAA, OpBD.SrcAAA); + asm.Mul(CC.T, Dest.Temp0, OpAC.Temp0, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Madd(CC.T, Dest.Temp0, OpAC.Temp1, OpBD.PBR, OpAC.Temp0); + return new FixedFunctionAlpha(BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearDodge(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + 
asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearBurn(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointVividLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.5f, 0.5f, 0.5f); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantRGB); + asm.Sub(CC.GE, Dest.PBR, OpBD.ConstantOne, OpBD.SrcRGB); + asm.Add(CC.GE, Dest.PBR, OpBD.PBR, OpBD.PBR); + asm.Rcp(CC.GE, Dest.PBR, OpAC.PBR); + asm.Mul(CC.GE, Dest.PBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GE, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Add(CC.LT, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Rcp(CC.LT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.LT, Dest.PBR, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.LT, Dest.Temp0, OpBD.ConstantOne, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, 
OpBD.ConstantZero); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcRGB, OpBD.ConstantOne); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointLinearLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 2f, 2f, 2f); + asm.Madd(CC.T, Dest.PBR, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Max(CC.T, Dest.PBR, OpAC.PBR, OpBD.ConstantZero); + asm.Min(CC.T, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointPinLight(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.Temp0, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.ConstantZero); + asm.Add(CC.LE, Dest.PBR, OpBD.SrcRGB, OpBD.SrcRGB); + asm.Min(CC.LE, Dest.Temp0, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, 
OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHardMix(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Add(CC.T, Dest.PBR, OpBD.SrcRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Mul(CC.LT, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Mov(CC.GE, Dest.Temp0, OpBD.ConstantOne); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHslHue(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.SrcRGB, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.Temp2.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.GT, 
Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp2); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + 
asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHslSaturation(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.PBR); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.T, Dest.Temp0.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.T, Dest.Temp0.CC, OpBD.PBR, OpBD.Temp0); + asm.Rcp(CC.GT, Dest.Temp0, OpAC.Temp0); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.Temp1, OpAC.Temp0, OpBD.PBR); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Min(CC.GT, Dest.Temp1.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mov(CC.GT, Dest.PBR.GBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Max(CC.GT, Dest.PBR.GBR, OpAC.PBR, OpBD.SrcRGB); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp0, OpBD.Temp1); + asm.Mul(CC.LE, Dest.Temp0, OpAC.SrcAAA, OpBD.ConstantZero); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR); + asm.Mul(CC.T, Dest.PBR.RRR, 
OpAC.Temp0, OpBD.ConstantRGB); + asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp0, OpBD.ConstantRGB, OpAC.PBR); + asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Add(CC.T, Dest.Temp2, OpBD.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); + asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); + asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1); + asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR); + asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1); + asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1); + asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1); + asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2); + asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2); + asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR); + asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR); + asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1); + asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1); + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); + asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR); + asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA); + asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR); + return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); + } + + private static FixedFunctionAlpha GenConjointHslColor(ref UcodeAssembler asm) + { + asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); + asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); + asm.SetConstant(0, 0.3f, 0.59f, 0.11f); + asm.Mul(CC.T, Dest.PBR.RRR, OpAC.PBR, OpBD.ConstantRGB); + asm.Madd(CC.T, 
Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Madd(CC.T, Dest.Temp1.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB);
+            asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Madd(CC.T, Dest.PBR.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Sub(CC.T, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+            asm.Add(CC.T, Dest.Temp2, OpBD.SrcRGB, OpBD.PBR);
+            asm.Mov(CC.T, Dest.Temp0, OpBD.PBR);
+            asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+            asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+            asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+            asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne);
+            asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne);
+            asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp1);
+            asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+            asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp1);
+            asm.Sub(CC.GT, Dest.PBR, OpBD.Temp2, OpBD.Temp1);
+            asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp1);
+            asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp2);
+            asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp2);
+            asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp2);
+            asm.Sub(CC.LT, Dest.PBR, OpBD.Temp1, OpBD.PBR);
+            asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+            asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp2, OpBD.Temp1, OpAC.Temp1, OpBD.Temp1);
+            asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp1);
+            asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA);
+            asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+            asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA);
+            asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR);
+            asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+            asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR);
+            return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl);
+        }
+        /// <summary>Emits, through <paramref name="asm"/>, the blend microcode for the function this method is named after (HSL luminosity, conjoint overlap) and returns the fixed function alpha state that accompanies it.</summary>
+        private static FixedFunctionAlpha GenConjointHslLuminosity(ref UcodeAssembler asm) // asm receives every instruction below; NOTE(review): assumed to record them in call order — confirm in UcodeAssembler
+        {
+            asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // PBR is written from destination alpha; a reciprocal going by the op name — TODO confirm
+            asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR); // Temp1 from DstRGB and PBR; presumably destination colour divided by its alpha
+            asm.SetConstant(0, 0.3f, 0.59f, 0.11f); // constant 0 = (0.3, 0.59, 0.11), the same values the HslLuminosity entries of AdvancedBlendPreGenTable list
+            asm.Mul(CC.T, Dest.PBR.RRR, OpAC.SrcRGB, OpBD.ConstantRGB); // Mul/Madd/Madd over SrcRGB and the constant, ending in Temp2; looks like a weighted channel sum of the source — TODO confirm
+            asm.Madd(CC.T, Dest.PBR.GGG, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Madd(CC.T, Dest.Temp2.BBB, OpAC.SrcRGB, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Mul(CC.T, Dest.PBR.RRR, OpAC.Temp1, OpBD.ConstantRGB); // same three-step pattern over Temp1, this time ending in PBR
+            asm.Madd(CC.T, Dest.PBR.GGG, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Madd(CC.T, Dest.PBR.BBB, OpAC.Temp1, OpBD.ConstantRGB, OpAC.PBR);
+            asm.Sub(CC.T, Dest.PBR, OpBD.Temp2, OpBD.PBR); // combines the two sums (Temp2 and PBR) ...
+            asm.Add(CC.T, Dest.Temp1, OpBD.Temp1, OpBD.PBR); // ... and applies the result to Temp1
+            asm.Mov(CC.T, Dest.Temp0, OpBD.PBR); // Temp0 is the register every conditional branch below writes its result to
+            asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Max/Max with the GBR destination swizzle; presumably reduces Temp1 to its largest channel — TODO confirm
+            asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+            asm.Max(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+            asm.Sub(CC.T, Dest.PBR.CC, OpBD.PBR, OpBD.ConstantOne); // .CC destination: presumably updates the condition tested by the CC.GT instructions that follow
+            asm.Add(CC.GT, Dest.PBR, OpBD.PBR, OpBD.ConstantOne); // CC.GT group: only meant to take effect when that condition holds; ends by rewriting Temp0
+            asm.Sub(CC.GT, Dest.PBR, OpBD.PBR, OpBD.Temp2);
+            asm.Rcp(CC.GT, Dest.PBR, OpAC.PBR);
+            asm.Mmsub(CC.GT, Dest.Temp0, OpAC.PBR, OpBD.ConstantOne, OpAC.PBR, OpBD.Temp2);
+            asm.Sub(CC.GT, Dest.PBR, OpBD.Temp1, OpBD.Temp2);
+            asm.Madd(CC.GT, Dest.Temp0, OpAC.Temp0, OpBD.PBR, OpAC.Temp2);
+            asm.Mov(CC.T, Dest.PBR.GBR, OpBD.Temp1); // Mov/Min/Min mirror of the Max sequence above; the last Min carries .CC for the CC.LT group
+            asm.Min(CC.T, Dest.PBR.GBR, OpAC.PBR, OpBD.Temp1);
+            asm.Min(CC.T, Dest.PBR.GBR.CC, OpAC.PBR, OpBD.Temp1);
+            asm.Sub(CC.LT, Dest.PBR, OpBD.Temp2, OpBD.PBR); // CC.LT group: counterpart of the CC.GT group, also ending in Temp0
+            asm.Rcp(CC.LT, Dest.Temp0, OpAC.PBR);
+            asm.Mmsub(CC.LT, Dest.PBR, OpAC.Temp1, OpBD.Temp2, OpAC.Temp2, OpBD.Temp2);
+            asm.Madd(CC.LT, Dest.Temp0, OpAC.PBR, OpBD.Temp0, OpAC.Temp2);
+            asm.Rcp(CC.T, Dest.PBR, OpAC.DstAAA); // Temp1 is rebuilt from DstRGB and DstAAA exactly as in the first two instructions
+            asm.Mul(CC.T, Dest.Temp1, OpAC.DstRGB, OpBD.PBR);
+            asm.Sub(CC.T, Dest.PBR.CC, OpBD.SrcAAA, OpBD.DstAAA); // condition from source alpha versus destination alpha selects one of the two Mmadd forms below
+            asm.Mmadd(CC.GE, Dest.Temp0, OpAC.Temp0, OpBD.DstAAA, OpAC.SrcRGB, OpBD.PBR);
+            asm.Sub(CC.LT, Dest.PBR, OpBD.DstAAA, OpBD.SrcAAA);
+            asm.Mmadd(CC.LT, Dest.Temp0, OpAC.Temp0, OpBD.SrcAAA, OpAC.Temp1, OpBD.PBR);
+            return new FixedFunctionAlpha(BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl); // alpha state: MaximumGl op with both factors OneGl
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
new file mode 100644
index 00000000..8072c6af
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendManager.cs
@@ -0,0 +1,125 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+    /// <summary>
+    /// Keeps the blend microcode words pushed through the load methods and
+    /// matches them against the pre-generated advanced blend table.
+    /// </summary>
+    class AdvancedBlendManager
+    {
+        private const int InstructionRamSize = 128;
+        private const int InstructionRamSizeMask = InstructionRamSize - 1;
+
+        private readonly DeviceStateWithShadow<ThreedClassState> _state;
+
+        private readonly uint[] _code;
+        private int _ip;
+
+        /// <summary>
+        /// Creates an advanced blend manager with zero-filled microcode storage.
+        /// </summary>
+        /// <param name="state">GPU state of the channel owning this manager</param>
+        public AdvancedBlendManager(DeviceStateWithShadow<ThreedClassState> state)
+        {
+            _code = new uint[InstructionRamSize];
+            _state = state;
+        }
+
+        /// <summary>
+        /// Sets the position where the next blend microcode word will be written.
+        /// </summary>
+        /// <param name="argument">Method call argument, used as the new write position</param>
+        public void LoadBlendUcodeStart(int argument) => _ip = argument;
+
+        /// <summary>
+        /// Writes one blend microcode word at the current position, then advances the position.
+        /// </summary>
+        /// <param name="argument">Method call argument, stored as the microcode word</param>
+        public void LoadBlendUcodeInstruction(int argument)
+        {
+            // Masking wraps the position so the write always lands inside the storage array.
+            int slot = _ip & InstructionRamSizeMask;
+
+            _ip++;
+            _code[slot] = (uint)argument;
+        }
+
+        /// <summary>
+        /// Looks up the stored microcode in the pre-generated table and, if the current
+        /// state agrees with the entry found, reports the advanced blend function it implements.
+        /// </summary>
+        /// <param name="descriptor">Advanced blend descriptor, left at its default value when nothing matches</param>
+        /// <returns>True if the function was found, false otherwise</returns>
+        public bool TryGetAdvancedBlend(out AdvancedBlendDescriptor descriptor)
+        {
+            descriptor = default;
+
+            // Hash the declared size (truncated to a byte), but never more than the storage holds.
+            byte declaredLength = (byte)_state.State.BlendUcodeSize;
+            Span<uint> ucode = _code.AsSpan(0, Math.Min(_code.Length, declaredLength));
+            Hash128 ucodeHash = XXHash128.ComputeHash(MemoryMarshal.Cast<uint, byte>(ucode));
+
+            if (!AdvancedBlendPreGenTable.Entries.TryGetValue(ucodeHash, out AdvancedBlendEntry entry) ||
+                !ConstantsMatch(entry.Constants))
+            {
+                return false;
+            }
+
+            FixedFunctionAlpha expected = entry.Alpha;
+
+            if (expected.Enable != _state.State.BlendUcodeEnable)
+            {
+                return false;
+            }
+
+            // NOTE(review): the EnableRGBA entries of AdvancedBlendPreGenTable list 0 for all three
+            // alpha fields, so this compares the state against zeroed values. Confirm that
+            // EnableRGBA, rather than EnableRGB, is the case meant to be checked here.
+            if (expected.Enable == BlendUcodeEnable.EnableRGBA &&
+                !(expected.AlphaOp == _state.State.BlendStateCommon.AlphaOp &&
+                  expected.AlphaSrcFactor == _state.State.BlendStateCommon.AlphaSrcFactor &&
+                  expected.AlphaDstFactor == _state.State.BlendStateCommon.AlphaDstFactor))
+            {
+                return false;
+            }
+
+            descriptor = new AdvancedBlendDescriptor(entry.Op, entry.Overlap, entry.SrcPreMultiplied);
+            return true;
+        }
+
+        /// <summary>
+        /// Compares the constants expected by a table entry with the blend microcode constants of the current state.
+        /// </summary>
+        /// <param name="expected">Constants of the table entry, may be null</param>
+        /// <returns>True if there is nothing to compare or all constants are equal, false otherwise</returns>
+        private bool ConstantsMatch(RgbFloat[] expected)
+        {
+            if (expected == null)
+            {
+                return true;
+            }
+
+            for (int index = 0; index < expected.Length; index++)
+            {
+                RgbFloat wanted = expected[index];
+                RgbHalf current = _state.State.BlendUcodeConstants[index];
+
+                // The table stores floats; they are converted to half precision before comparing.
+                bool equal = (Half)wanted.R == current.UnpackR() &&
+                             (Half)wanted.G == current.UnpackG() &&
+                             (Half)wanted.B == current.UnpackB();
+
+                if (!equal)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
new file mode 100644
index 00000000..d35d8abf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendPreGenTable.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
+{
+    /// <summary>
+    /// Advanced blend function entry. Immutable: every member is assigned once, by the constructor.
+    /// </summary>
+    readonly struct AdvancedBlendEntry
+    {
+        /// <summary>
+        /// Advanced blend operation.
+        /// </summary>
+        public AdvancedBlendOp Op { get; }
+
+        /// <summary>
+        /// Advanced blend overlap mode.
+        /// </summary>
+        public AdvancedBlendOverlap Overlap { get; }
+
+        /// <summary>
+        /// Whether the source input is pre-multiplied.
+        /// </summary>
+        public bool SrcPreMultiplied { get; }
+
+        /// <summary>
+        /// Constants used by the microcode.
+        /// </summary>
+        public RgbFloat[] Constants { get; }
+
+        /// <summary>
+        /// Fixed function alpha state.
+        /// </summary>
+        public FixedFunctionAlpha Alpha { get; }
+
+        /// <summary>
+        /// Creates a new advanced blend function entry.
+        /// </summary>
+        /// <param name="op">Advanced blend operation</param>
+        /// <param name="overlap">Advanced blend overlap mode</param>
+        /// <param name="srcPreMultiplied">Whether the source input is pre-multiplied</param>
+        /// <param name="constants">Constants used by the microcode</param>
+        /// <param name="alpha">Fixed function alpha state</param>
+        public AdvancedBlendEntry(
+            AdvancedBlendOp op,
+            AdvancedBlendOverlap overlap,
+            bool srcPreMultiplied,
+            RgbFloat[] constants,
+            FixedFunctionAlpha alpha)
+        {
+            Op = op;
+            Overlap = overlap;
+            SrcPreMultiplied = srcPreMultiplied;
+            Constants = constants;
+            Alpha = alpha;
+        }
+    }
+
+    /// <summary>
+    /// Pre-generated hash table with advanced blend functions used by the driver.
+ /// </summary> + static class AdvancedBlendPreGenTable + { + /// <summary> + /// Advanced blend functions dictionary. + /// </summary> + public static readonly IReadOnlyDictionary<Hash128, AdvancedBlendEntry> Entries = new Dictionary<Hash128, AdvancedBlendEntry>() + { + { new Hash128(0x19ECF57B83DE31F7, 0x5BAE759246F264C0), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xDE1B14A356A1A9ED, 0x59D803593C607C1D), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x1A3C3A6D32DEC368, 0xBCAE519EC6AAA045), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x6FD380261A63B240, 0x17C3B335DBB9E3DB), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x1D39164823D3A2D1, 0xC45350959CE1C8FB), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x18DF09FF53B129FE, 0xC02EDA33C36019F6), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x5973E583271EBF06, 0x711497D75D1272E0), new AdvancedBlendEntry(AdvancedBlendOp.Darken, 
AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x4759E0E5DA54D5E8, 0x1FDD57C0C38AFA1F), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x337684D43CCE97FA, 0x0139E30CC529E1C9), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xDA59E85D8428992D, 0x1D3D7C64C9EF0132), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x9455B949298CE805, 0xE73D3301518BE98A), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xBDD3B4DEDBE336AA, 0xBFA4DCD50D535DEE), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x22D4E970A028649A, 
0x4F3FCB055FCED965), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xA346A91311D72114, 0x151A27A3FB0A1904), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x8A307241061FACD6, 0xA39D1826440B8EE7), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xB3BE569485EFFFE0, 0x0BA4E269B3CFB165), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x36FCA3277DC11822, 0x2BC0F6CAC2029672), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x4A6226AF2DE9BD7F, 0xEB890D7DA716F73A), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0xF364CAA94E160FEB, 0xBF364512C72A3797), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new 
Hash128(0x6BF791AB4AC19C87, 0x6FA17A994EA0FCDE), new AdvancedBlendEntry(AdvancedBlendOp.InvertOvg, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x053C75A0AE0BB222, 0x03C791FEEB59754C), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x25762AB40B6CBDE9, 0x595E9A968AC4F01C), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xC2D05E2DBE16955D, 0xB8659C7A3FCFA7CE), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x223F220B8F74CBFB, 0xD3DD19D7C39209A5), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xD0DAE57A9F1FE78A, 0x353796BCFB8CE30B), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x601C8CBEC07FF8FF, 0xB8E22882360E8695), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x3A55B7B78C76A7A8, 0x206F503B2D9FFEAA), new AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x80BC65C7831388E5, 0xC652457B2C766AEC), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x3D3A912E5833EE13, 0x307895951349EE33), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x289105BE92E81803, 0xFD8F1F03D15C53B4), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x007AE3BD140764EB, 0x0EE05A0D2E80BBAE), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x77F7EE0DB3FDDB96, 0xDEA47C881306DB3E), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x66F4E9A7D73CA157, 0x1486058A177DB11C), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, 
AdvancedBlendOverlap.Uncorrelated, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x593E9F331612D618, 0x9D217BEFA4EB919A), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x0A5194C5E6891106, 0xDD8EC6586106557C), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x8D77173D5E06E916, 0x06AB190E7D10F4D4), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x655B4EBC148981DA, 0x455999EF2B9BD28A), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x98F5437D5F518929, 0xBFF4A6E83183DB63), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x6ADDEFE3B9CEF2FD, 0xB6F6272AFECB1AAB), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x80953F0953BF05B1, 0xD59ABFAA34F8196F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xA401D9AA2A39C121, 0xFC0C8005C22AD7E3), new 
AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x06274FB7CA9CDD22, 0x6CE8188B1A9AB6EF), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x0B079BE7F7F70817, 0xB72E7736CA51E321), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x66215C99403CEDDE, 0x900B733D62204C48), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x12DEF2AD900CAD6C, 0x58CF5CC3004910DF), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x272BA3A49F64DAE4, 0xAC70B96C00A99EAF), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x206C34AAA7D3F545, 0xDA4B30CACAA483A0), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x3D93494920D257BE, 0xDCC573BE1F5F4449), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x0D7417D80191107B, 0xEAF40547827E005F), new 
AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xEC1B03E8C883F9C9, 0x2D3CA044C58C01B4), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x58A19A0135D68B31, 0x82F35B97AED068E5), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x20489F9AB36CC0E3, 0x20499874219E35EE), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xBB176935E5EE05BF, 0x95B26D4D30EA7A14), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x5FF9393C908ACFED, 0x068B0BD875773ABF), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x03181F8711C9802C, 0x6B02C7C6B224FE7B), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x2EE2209021F6B977, 0xF3AFA1491B8B89FC), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, 
true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xD8BA4DD2EDE4DC9E, 0x01006114977CF715), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0xD156B99835A2D8ED, 0x2D0BEE9E135EA7A7), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x20CE8C898ED4BE27, 0x1514900B6F5E8F66), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xCDE5F743820BA2D9, 0x917845FE2ECB083D), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xEB03DF4A0C1D14CD, 0xBAE2E831C6E8FFE4), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x1DC9E49AABC779AC, 0x4053A1441EB713D3), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xFBDEF776248F7B3E, 0xE05EEFD65AC47CB7), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x415A1A48E03AA6E7, 0x046D7EE33CA46B9A), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, true, 
Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x59A6901EC9BB2041, 0x2F3E19CE5EEC3EBE), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x044B2B6E105221DA, 0x3089BBC033F994AF), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x374A5A24AA8E6CC5, 0x29930FAA6215FA2B), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x30CD0F7AF0CF26F9, 0x06CCA6744DE7DCF5), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x1A6C9A1F6FE494A5, 0xA0CFAF77617E54DD), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x081AF6DAAB1C8717, 0xBFEDCE59AE3DC9AC), new AdvancedBlendEntry(AdvancedBlendOp.Dst, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x3518E44573AB68BA, 0xC96EE71AF9F8F546), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xF89E81FE8D73C96F, 0x4583A04577A0F21C), new 
AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xDF4026421CB61119, 0x14115A1F5139AFC7), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x91A20262C3E3A695, 0x0B3A102BFCDC6B1C), new AdvancedBlendEntry(AdvancedBlendOp.DstIn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x44F4C7CCFEB9EBFA, 0xF68394E6D56E5C2F), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xB89F17C7021E9760, 0x430357EE0F7188EF), new AdvancedBlendEntry(AdvancedBlendOp.DstOut, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xDA2D20EA4242B8A0, 0x0D1EC05B72E3838F), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x855DFEE1208D11B9, 0x77C6E3DDCFE30B85), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x9B3808439683FD58, 0x123DCBE4705AB25E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xA42CF045C248A00A, 0x0C6C63C24EA0B0C1), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x320A83B6D00C8059, 0x796EDAB3EB7314BC), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x45253AC9ABFFC613, 0x8F92EA70195FB573), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x1A5D263B588274B6, 0x167D305F6C794179), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x709C1A837FE966AC, 0x75D8CE49E8A78EDB), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x8265C26F85E4145F, 0x932E6CCBF37CB600), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x3F252B3FEF983F27, 0x9370D7EEFEFA1A9E), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, 
BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x66A334A4AEA41078, 0xCB52254E1E395231), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xFDD05C53B25F0035, 0xB7E3ECEE166C222F), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x25D932A77FFED81A, 0xA50D797B0FCA94E8), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x4A953B6F5F7D341C, 0xDC05CFB50DDB5DC1), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x838CB660C4F41F6D, 0x9E7D958697543495), new AdvancedBlendEntry(AdvancedBlendOp.Invert, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x4DF6EC1348A8F797, 0xA128E0CD69DB5A64), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, 
BlendFactor.OneGl)) }, + { new Hash128(0x178CDFAB9A015295, 0x2BF40EA72E596D57), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x338FC99050E56AFD, 0x2AF41CF82BE602BF), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x62E02ED60D1E978E, 0xBF726B3E68C11E4D), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xFBAF92DD4C101502, 0x7AF2EDA6596B819D), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x0EF1241F65D4B50A, 0xE8D85DFA6AEDDB84), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x77FE024B5C9D4A18, 0xF19D48A932F6860F), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, true, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x9C88CBFA2E09D857, 0x0A0361704CBEEE1D), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, 
BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x5B94127FA190E640, 0x8D1FEFF837A91268), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xB9C9105B7E063DDB, 0xF6A70E1D511B96FD), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xF0751AAE332B3ED1, 0xC40146F5C83C2533), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, true, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x579EB12F595F75AD, 0x151BF0504703B81B), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xF9CA152C03AC8C62, 0x1581336205E5CF47), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.DstAlphaGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x98ACD8BB5E195D0F, 0x91F937672BE899F0), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.ZeroGl)) }, + { new Hash128(0xBF97F10FC301F44C, 0x75721789F0D48548), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x1B982263B8B08A10, 0x3350C76E2E1B27DF), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0xFF20AC79F64EDED8, 0xAF9025B2D97B9273), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneMinusDstAlphaGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x9FFD986600FB112F, 0x384FDDF4E060139A), new AdvancedBlendEntry(AdvancedBlendOp.PlusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x0425E40B5B8B3B52, 0x5880CBED7CAB631C), new AdvancedBlendEntry(AdvancedBlendOp.PlusClampedAlpha, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x16DAC8593F28623A, 0x233DBC82325B8AED), new AdvancedBlendEntry(AdvancedBlendOp.PlusDarker, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xB37E5F234B9F0948, 0xD5F957A2ECD98FD6), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xCA0FDADD1D20DBE3, 0x1A5C15CCBF1AC538), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) 
}, + { new Hash128(0x1C48304D73A9DF3A, 0x891DB93FA36E3450), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x53200F2279B7FA39, 0x051C2462EBF6789C), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xB88BFB80714DCD5C, 0xEBD6938D744E6A41), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xE33DC2A25FC1A976, 0x08B3DBB1F3027D45), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xCE97E71615370316, 0xE131AE49D3A4D62B), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xE059FD265149B256, 0x94AF817AC348F61F), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x16D31333D477E231, 0x9A98AAC84F72CC62), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), 
new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x47FC3B0776366D3C, 0xE96D9BD83B277874), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x7230401E3FEA1F3B, 0xF0D15F05D3D1E309), new AdvancedBlendEntry(AdvancedBlendOp.Minus, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.ReverseSubtractGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x188212F9303742F5, 0x100C51CB96E03591), new AdvancedBlendEntry(AdvancedBlendOp.MinusClamped, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x52B755D296B44DC5, 0x4003B87275625973), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xD873ED973ADF7EAD, 0x73E68B57D92034E7), new AdvancedBlendEntry(AdvancedBlendOp.Contrast, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f), new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x471F9FA34B945ACB, 0x10524D1410B3C402), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x99F569454EA0EF32, 0x6FC70A8B3A07DC8B), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x5AD55F950067AC7E, 0x4BA60A4FBABDD0AC), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x03FF2C858C9C4C5B, 0xE95AE7F561FB60E9), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x6DC0E510C7BCF9D2, 0xAE805D7CECDCB5C1), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x44832332CED5C054, 0x2F8D5536C085B30A), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x4AB4D387618AC51F, 0x495B46E0555F4B32), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x99282B49405A01A8, 0xD6FA93F864F24A8E), new 
AdvancedBlendEntry(AdvancedBlendOp.Red, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x37B30C1064FBD23E, 0x5D068366F42317C2), new AdvancedBlendEntry(AdvancedBlendOp.Green, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x760FAE9D59E04BC2, 0xA40AD483EA01435E), new AdvancedBlendEntry(AdvancedBlendOp.Blue, AdvancedBlendOverlap.Uncorrelated, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0xE786950FD9D1C6EF, 0xF9FDD5AF6451D239), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x052458BB4788B0CA, 0x8AC58FDCA1F45EF5), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x6AFC3837D1D31920, 0xB9D49C2FE49642C6), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0xAFC2911949317E01, 0xD5B63636F5CB3422), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Uncorrelated, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, 
BlendFactor.OneMinusSrcAlphaGl)) }, + { new Hash128(0x13B46DF507CC2C53, 0x86DE26517E6BF0A7), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x5C372442474BE410, 0x79ECD3C0C496EF2E), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x74AAB45DBF5336E9, 0x01BFC4E181DAD442), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x43239E282A36C85C, 0x36FB65560E46AD0F), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x1A3BA8A7583B8F7A, 0xE64E41D548033180), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x32BBB9859E9B565D, 0x3D5CE94FE55F18B5), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0xD947A0766AE3C0FC, 0x391E5D53E86F4ED6), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0xBD9A7C08BDFD8CE6, 0x905407634901355E), new AdvancedBlendEntry(AdvancedBlendOp.Xor, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x8395475BCB0D7A8C, 0x48AF5DD501D44A70), new AdvancedBlendEntry(AdvancedBlendOp.Plus, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x80AAC23FEBD4A3E5, 0xEA8C70F0B4DE52DE), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x2F3AD1B0F1B3FD09, 0xC0EBC784BFAB8EA3), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x52B54032F2F70BFF, 0xC941D6FDED674765), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xCA7B86F72EC6A99B, 0x55868A131AFE359E), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x377919B60BD133CA, 0x0FD611627664EF40), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x9D4A0C5EE1153887, 0x7B869EBA218C589B), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x311F2A858545D123, 0xB4D09C802480AD62), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new 
Hash128(0xCF78AA6A83AFA689, 0x9DC48B0C2182A3E1), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xC3018CD6F1CF62D1, 0x016E32DD9087B1BB), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x9CB62CE0E956EE29, 0x0FB67F503E60B3AD), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x3589A13C16EF3BFA, 0x15B29BFC91F3BDFB), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x3502CA5FB7529917, 0xFA51BFD0D1688071), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x62ADC25AD6D0A923, 0x76CB6D238276D3A3), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x09FDEB1116A9D52C, 0x85BB8627CD5C2733), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new 
Hash128(0x0709FED1B65E18EB, 0x5BC3AA4D99EC19CF), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xB18D28AE5DE4C723, 0xE820AA2B75C9C02E), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x6743C51621497480, 0x4B164E40858834AE), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x63D1E181E34A2944, 0x1AE292C9D9F12819), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Disjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x079523298250BFF6, 0xC0C793510603CDB5), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x4C9D0A973C805EA6, 0xD1FF59AD5156B93C), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x1E914678F3057BCD, 0xD503AE389C12D229), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0x9FDBADE5556C5311, 0x03F0CBC798FC5C94), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Disjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new 
Hash128(0xE39451534635403C, 0x606CC1CA1F452388), new AdvancedBlendEntry(AdvancedBlendOp.Src, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x1D39F0F0A1008AA6, 0xBFDF2B97E6C3F125), new AdvancedBlendEntry(AdvancedBlendOp.SrcOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xDB81BED30D5BDBEA, 0xAF0B2856EB93AD2C), new AdvancedBlendEntry(AdvancedBlendOp.DstOver, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x83F69CCF1D0A79B6, 0x70D31332797430AC), new AdvancedBlendEntry(AdvancedBlendOp.SrcIn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MinimumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x7B87F807AB7A8F5C, 0x1241A2A01FB31771), new AdvancedBlendEntry(AdvancedBlendOp.SrcOut, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xF557172E20D5272D, 0xC1961F8C7A5D2820), new AdvancedBlendEntry(AdvancedBlendOp.SrcAtop, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0xA8476B3944DBBC9B, 0x84A2F6AF97B15FDF), new AdvancedBlendEntry(AdvancedBlendOp.DstAtop, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.OneGl, BlendFactor.ZeroGl)) }, + { new Hash128(0x3259602B55414DA3, 0x72AACCC00B5A9D10), new AdvancedBlendEntry(AdvancedBlendOp.Xor, 
AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, 0, 0, 0)) }, + { new Hash128(0xC0CB8C10F36EDCD6, 0x8C2D088AD8191E1C), new AdvancedBlendEntry(AdvancedBlendOp.Multiply, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x81806C451C6255EF, 0x5AA8AC9A08941A15), new AdvancedBlendEntry(AdvancedBlendOp.Screen, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xE55A6537F4568198, 0xCA8735390B799B19), new AdvancedBlendEntry(AdvancedBlendOp.Overlay, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x5C044BA14536DDA3, 0xBCE0123ED7D510EC), new AdvancedBlendEntry(AdvancedBlendOp.Darken, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x6788346C405BE130, 0x372A4BB199C01F9F), new AdvancedBlendEntry(AdvancedBlendOp.Lighten, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x510EDC2A34E2856B, 0xE1727A407E294254), new AdvancedBlendEntry(AdvancedBlendOp.ColorDodge, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x4B7BE01BD398C7A8, 0x5BFF79BC00672C18), new AdvancedBlendEntry(AdvancedBlendOp.ColorBurn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), 
new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x213B43845540CFEC, 0xDA857411CF1CCFCE), new AdvancedBlendEntry(AdvancedBlendOp.HardLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x765AFA6732E783F1, 0x8F1CABF1BC78A014), new AdvancedBlendEntry(AdvancedBlendOp.SoftLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.2605f, 0.2605f, 0.2605f), new RgbFloat(-0.7817f, -0.7817f, -0.7817f), new RgbFloat(0.3022f, 0.3022f, 0.3022f), new RgbFloat(0.2192f, 0.2192f, 0.2192f), new RgbFloat(0.25f, 0.25f, 0.25f), new RgbFloat(16f, 16f, 16f), new RgbFloat(12f, 12f, 12f), new RgbFloat(3f, 3f, 3f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xA4A5DE1CC06F6CB1, 0xA0634A0011001709), new AdvancedBlendEntry(AdvancedBlendOp.Difference, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x81F32BD8816EA796, 0x697EE86683165170), new AdvancedBlendEntry(AdvancedBlendOp.Exclusion, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xB870C209EAA5F092, 0xAF5FD923909CAA1F), new AdvancedBlendEntry(AdvancedBlendOp.InvertRGB, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.AddGl, BlendFactor.ZeroGl, BlendFactor.OneGl)) }, + { new Hash128(0x3649A9F5C936FB83, 0xDD7C834897AA182A), new AdvancedBlendEntry(AdvancedBlendOp.LinearDodge, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new 
FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xD72A2B1097A5995C, 0x3D41B2763A913654), new AdvancedBlendEntry(AdvancedBlendOp.LinearBurn, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x551E212B9F6C454A, 0xB0DFA05BEB3C37FA), new AdvancedBlendEntry(AdvancedBlendOp.VividLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.5f, 0.5f, 0.5f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x681B5A313B7416BF, 0xCB1CBAEEB4D81500), new AdvancedBlendEntry(AdvancedBlendOp.LinearLight, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(2f, 2f, 2f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x9343A18BD4B16777, 0xEDB4AC1C8972C3A4), new AdvancedBlendEntry(AdvancedBlendOp.PinLight, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xC960BF6D8519DE28, 0x78D8557FD405D119), new AdvancedBlendEntry(AdvancedBlendOp.HardMix, AdvancedBlendOverlap.Conjoint, false, Array.Empty<RgbFloat>(), new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x65A7B01FDC73A46C, 0x297E096ED5CC4D8A), new AdvancedBlendEntry(AdvancedBlendOp.HslHue, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0xD9C99BA4A6CDC13B, 0x3CFF0ACEDC2EE150), new AdvancedBlendEntry(AdvancedBlendOp.HslSaturation, AdvancedBlendOverlap.Conjoint, false, new[] 
{ new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x6BC00DA6EB922BD1, 0x5FD4C11F2A685234), new AdvancedBlendEntry(AdvancedBlendOp.HslColor, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + { new Hash128(0x8652300E32D93050, 0x9460E7B449132371), new AdvancedBlendEntry(AdvancedBlendOp.HslLuminosity, AdvancedBlendOverlap.Conjoint, false, new[] { new RgbFloat(0.3f, 0.59f, 0.11f) }, new FixedFunctionAlpha(BlendUcodeEnable.EnableRGB, BlendOp.MaximumGl, BlendFactor.OneGl, BlendFactor.OneGl)) }, + }; + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs new file mode 100644 index 00000000..f06b4bf7 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/AdvancedBlendUcode.cs @@ -0,0 +1,126 @@ +using Ryujinx.Graphics.GAL; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender +{ + /// <summary> + /// Fixed function alpha state used for an advanced blend function. + /// </summary> + struct FixedFunctionAlpha + { + /// <summary> + /// Fixed function alpha state with alpha blending disabled (EnableRGBA with default alpha operation and factors). + /// </summary> + public static FixedFunctionAlpha Disabled => new FixedFunctionAlpha(BlendUcodeEnable.EnableRGBA, default, default, default); + + /// <summary> + /// Individual enable bits for the RGB and alpha components. + /// </summary> + public BlendUcodeEnable Enable { get; } + + /// <summary> + /// Alpha blend operation. + /// </summary> + public BlendOp AlphaOp { get; } + + /// <summary> + /// Value multiplied with the blend source operand. + /// </summary> + public BlendFactor AlphaSrcFactor { get; } + + /// <summary> + /// Value multiplied with the blend destination operand. + /// </summary> + public BlendFactor AlphaDstFactor { get; } + + /// <summary> + /// Creates a new blend fixed function alpha state from explicit enable bits, alpha operation and alpha factors. + /// </summary> + /// <param name="enable">Individual enable bits for the RGB and alpha components</param> + /// <param name="alphaOp">Alpha blend operation</param> + /// <param name="alphaSrc">Value multiplied with the blend source operand</param> + /// <param name="alphaDst">Value multiplied with the blend destination operand</param> + public FixedFunctionAlpha(BlendUcodeEnable enable, BlendOp alphaOp, BlendFactor alphaSrc, BlendFactor alphaDst) + { + Enable = enable; + AlphaOp = alphaOp; + AlphaSrcFactor = alphaSrc; + AlphaDstFactor = alphaDst; + } + + /// <summary> + /// Creates a new blend fixed function alpha state that uses BlendUcodeEnable.EnableRGB as the enable value. 
+ /// </summary> + /// <param name="alphaOp">Alpha blend operation</param> + /// <param name="alphaSrc">Value multiplied with the blend source operand</param> + /// <param name="alphaDst">Value multiplied with the blend destination operand</param> + public FixedFunctionAlpha(BlendOp alphaOp, BlendFactor alphaSrc, BlendFactor alphaDst) : this(BlendUcodeEnable.EnableRGB, alphaOp, alphaSrc, alphaDst) + { + } + } + + /// <summary> + /// Blend microcode assembly function delegate. + /// </summary> + /// <param name="asm">Assembler, passed by reference to the function</param> + /// <returns>Fixed function alpha state for the microcode</returns> + delegate FixedFunctionAlpha GenUcodeFunc(ref UcodeAssembler asm); + + /// <summary> + /// Advanced blend microcode state. + /// </summary> + struct AdvancedBlendUcode + { + /// <summary> + /// Advanced blend operation. + /// </summary> + public AdvancedBlendOp Op { get; } + + /// <summary> + /// Advanced blend overlap mode. + /// </summary> + public AdvancedBlendOverlap Overlap { get; } + + /// <summary> + /// Whether the source input is pre-multiplied. + /// </summary> + public bool SrcPreMultiplied { get; } + + /// <summary> + /// Fixed function alpha state returned by the microcode generator function. + /// </summary> + public FixedFunctionAlpha Alpha { get; } + + /// <summary> + /// Microcode obtained from the assembler after the generator function has run. + /// </summary> + public uint[] Code { get; } + + /// <summary> + /// Constants used by the microcode, obtained from the assembler after the generator function has run. + /// </summary> + public RgbFloat[] Constants { get; } + + /// <summary> + /// Creates a new advanced blend microcode state by running the generator function on a new assembler. 
+ /// </summary> + /// <param name="op">Advanced blend operation</param> + /// <param name="overlap">Advanced blend overlap mode</param> + /// <param name="srcPreMultiplied">Whether the source input is pre-multiplied</param> + /// <param name="genFunc">Function that will generate the advanced blend microcode</param> + public AdvancedBlendUcode( + AdvancedBlendOp op, + AdvancedBlendOverlap overlap, + bool srcPreMultiplied, + GenUcodeFunc genFunc) + { + Op = op; + Overlap = overlap; + SrcPreMultiplied = srcPreMultiplied; + + UcodeAssembler asm = new UcodeAssembler(); + Alpha = genFunc(ref asm); + Code = asm.GetCode(); + Constants = asm.GetConstants(); + } + } +}
// File: src/Ryujinx.Graphics.Gpu/Engine/Threed/Blender/UcodeAssembler.cs
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Gpu.Engine.Threed.Blender
{
    /// <summary>
    /// Blend microcode instruction.
    /// </summary>
    enum Instruction
    {
        Mmadd = 0,
        Mmsub = 1,
        Min = 2,
        Max = 3,
        Rcp = 4,
        Add = 5,
        Sub = 6
    }

    /// <summary>
    /// Blend microcode condition code.
    /// </summary>
    enum CC
    {
        F = 0,
        T = 1,
        EQ = 2,
        NE = 3,
        LT = 4,
        LE = 5,
        GT = 6,
        GE = 7
    }

    /// <summary>
    /// Blend microcode operand B or D value.
    /// </summary>
    enum OpBD
    {
        ConstantZero = 0x0,
        ConstantOne = 0x1,
        SrcRGB = 0x2,
        SrcAAA = 0x3,
        OneMinusSrcAAA = 0x4,
        DstRGB = 0x5,
        DstAAA = 0x6,
        OneMinusDstAAA = 0x7,
        Temp0 = 0x9,
        Temp1 = 0xa,
        Temp2 = 0xb,
        PBR = 0xc,
        ConstantRGB = 0xd
    }

    /// <summary>
    /// Blend microcode operand A or C value.
    /// </summary>
    enum OpAC
    {
        SrcRGB = 0,
        DstRGB = 1,
        SrcAAA = 2,
        DstAAA = 3,
        Temp0 = 4,
        Temp1 = 5,
        Temp2 = 6,
        PBR = 7
    }

    /// <summary>
    /// Blend microcode destination operand.
    /// </summary>
    enum OpDst
    {
        Temp0 = 0,
        Temp1 = 1,
        Temp2 = 2,
        PBR = 3
    }

    /// <summary>
    /// Blend microcode input swizzle.
    /// </summary>
    enum Swizzle
    {
        RGB = 0,
        GBR = 1,
        RRR = 2,
        GGG = 3,
        BBB = 4,
        RToA = 5
    }

    /// <summary>
    /// Blend microcode output components.
    /// </summary>
    enum WriteMask
    {
        RGB = 0,
        R = 1,
        G = 2,
        B = 3
    }

    /// <summary>
    /// Floating-point RGB color values.
    /// </summary>
    readonly struct RgbFloat
    {
        /// <summary>
        /// Red component value.
        /// </summary>
        public float R { get; }

        /// <summary>
        /// Green component value.
        /// </summary>
        public float G { get; }

        /// <summary>
        /// Blue component value.
        /// </summary>
        public float B { get; }

        /// <summary>
        /// Creates a new floating-point RGB value.
        /// </summary>
        /// <param name="r">Red component value</param>
        /// <param name="g">Green component value</param>
        /// <param name="b">Blue component value</param>
        public RgbFloat(float r, float g, float b)
        {
            R = r;
            G = g;
            B = b;
        }
    }

    /// <summary>
    /// Blend microcode destination operand, including swizzle, write mask and condition code update flag.
    /// </summary>
    /// <remarks>
    /// Instances are immutable. The instance properties (<see cref="GBR"/>, <see cref="R"/>, <see cref="CC"/>, ...)
    /// return a modified copy, so they can be chained, e.g. <c>Dest.Temp0.GBR.R.CC</c>.
    /// </remarks>
    readonly struct Dest
    {
        // Base destinations: RGB swizzle, RGB write mask, no condition code update.
        public static Dest Temp0 => new Dest(OpDst.Temp0, Swizzle.RGB, WriteMask.RGB, false);
        public static Dest Temp1 => new Dest(OpDst.Temp1, Swizzle.RGB, WriteMask.RGB, false);
        public static Dest Temp2 => new Dest(OpDst.Temp2, Swizzle.RGB, WriteMask.RGB, false);
        public static Dest PBR => new Dest(OpDst.PBR, Swizzle.RGB, WriteMask.RGB, false);

        // Copies of this destination with a different swizzle.
        public Dest GBR => new Dest(Dst, Swizzle.GBR, WriteMask, WriteCC);
        public Dest RRR => new Dest(Dst, Swizzle.RRR, WriteMask, WriteCC);
        public Dest GGG => new Dest(Dst, Swizzle.GGG, WriteMask, WriteCC);
        public Dest BBB => new Dest(Dst, Swizzle.BBB, WriteMask, WriteCC);
        public Dest RToA => new Dest(Dst, Swizzle.RToA, WriteMask, WriteCC);

        // Copies of this destination with a single-component write mask.
        public Dest R => new Dest(Dst, Swizzle, WriteMask.R, WriteCC);
        public Dest G => new Dest(Dst, Swizzle, WriteMask.G, WriteCC);
        public Dest B => new Dest(Dst, Swizzle, WriteMask.B, WriteCC);

        // Copy of this destination with the condition code update flag set.
        public Dest CC => new Dest(Dst, Swizzle, WriteMask, true);

        /// <summary>
        /// Destination operand.
        /// </summary>
        public OpDst Dst { get; }

        /// <summary>
        /// Swizzle.
        /// </summary>
        public Swizzle Swizzle { get; }

        /// <summary>
        /// Write mask.
        /// </summary>
        public WriteMask WriteMask { get; }

        /// <summary>
        /// Indicates if condition codes should be updated.
        /// </summary>
        public bool WriteCC { get; }

        /// <summary>
        /// Creates a new blend microcode destination operand.
        /// </summary>
        /// <param name="dst">Operand</param>
        /// <param name="swizzle">Swizzle</param>
        /// <param name="writeMask">Write mask</param>
        /// <param name="writeCC">Indicates if condition codes should be updated</param>
        public Dest(OpDst dst, Swizzle swizzle, WriteMask writeMask, bool writeCC)
        {
            Dst = dst;
            Swizzle = swizzle;
            WriteMask = writeMask;
            WriteCC = writeCC;
        }
    }

    /// <summary>
    /// Blend microcode operation.
    /// </summary>
    readonly struct UcodeOp
    {
        /// <summary>
        /// Encoded 32-bit instruction word.
        /// </summary>
        public readonly uint Word;

        /// <summary>
        /// Creates a new blend microcode operation.
        /// </summary>
        /// <remarks>
        /// Fields are OR-ed into the word at these bit offsets: condition code 0, instruction 3,
        /// constant index 6, source A 9, source B 12, source C 16, source D 19, destination swizzle 23,
        /// destination write mask 26, destination register 28, condition code update flag 31.
        /// No field is masked, so out-of-range values spill into the neighbouring field.
        /// </remarks>
        /// <param name="cc">Condition code that controls whether the operation is executed or not</param>
        /// <param name="inst">Instruction</param>
        /// <param name="constIndex">Index on the constant table of the constant used by any constant operand</param>
        /// <param name="dest">Destination operand</param>
        /// <param name="srcA">First input operand</param>
        /// <param name="srcB">Second input operand</param>
        /// <param name="srcC">Third input operand</param>
        /// <param name="srcD">Fourth input operand</param>
        public UcodeOp(CC cc, Instruction inst, int constIndex, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
        {
            Word = (uint)cc |
                ((uint)inst << 3) |
                ((uint)constIndex << 6) |
                ((uint)srcA << 9) |
                ((uint)srcB << 12) |
                ((uint)srcC << 16) |
                ((uint)srcD << 19) |
                ((uint)dest.Swizzle << 23) |
                ((uint)dest.WriteMask << 26) |
                ((uint)dest.Dst << 28) |
                (dest.WriteCC ? (1u << 31) : 0);
        }
    }

    /// <summary>
    /// Blend microcode assembler.
    /// </summary>
    /// <remarks>
    /// This is a mutable struct: pass it by reference (as <c>GenUcodeFunc</c> does), otherwise the
    /// constant table and constant index written by the callee are lost on the caller's copy.
    /// </remarks>
    struct UcodeAssembler
    {
        private List<uint> _code;
        private RgbFloat[] _constants;
        private int _constantIndex;

        /// <summary>
        /// Emits an <see cref="Instruction.Mmadd"/> with operands C and D set to SrcRGB and ConstantZero.
        /// </summary>
        public void Mul(CC cc, Dest dest, OpAC srcA, OpBD srcB)
        {
            Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Mmadd"/> with operand D set to ConstantOne.
        /// </summary>
        public void Madd(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC)
        {
            Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, srcC, OpBD.ConstantOne);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Mmadd"/> with all four operands supplied by the caller.
        /// </summary>
        public void Mmadd(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
        {
            Assemble(cc, Instruction.Mmadd, dest, srcA, srcB, srcC, srcD);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Mmsub"/> with all four operands supplied by the caller.
        /// </summary>
        public void Mmsub(CC cc, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
        {
            Assemble(cc, Instruction.Mmsub, dest, srcA, srcB, srcC, srcD);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Min"/> on operands A and B; C and D are set to SrcRGB and ConstantZero.
        /// </summary>
        public void Min(CC cc, Dest dest, OpAC srcA, OpBD srcB)
        {
            Assemble(cc, Instruction.Min, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Max"/> on operands A and B; C and D are set to SrcRGB and ConstantZero.
        /// </summary>
        public void Max(CC cc, Dest dest, OpAC srcA, OpBD srcB)
        {
            Assemble(cc, Instruction.Max, dest, srcA, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Rcp"/> on operand A; the remaining operands are set to
        /// ConstantZero, SrcRGB and ConstantZero.
        /// </summary>
        public void Rcp(CC cc, Dest dest, OpAC srcA)
        {
            Assemble(cc, Instruction.Rcp, dest, srcA, OpBD.ConstantZero, OpAC.SrcRGB, OpBD.ConstantZero);
        }

        /// <summary>
        /// Emits a move, encoded as an <see cref="Instruction.Add"/> of operand B and ConstantZero.
        /// </summary>
        public void Mov(CC cc, Dest dest, OpBD srcB)
        {
            Assemble(cc, Instruction.Add, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, OpBD.ConstantZero);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Add"/> of operands B and D; A and C are set to SrcRGB.
        /// </summary>
        public void Add(CC cc, Dest dest, OpBD srcB, OpBD srcD)
        {
            Assemble(cc, Instruction.Add, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, srcD);
        }

        /// <summary>
        /// Emits an <see cref="Instruction.Sub"/> of operands B and D; A and C are set to SrcRGB.
        /// </summary>
        public void Sub(CC cc, Dest dest, OpBD srcB, OpBD srcD)
        {
            Assemble(cc, Instruction.Sub, dest, OpAC.SrcRGB, srcB, OpAC.SrcRGB, srcD);
        }

        /// <summary>
        /// Encodes one instruction with the current constant index and appends it to the code list.
        /// </summary>
        private void Assemble(CC cc, Instruction inst, Dest dest, OpAC srcA, OpBD srcB, OpAC srcC, OpBD srcD)
        {
            // The list is created lazily because a struct has no parameterless initialization of fields.
            (_code ??= new List<uint>()).Add(new UcodeOp(cc, inst, _constantIndex, dest, srcA, srcB, srcC, srcD).Word);
        }

        /// <summary>
        /// Stores a constant on the constant table, growing the table to fit <paramref name="index"/> if needed,
        /// and makes <paramref name="index"/> the constant index encoded into instructions assembled afterwards.
        /// </summary>
        /// <param name="index">Index of the constant on the table</param>
        /// <param name="r">Red component value</param>
        /// <param name="g">Green component value</param>
        /// <param name="b">Blue component value</param>
        public void SetConstant(int index, float r, float g, float b)
        {
            if (_constants == null)
            {
                _constants = new RgbFloat[index + 1];
            }
            else if (_constants.Length <= index)
            {
                Array.Resize(ref _constants, index + 1);
            }

            _constants[index] = new RgbFloat(r, g, b);
            _constantIndex = index;
        }

        /// <summary>
        /// Gets a copy of the assembled microcode words.
        /// </summary>
        /// <returns>The assembled words, or null if no instruction was assembled</returns>
        public uint[] GetCode()
        {
            return _code?.ToArray();
        }

        /// <summary>
        /// Gets the constant table.
        /// </summary>
        /// <returns>The internal constant table (not a copy), or null if no constant was set</returns>
        public RgbFloat[] GetConstants()
        {
            return _constants;
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Engine/Threed/ConditionalRendering.cs
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Memory;

namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
    /// <summary>
    /// Helper methods used for conditional rendering.
    /// </summary>
    static class ConditionalRendering
    {
        /// <summary>
        /// Checks if draws and clears should be performed, according
        /// to currently set conditional rendering conditions.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
        /// <param name="address">Conditional rendering buffer address</param>
        /// <param name="condition">Conditional rendering condition</param>
        /// <returns>
        /// True if rendering is enabled, False if it is disabled, or Host if the decision was handed
        /// to host conditional rendering. An unrecognized condition logs a warning and yields True.
        /// </returns>
        public static ConditionalRenderEnabled GetRenderEnable(GpuContext context, MemoryManager memoryManager, GpuVa address, Condition condition)
        {
            switch (condition)
            {
                case Condition.Always:
                    return ConditionalRenderEnabled.True;

                case Condition.Never:
                    return ConditionalRenderEnabled.False;

                case Condition.ResultNonZero:
                    return CounterNonZero(context, memoryManager, address.Pack());

                case Condition.Equal:
                case Condition.NotEqual:
                    return CounterCompare(context, memoryManager, address.Pack(), condition == Condition.Equal);

                default:
                    Logger.Warning?.Print(LogClass.Gpu, $"Invalid conditional render condition \"{condition}\".");
                    return ConditionalRenderEnabled.True;
            }
        }

        /// <summary>
        /// Checks if the counter value at a given GPU memory address is non-zero.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
        /// <param name="gpuVa">GPU virtual address of the counter value</param>
        /// <returns>
        /// False if no counter event is found for the address. Otherwise Host if the host accepted the
        /// conditional rendering request, else True or False depending on the value read back from memory.
        /// </returns>
        private static ConditionalRenderEnabled CounterNonZero(GpuContext context, MemoryManager memoryManager, ulong gpuVa)
        {
            ICounterEvent counterEvent = FindEvent(memoryManager.CounterCache, gpuVa);

            if (counterEvent == null)
            {
                return ConditionalRenderEnabled.False;
            }

            if (context.Renderer.Pipeline.TryHostConditionalRendering(counterEvent, 0L, false))
            {
                return ConditionalRenderEnabled.Host;
            }

            // Host path was refused: flush the event before reading the value from guest memory.
            counterEvent.Flush();

            ulong counterValue = memoryManager.Read<ulong>(gpuVa, true);

            return counterValue != 0 ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
        }

        /// <summary>
        /// Checks if the counter at a given GPU memory address passes a specified equality comparison.
        /// The two compared values are read from <paramref name="gpuVa"/> and <paramref name="gpuVa"/> + 16.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="memoryManager">Memory manager bound to the channel currently executing</param>
        /// <param name="gpuVa">GPU virtual address</param>
        /// <param name="isEqual">True to check if the values are equal, false to check if they are not equal</param>
        /// <returns>
        /// Host if the host accepted the conditional rendering request, otherwise True if the condition
        /// is met and False if it is not.
        /// </returns>
        private static ConditionalRenderEnabled CounterCompare(GpuContext context, MemoryManager memoryManager, ulong gpuVa, bool isEqual)
        {
            ICounterEvent firstEvent = FindEvent(memoryManager.CounterCache, gpuVa);
            ICounterEvent secondEvent = FindEvent(memoryManager.CounterCache, gpuVa + 16);

            bool handledByHost = false;

            if (firstEvent != null && secondEvent != null)
            {
                handledByHost = context.Renderer.Pipeline.TryHostConditionalRendering(firstEvent, secondEvent, isEqual);
            }
            else if (firstEvent != null)
            {
                // Only the first value has a pending event: compare it against the second value from memory.
                handledByHost = context.Renderer.Pipeline.TryHostConditionalRendering(firstEvent, memoryManager.Read<ulong>(gpuVa + 16), isEqual);
            }
            else if (secondEvent != null)
            {
                // Only the second value has a pending event: compare it against the first value from memory.
                handledByHost = context.Renderer.Pipeline.TryHostConditionalRendering(secondEvent, memoryManager.Read<ulong>(gpuVa), isEqual);
            }

            if (handledByHost)
            {
                return ConditionalRenderEnabled.Host;
            }

            firstEvent?.Flush();
            secondEvent?.Flush();

            ulong first = memoryManager.Read<ulong>(gpuVa, true);
            ulong second = memoryManager.Read<ulong>(gpuVa + 16, true);

            bool conditionMet = (first == second) == isEqual;

            return conditionMet ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
        }

        /// <summary>
        /// Tries to find a counter that is supposed to be written at the specified address,
        /// returning the related event.
        /// </summary>
        /// <param name="counterCache">GPU counter cache to search on</param>
        /// <param name="gpuVa">GPU virtual address where the counter is supposed to be written</param>
        /// <returns>The counter event, or null if not present</returns>
        private static ICounterEvent FindEvent(CounterCache counterCache, ulong gpuVa)
        {
            return counterCache.FindEvent(gpuVa);
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Engine/Threed/ConstantBufferUpdater.cs
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
    /// <summary>
    /// Constant buffer updater.
    /// </summary>
    class ConstantBufferUpdater
    {
        // Capacity, in 32-bit words, of the staging array used to batch inline updates.
        private const int UniformDataCacheSize = 512;

        private readonly GpuChannel _channel;
        private readonly DeviceStateWithShadow<ThreedClassState> _state;

        // State associated with direct uniform buffer updates.
        // This state is used to attempt to batch together consecutive updates.
        private ulong _ubBeginCpuAddress;
        private ulong _ubFollowUpAddress;
        private ulong _ubByteCount;
        private int _ubIndex;
        private readonly int[] _ubData = new int[UniformDataCacheSize];

        /// <summary>
        /// Creates a new instance of the constant buffer updater.
        /// </summary>
        /// <param name="channel">GPU channel</param>
        /// <param name="state">Channel state</param>
        public ConstantBufferUpdater(GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state)
        {
            _channel = channel;
            _state = state;
        }

        /// <summary>
        /// Binds a uniform buffer for the vertex shader stage.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void BindVertex(int argument) => Bind(argument, ShaderType.Vertex);

        /// <summary>
        /// Binds a uniform buffer for the tessellation control shader stage.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void BindTessControl(int argument) => Bind(argument, ShaderType.TessellationControl);

        /// <summary>
        /// Binds a uniform buffer for the tessellation evaluation shader stage.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void BindTessEvaluation(int argument) => Bind(argument, ShaderType.TessellationEvaluation);

        /// <summary>
        /// Binds a uniform buffer for the geometry shader stage.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void BindGeometry(int argument) => Bind(argument, ShaderType.Geometry);

        /// <summary>
        /// Binds a uniform buffer for the fragment shader stage.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void BindFragment(int argument) => Bind(argument, ShaderType.Fragment);

        /// <summary>
        /// Binds a uniform buffer for the specified shader stage.
        /// Bit 0 of the argument is the enable flag and bits 4 to 8 are the buffer index.
        /// Any queued inline update is flushed before the binding changes.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        /// <param name="type">Shader stage that will access the uniform buffer</param>
        private void Bind(int argument, ShaderType type)
        {
            int slot = (argument >> 4) & 0x1f;
            bool bindRequested = (argument & 1) != 0;

            FlushUboDirty();

            if (!bindRequested)
            {
                // Disabled binding: address and size are both zero.
                _channel.BufferManager.SetGraphicsUniformBuffer((int)type, slot, 0, 0);
                return;
            }

            var bufferState = _state.State.UniformBufferState;

            _channel.BufferManager.SetGraphicsUniformBuffer((int)type, slot, bufferState.Address.Pack(), (uint)bufferState.Size);
        }

        /// <summary>
        /// Flushes any queued UBO updates.
        /// </summary>
        public void FlushUboDirty()
        {
            // A follow-up address of zero means there is nothing queued.
            if (_ubFollowUpAddress == 0)
            {
                return;
            }

            var memoryManager = _channel.MemoryManager;

            int queuedWords = (int)(_ubByteCount / 4);
            Span<byte> queuedBytes = MemoryMarshal.Cast<int, byte>(_ubData.AsSpan(0, queuedWords));

            if (memoryManager.Physical.WriteWithRedundancyCheck(_ubBeginCpuAddress, queuedBytes))
            {
                // The batch started _ubByteCount bytes before the follow-up address.
                memoryManager.Physical.BufferCache.ForceDirty(memoryManager, _ubFollowUpAddress - _ubByteCount, _ubByteCount);
            }

            _ubFollowUpAddress = 0;
            _ubIndex = 0;
        }

        /// <summary>
        /// Updates the uniform buffer data with inline data.
        /// </summary>
        /// <param name="argument">New uniform buffer data word</param>
        public void Update(int argument)
        {
            var bufferState = _state.State.UniformBufferState;

            ulong address = bufferState.Address.Pack() + (uint)bufferState.Offset;

            // Start a new batch when the write is not contiguous with the previous one or the staging array is full.
            bool startNewBatch = _ubFollowUpAddress != address || _ubIndex == _ubData.Length;

            if (startNewBatch)
            {
                FlushUboDirty();

                _ubByteCount = 0;
                _ubBeginCpuAddress = _channel.MemoryManager.Translate(address);
            }

            _ubData[_ubIndex] = argument;
            _ubIndex++;

            _ubFollowUpAddress = address + 4;
            _ubByteCount += 4;

            _state.State.UniformBufferState.Offset += 4;
        }

        /// <summary>
        /// Updates the uniform buffer data with inline data.
        /// </summary>
        /// <param name="data">Data to be written to the uniform buffer</param>
        public void Update(ReadOnlySpan<int> data)
        {
            var bufferState = _state.State.UniformBufferState;

            ulong address = bufferState.Address.Pack() + (uint)bufferState.Offset;

            ulong size = (ulong)data.Length * 4;

            // Start a new batch when the write is not contiguous with the previous one or would not fit in the staging array.
            bool startNewBatch = _ubFollowUpAddress != address || _ubIndex + data.Length > _ubData.Length;

            if (startNewBatch)
            {
                FlushUboDirty();

                _ubByteCount = 0;
                _ubBeginCpuAddress = _channel.MemoryManager.Translate(address);
            }

            // NOTE(review): a span longer than the staging array would make this copy throw — confirm callers never pass one.
            data.CopyTo(_ubData.AsSpan(_ubIndex));
            _ubIndex += data.Length;

            _ubFollowUpAddress = address + size;
            _ubByteCount += size;

            _state.State.UniformBufferState.Offset += data.Length * 4;
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs new file mode 100644 index 00000000..7438ba03 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -0,0 +1,856 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Memory; +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Draw manager. + /// </summary> + class DrawManager + { + // Since we don't know the index buffer size for indirect draws, + // we must assume a minimum and maximum size and use that for buffer data update purposes. 
+ private const int MinIndirectIndexCount = 0x10000; + private const int MaxIndirectIndexCount = 0x4000000; + + private readonly GpuContext _context; + private readonly GpuChannel _channel; + private readonly DeviceStateWithShadow<ThreedClassState> _state; + private readonly DrawState _drawState; + private readonly SpecializationStateUpdater _currentSpecState; + private bool _topologySet; + + private bool _instancedDrawPending; + private bool _instancedIndexed; + private bool _instancedIndexedInline; + + private int _instancedFirstIndex; + private int _instancedFirstVertex; + private int _instancedFirstInstance; + private int _instancedIndexCount; + private int _instancedDrawStateFirst; + private int _instancedDrawStateCount; + + private int _instanceIndex; + + private const int VertexBufferFirstMethodOffset = 0x35d; + private const int IndexBufferCountMethodOffset = 0x5f8; + + /// <summary> + /// Creates a new instance of the draw manager. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">GPU channel</param> + /// <param name="state">Channel state</param> + /// <param name="drawState">Draw state</param> + /// <param name="spec">Specialization state updater</param> + public DrawManager(GpuContext context, GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state, DrawState drawState, SpecializationStateUpdater spec) + { + _context = context; + _channel = channel; + _state = state; + _drawState = drawState; + _currentSpecState = spec; + } + + /// <summary> + /// Marks the entire state as dirty, forcing a full host state update before the next draw. + /// </summary> + public void ForceStateDirty() + { + _topologySet = false; + } + + /// <summary> + /// Pushes four 8-bit index buffer elements. 
+ /// </summary> + /// <param name="argument">Method call argument</param> + public void VbElementU8(int argument) + { + _drawState.IbStreamer.VbElementU8(_context.Renderer, argument); + } + + /// <summary> + /// Pushes two 16-bit index buffer elements. + /// </summary> + /// <param name="argument">Method call argument</param> + public void VbElementU16(int argument) + { + _drawState.IbStreamer.VbElementU16(_context.Renderer, argument); + } + + /// <summary> + /// Pushes one 32-bit index buffer element. + /// </summary> + /// <param name="argument">Method call argument</param> + public void VbElementU32(int argument) + { + _drawState.IbStreamer.VbElementU32(_context.Renderer, argument); + } + + /// <summary> + /// Finishes the draw call. + /// This draws geometry on the bound buffers based on the current GPU state. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawEnd(ThreedClass engine, int argument) + { + DrawEnd( + engine, + _state.State.IndexBufferState.First, + (int)_state.State.IndexBufferCount, + _state.State.VertexBufferDrawState.First, + _state.State.VertexBufferDrawState.Count); + } + + /// <summary> + /// Finishes the draw call. + /// This draws geometry on the bound buffers based on the current GPU state. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="firstIndex">Index of the first index buffer element used on the draw</param> + /// <param name="indexCount">Number of index buffer elements used on the draw</param> + /// <param name="drawFirstVertex">Index of the first vertex used on the draw</param> + /// <param name="drawVertexCount">Number of vertices used on the draw</param> + private void DrawEnd(ThreedClass engine, int firstIndex, int indexCount, int drawFirstVertex, int drawVertexCount) + { + ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable( + _context, + _channel.MemoryManager, + _state.State.RenderEnableAddress, + _state.State.RenderEnableCondition); + + if (renderEnable == ConditionalRenderEnabled.False || _instancedDrawPending) + { + if (renderEnable == ConditionalRenderEnabled.False) + { + PerformDeferredDraws(); + } + + _drawState.DrawIndexed = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + + return; + } + + _drawState.FirstIndex = firstIndex; + _drawState.IndexCount = indexCount; + _drawState.DrawFirstVertex = drawFirstVertex; + _drawState.DrawVertexCount = drawVertexCount; + _currentSpecState.SetHasConstantBufferDrawParameters(false); + + engine.UpdateState(); + + bool instanced = _drawState.VsUsesInstanceId || _drawState.IsAnyVbInstanced; + + if (instanced) + { + _instancedDrawPending = true; + + int ibCount = _drawState.IbStreamer.InlineIndexCount; + + _instancedIndexed = _drawState.DrawIndexed; + _instancedIndexedInline = ibCount != 0; + + _instancedFirstIndex = firstIndex; + _instancedFirstVertex = (int)_state.State.FirstVertex; + _instancedFirstInstance = (int)_state.State.FirstInstance; + + _instancedIndexCount = ibCount != 0 ? 
ibCount : indexCount; + + _instancedDrawStateFirst = drawFirstVertex; + _instancedDrawStateCount = drawVertexCount; + + _drawState.DrawIndexed = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + + return; + } + + int firstInstance = (int)_state.State.FirstInstance; + + int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer); + + if (inlineIndexCount != 0) + { + int firstVertex = (int)_state.State.FirstVertex; + + BufferRange br = new BufferRange(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4); + + _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt); + + _context.Renderer.Pipeline.DrawIndexed(inlineIndexCount, 1, firstIndex, firstVertex, firstInstance); + } + else if (_drawState.DrawIndexed) + { + int firstVertex = (int)_state.State.FirstVertex; + + _context.Renderer.Pipeline.DrawIndexed(indexCount, 1, firstIndex, firstVertex, firstInstance); + } + else + { + var drawState = _state.State.VertexBufferDrawState; + + _context.Renderer.Pipeline.Draw(drawVertexCount, 1, drawFirstVertex, firstInstance); + } + + _drawState.DrawIndexed = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + } + + /// <summary> + /// Starts draw. + /// This sets primitive type and instanced draw parameters. + /// </summary> + /// <param name="argument">Method call argument</param> + public void DrawBegin(int argument) + { + bool incrementInstance = (argument & (1 << 26)) != 0; + bool resetInstance = (argument & (1 << 27)) == 0; + + PrimitiveType type = (PrimitiveType)(argument & 0xffff); + DrawBegin(incrementInstance, resetInstance, type); + } + + /// <summary> + /// Starts draw. + /// This sets primitive type and instanced draw parameters. 
+ /// </summary> + /// <param name="incrementInstance">Indicates if the current instance should be incremented</param> + /// <param name="resetInstance">Indicates if the current instance should be set to zero</param> + /// <param name="primitiveType">Primitive type</param> + private void DrawBegin(bool incrementInstance, bool resetInstance, PrimitiveType primitiveType) + { + if (incrementInstance) + { + _instanceIndex++; + } + else if (resetInstance) + { + PerformDeferredDraws(); + + _instanceIndex = 0; + } + + PrimitiveTopology topology; + + if (_state.State.PrimitiveTypeOverrideEnable) + { + PrimitiveTypeOverride typeOverride = _state.State.PrimitiveTypeOverride; + topology = typeOverride.Convert(); + } + else + { + topology = primitiveType.Convert(); + } + + UpdateTopology(topology); + } + + /// <summary> + /// Updates the current primitive topology if needed. + /// </summary> + /// <param name="topology">New primitive topology</param> + private void UpdateTopology(PrimitiveTopology topology) + { + if (_drawState.Topology != topology || !_topologySet) + { + _context.Renderer.Pipeline.SetPrimitiveTopology(topology); + _currentSpecState.SetTopology(topology); + _drawState.Topology = topology; + _topologySet = true; + } + } + + /// <summary> + /// Sets the index buffer count. + /// This also sets internal state that indicates that the next draw is an indexed draw. + /// </summary> + /// <param name="argument">Method call argument</param> + public void SetIndexBufferCount(int argument) + { + _drawState.DrawIndexed = true; + } + + // TODO: Verify if the index type is implied from the method that is called, + // or if it uses the state index type on hardware. + + /// <summary> + /// Performs a indexed draw with 8-bit index buffer elements. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer8BeginEndInstanceFirst(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, false); + } + + /// <summary> + /// Performs a indexed draw with 16-bit index buffer elements. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer16BeginEndInstanceFirst(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, false); + } + + /// <summary> + /// Performs a indexed draw with 32-bit index buffer elements. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer32BeginEndInstanceFirst(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, false); + } + + /// <summary> + /// Performs a indexed draw with 8-bit index buffer elements, + /// while also pre-incrementing the current instance value. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer8BeginEndInstanceSubsequent(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, true); + } + + /// <summary> + /// Performs a indexed draw with 16-bit index buffer elements, + /// while also pre-incrementing the current instance value. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer16BeginEndInstanceSubsequent(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, true); + } + + /// <summary> + /// Performs a indexed draw with 32-bit index buffer elements, + /// while also pre-incrementing the current instance value. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawIndexBuffer32BeginEndInstanceSubsequent(ThreedClass engine, int argument) + { + DrawIndexBufferBeginEndInstance(engine, argument, true); + } + + /// <summary> + /// Performs a indexed draw with a low number of index buffer elements, + /// while optionally also pre-incrementing the current instance value. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + /// <param name="instanced">True to increment the current instance value, false otherwise</param> + private void DrawIndexBufferBeginEndInstance(ThreedClass engine, int argument, bool instanced) + { + DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); + + int firstIndex = argument & 0xffff; + int indexCount = (argument >> 16) & 0xfff; + + bool oldDrawIndexed = _drawState.DrawIndexed; + + _drawState.DrawIndexed = true; + engine.ForceStateDirty(IndexBufferCountMethodOffset * 4); + + DrawEnd(engine, firstIndex, indexCount, 0, 0); + + _drawState.DrawIndexed = oldDrawIndexed; + } + + /// <summary> + /// Performs a non-indexed draw with the specified topology, index and count. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawVertexArrayBeginEndInstanceFirst(ThreedClass engine, int argument) + { + DrawVertexArrayBeginEndInstance(engine, argument, false); + } + + /// <summary> + /// Performs a non-indexed draw with the specified topology, index and count, + /// while incrementing the current instance. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawVertexArrayBeginEndInstanceSubsequent(ThreedClass engine, int argument) + { + DrawVertexArrayBeginEndInstance(engine, argument, true); + } + + /// <summary> + /// Performs a indexed draw with a low number of index buffer elements, + /// while optionally also pre-incrementing the current instance value. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + /// <param name="instanced">True to increment the current instance value, false otherwise</param> + private void DrawVertexArrayBeginEndInstance(ThreedClass engine, int argument, bool instanced) + { + DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); + + int firstVertex = argument & 0xffff; + int vertexCount = (argument >> 16) & 0xfff; + + bool oldDrawIndexed = _drawState.DrawIndexed; + + _drawState.DrawIndexed = false; + engine.ForceStateDirty(VertexBufferFirstMethodOffset * 4); + + DrawEnd(engine, 0, 0, firstVertex, vertexCount); + + _drawState.DrawIndexed = oldDrawIndexed; + } + + /// <summary> + /// Performs a texture draw with a source texture and sampler ID, along with source + /// and destination coordinates and sizes. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void DrawTexture(ThreedClass engine, int argument) + { + static float FixedToFloat(int fixedValue) + { + return fixedValue * (1f / 4096); // Fixed-point value with 12 fractional bits. + } + + float dstX0 = FixedToFloat(_state.State.DrawTextureDstX); + float dstY0 = FixedToFloat(_state.State.DrawTextureDstY); + float dstWidth = FixedToFloat(_state.State.DrawTextureDstWidth); + float dstHeight = FixedToFloat(_state.State.DrawTextureDstHeight); + + // TODO: Confirm behaviour on hardware. + // When this is active, the origin appears to be on the bottom. + if (_state.State.YControl.HasFlag(YControl.NegateY)) + { + dstY0 -= dstHeight; + } + + float dstX1 = dstX0 + dstWidth; + float dstY1 = dstY0 + dstHeight; + + float srcX0 = FixedToFloat(_state.State.DrawTextureSrcX); + float srcY0 = FixedToFloat(_state.State.DrawTextureSrcY); + float srcX1 = ((float)_state.State.DrawTextureDuDx / (1UL << 32)) * dstWidth + srcX0; // DuDx is scaled down by 2^32 before use. + float srcY1 = ((float)_state.State.DrawTextureDvDy / (1UL << 32)) * dstHeight + srcY0; // DvDy is scaled down by 2^32 before use. + + engine.UpdateState(ulong.MaxValue & ~(1UL << StateUpdater.ShaderStateIndex)); + + _channel.TextureManager.UpdateRenderTargets(); + + int textureId = _state.State.DrawTextureTextureId; + int samplerId = _state.State.DrawTextureSamplerId; + + (var texture, var sampler) = _channel.TextureManager.GetGraphicsTextureAndSampler(textureId, samplerId); + + srcX0 *= texture?.ScaleFactor ?? 1f; // The texture is treated as nullable by the host call at the end of this method, so a null texture must not be dereferenced here; it leaves the source coordinates unscaled. + srcY0 *= texture?.ScaleFactor ?? 1f; + srcX1 *= texture?.ScaleFactor ?? 1f; + srcY1 *= texture?.ScaleFactor ?? 1f; + + float dstScale = _channel.TextureManager.RenderTargetScale; + + dstX0 *= dstScale; + dstY0 *= dstScale; + dstX1 *= dstScale; + dstY1 *= dstScale; + + _context.Renderer.Pipeline.DrawTexture( + texture?.HostTexture, + sampler?.GetHostSampler(texture), + new Extents2DF(srcX0, srcY0, srcX1, srcY1), + new Extents2DF(dstX0, dstY0, dstX1, dstY1)); + } + + /// <summary> + /// Performs a 
indexed or non-indexed draw. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="topology">Primitive topology</param> + /// <param name="count">Index count for indexed draws, vertex count for non-indexed draws</param> + /// <param name="instanceCount">Instance count</param> + /// <param name="firstIndex">First index on the index buffer for indexed draws, ignored for non-indexed draws</param> + /// <param name="firstVertex">First vertex on the vertex buffer</param> + /// <param name="firstInstance">First instance</param> + /// <param name="indexed">True if the draw is indexed, false otherwise</param> + public void Draw( + ThreedClass engine, + PrimitiveTopology topology, + int count, + int instanceCount, + int firstIndex, + int firstVertex, + int firstInstance, + bool indexed) + { + UpdateTopology(topology); + + ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable( + _context, + _channel.MemoryManager, + _state.State.RenderEnableAddress, + _state.State.RenderEnableCondition); + + if (renderEnable == ConditionalRenderEnabled.False) + { + _drawState.DrawIndexed = false; + return; + } + + if (indexed) + { + _drawState.FirstIndex = firstIndex; + _drawState.IndexCount = count; + _state.State.FirstVertex = (uint)firstVertex; + engine.ForceStateDirty(IndexBufferCountMethodOffset * 4); + } + else + { + _drawState.DrawFirstVertex = firstVertex; + _drawState.DrawVertexCount = count; + engine.ForceStateDirty(VertexBufferFirstMethodOffset * 4); + } + + _state.State.FirstInstance = (uint)firstInstance; + + _drawState.DrawIndexed = indexed; + _currentSpecState.SetHasConstantBufferDrawParameters(true); + + engine.UpdateState(); + + if (indexed) + { + _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance); + _state.State.FirstVertex = 0; + } + else + { + _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance); + } + + 
_state.State.FirstInstance = 0; + + _drawState.DrawIndexed = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + } + + /// <summary> + /// Performs an indirect draw, with parameters from a GPU buffer. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="topology">Primitive topology</param> + /// <param name="indirectBufferAddress">Address of the buffer with the draw parameters, such as count, first index, etc</param> + /// <param name="parameterBufferAddress">Address of the buffer with the draw count</param> + /// <param name="maxDrawCount">Maximum number of draws that can be made</param> + /// <param name="stride">Distance in bytes between each entry on the data pointed to by <paramref name="indirectBufferAddress"/></param> + /// <param name="indexCount">Maximum number of indices that the draw can consume</param> + /// <param name="drawType">Type of the indirect draw, which can be indexed or non-indexed, with or without a draw count</param> + public void DrawIndirect( + ThreedClass engine, + PrimitiveTopology topology, + ulong indirectBufferAddress, + ulong parameterBufferAddress, + int maxDrawCount, + int stride, + int indexCount, + IndirectDrawType drawType) + { + UpdateTopology(topology); + + ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable( + _context, + _channel.MemoryManager, + _state.State.RenderEnableAddress, + _state.State.RenderEnableCondition); + + if (renderEnable == ConditionalRenderEnabled.False) + { + _drawState.DrawIndexed = false; + return; + } + + PhysicalMemory memory = _channel.MemoryManager.Physical; + + bool hasCount = (drawType & IndirectDrawType.Count) != 0; + bool indexed = (drawType & IndirectDrawType.Indexed) != 0; + + if (indexed) + { + indexCount = Math.Clamp(indexCount, MinIndirectIndexCount, MaxIndirectIndexCount); + _drawState.FirstIndex = 0; + _drawState.IndexCount 
= indexCount; + engine.ForceStateDirty(IndexBufferCountMethodOffset * 4); + } + + _drawState.DrawIndexed = indexed; + _drawState.DrawIndirect = true; + _currentSpecState.SetHasConstantBufferDrawParameters(true); + + engine.UpdateState(); + + if (hasCount) + { + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferAddress, (ulong)maxDrawCount * (ulong)stride); + var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferAddress, 4); + + if (indexed) + { + _context.Renderer.Pipeline.DrawIndexedIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride); + } + else + { + _context.Renderer.Pipeline.DrawIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride); + } + } + else + { + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferAddress, (ulong)stride); + + if (indexed) + { + _context.Renderer.Pipeline.DrawIndexedIndirect(indirectBuffer); + } + else + { + _context.Renderer.Pipeline.DrawIndirect(indirectBuffer); + } + } + + _drawState.DrawIndexed = false; + _drawState.DrawIndirect = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + } + + /// <summary> + /// Perform any deferred draws. + /// This is used for instanced draws. + /// Since each instance is a separate draw, we defer the draw and accumulate the instance count. + /// Once we detect the last instanced draw, then we perform the host instanced draw, + /// with the accumulated instance count. + /// </summary> + public void PerformDeferredDraws() + { + // Perform any pending instanced draw. 
+ if (_instancedDrawPending) + { + _instancedDrawPending = false; + + bool indexedInline = _instancedIndexedInline; + + if (_instancedIndexed || indexedInline) + { + if (indexedInline) + { + int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer); + BufferRange br = new BufferRange(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4); // 4 bytes per index, matching the IndexType.UInt binding below. + + _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt); + } + + _context.Renderer.Pipeline.DrawIndexed( + _instancedIndexCount, + _instanceIndex + 1, + _instancedFirstIndex, + _instancedFirstVertex, + _instancedFirstInstance); + } + else + { + _context.Renderer.Pipeline.Draw( + _instancedDrawStateCount, + _instanceIndex + 1, + _instancedDrawStateFirst, + _instancedFirstInstance); + } + } + } + + /// <summary> + /// Clears the current color and depth-stencil buffers. + /// Which buffers should be cleared can also be specified with the argument. + /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + public void Clear(ThreedClass engine, int argument) + { + Clear(engine, argument, 1); + } + + /// <summary> + /// Clears the current color and depth-stencil buffers. + /// Which buffers should be cleared can also be specified with the arguments. 
+ /// </summary> + /// <param name="engine">3D engine where this method is being called</param> + /// <param name="argument">Method call argument</param> + /// <param name="layerCount">For array and 3D textures, indicates how many layers should be cleared</param> + public void Clear(ThreedClass engine, int argument, int layerCount) + { + ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable( + _context, + _channel.MemoryManager, + _state.State.RenderEnableAddress, + _state.State.RenderEnableCondition); + + if (renderEnable == ConditionalRenderEnabled.False) + { + return; + } + + bool clearDepth = (argument & 1) != 0; + bool clearStencil = (argument & 2) != 0; + uint componentMask = (uint)((argument >> 2) & 0xf); + int index = (argument >> 6) & 0xf; + int layer = (argument >> 10) & 0x3ff; + + RenderTargetUpdateFlags updateFlags = RenderTargetUpdateFlags.SingleColor; + + if (layer != 0 || layerCount > 1) + { + updateFlags |= RenderTargetUpdateFlags.Layered; + } + + if (clearDepth || clearStencil) + { + updateFlags |= RenderTargetUpdateFlags.UpdateDepthStencil; + } + + engine.UpdateRenderTargetState(updateFlags, singleUse: componentMask != 0 ? index : -1); + + // If there is a mismatch on the host clip region and the one explicitly defined by the guest + // on the screen scissor state, then we need to force only one texture to be bound to avoid + // host clipping. + var screenScissorState = _state.State.ScreenScissorState; + + // Must happen after UpdateRenderTargetState to have up-to-date clip region values. 
+ bool clipMismatch = (screenScissorState.X | screenScissorState.Y) != 0 || + screenScissorState.Width != _channel.TextureManager.ClipRegionWidth || + screenScissorState.Height != _channel.TextureManager.ClipRegionHeight; + + bool clearAffectedByStencilMask = (_state.State.ClearFlags & 1) != 0; + bool clearAffectedByScissor = (_state.State.ClearFlags & 0x100) != 0; + bool needsCustomScissor = !clearAffectedByScissor || clipMismatch; + + // Scissor and rasterizer discard also affect clears. + ulong updateMask = 1UL << StateUpdater.RasterizerStateIndex; + + if (!needsCustomScissor) + { + updateMask |= 1UL << StateUpdater.ScissorStateIndex; + } + + engine.UpdateState(updateMask); + + if (needsCustomScissor) + { + int scissorX = screenScissorState.X; + int scissorY = screenScissorState.Y; + int scissorW = screenScissorState.Width; + int scissorH = screenScissorState.Height; + + if (clearAffectedByScissor && _state.State.ScissorState[0].Enable) + { + ref var scissorState = ref _state.State.ScissorState[0]; + + scissorX = Math.Max(scissorX, scissorState.X1); + scissorY = Math.Max(scissorY, scissorState.Y1); + scissorW = Math.Min(scissorW, scissorState.X2 - scissorState.X1); + scissorH = Math.Min(scissorH, scissorState.Y2 - scissorState.Y1); + } + + float scale = _channel.TextureManager.RenderTargetScale; + if (scale != 1f) + { + scissorX = (int)(scissorX * scale); + scissorY = (int)(scissorY * scale); + scissorW = (int)MathF.Ceiling(scissorW * scale); + scissorH = (int)MathF.Ceiling(scissorH * scale); + } + + Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[] + { + new Rectangle<int>(scissorX, scissorY, scissorW, scissorH) + }; + + _context.Renderer.Pipeline.SetScissors(scissors); + } + + _channel.TextureManager.UpdateRenderTargets(); + + if (componentMask != 0) + { + var clearColor = _state.State.ClearColors; + + ColorF color = new ColorF(clearColor.Red, clearColor.Green, clearColor.Blue, clearColor.Alpha); + + 
_context.Renderer.Pipeline.ClearRenderTargetColor(index, layer, layerCount, componentMask, color); + } + + if (clearDepth || clearStencil) + { + float depthValue = _state.State.ClearDepthValue; + int stencilValue = (int)_state.State.ClearStencilValue; + + int stencilMask = 0; + + if (clearStencil) + { + stencilMask = clearAffectedByStencilMask ? _state.State.StencilTestState.FrontMask : 0xff; + } + + if (clipMismatch) + { + _channel.TextureManager.UpdateRenderTargetDepthStencil(); + } + + _context.Renderer.Pipeline.ClearRenderTargetDepthStencil( + layer, + layerCount, + depthValue, + clearDepth, + stencilValue, + stencilMask); + } + + if (needsCustomScissor) + { + engine.UpdateScissorState(); + } + + engine.UpdateRenderTargetState(RenderTargetUpdateFlags.UpdateAll); + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs new file mode 100644 index 00000000..42ec2442 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs @@ -0,0 +1,65 @@ +using Ryujinx.Graphics.GAL; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Draw state. + /// </summary> + class DrawState + { + /// <summary> + /// First index to be used for the draw on the index buffer. + /// </summary> + public int FirstIndex; + + /// <summary> + /// Number of indices to be used for the draw on the index buffer. + /// </summary> + public int IndexCount; + + /// <summary> + /// First vertex used on non-indexed draws. This value is stored somewhere else on indexed draws. + /// </summary> + public int DrawFirstVertex; + + /// <summary> + /// Vertex count used on non-indexed draws. Indexed draws have an index count instead. + /// </summary> + public int DrawVertexCount; + + /// <summary> + /// Indicates if the next draw will be an indexed draw. 
+ /// </summary> + public bool DrawIndexed; + + /// <summary> + /// Indicates if the next draw will be an indirect draw. + /// </summary> + public bool DrawIndirect; + + /// <summary> + /// Indicates if any of the currently used vertex shaders reads the instance ID. + /// </summary> + public bool VsUsesInstanceId; + + /// <summary> + /// Indicates if any of the currently used vertex buffers is instanced. + /// </summary> + public bool IsAnyVbInstanced; + + /// <summary> + /// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0. + /// </summary> + public bool HasConstantBufferDrawParameters; + + /// <summary> + /// Primitive topology for the next draw. + /// </summary> + public PrimitiveTopology Topology; + + /// <summary> + /// Index buffer data streamer for inline index buffer updates, such as those used in legacy OpenGL. + /// </summary> + public IbStreamer IbStreamer = new IbStreamer(); + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs new file mode 100644 index 00000000..80d8c00b --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IbStreamer.cs @@ -0,0 +1,194 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Holds inline index buffer state. + /// The inline index buffer data is sent to the GPU through the command buffer. + /// </summary> + struct IbStreamer + { + private const int BufferCapacity = 256; // Must be a power of 2. + + private BufferHandle _inlineIndexBuffer; + private int _inlineIndexBufferSize; + private int _inlineIndexCount; + private uint[] _buffer; + private int _bufferOffset; + + /// <summary> + /// Indicates if any index buffer data has been pushed. 
+ /// </summary> + public bool HasInlineIndexData => _inlineIndexCount != 0; + + /// <summary> + /// Total number of indices that have been pushed. + /// </summary> + public int InlineIndexCount => _inlineIndexCount; + + /// <summary> + /// Gets the handle for the host buffer currently holding the inline index buffer data. + /// </summary> + /// <returns>Host buffer handle</returns> + public BufferHandle GetInlineIndexBuffer() + { + return _inlineIndexBuffer; + } + + /// <summary> + /// Gets the number of elements on the current inline index buffer, + /// while also resetting it to zero for the next draw. + /// </summary> + /// <param name="renderer">Host renderer</param> + /// <returns>Inline index buffer count</returns> + public int GetAndResetInlineIndexCount(IRenderer renderer) + { + UpdateRemaining(renderer); + int temp = _inlineIndexCount; + _inlineIndexCount = 0; + return temp; + } + + /// <summary> + /// Pushes four 8-bit index buffer elements. + /// </summary> + /// <param name="renderer">Host renderer</param> + /// <param name="argument">Method call argument</param> + public void VbElementU8(IRenderer renderer, int argument) + { + byte i0 = (byte)argument; + byte i1 = (byte)(argument >> 8); + byte i2 = (byte)(argument >> 16); + byte i3 = (byte)(argument >> 24); + + int offset = _inlineIndexCount; + + PushData(renderer, offset, i0); + PushData(renderer, offset + 1, i1); + PushData(renderer, offset + 2, i2); + PushData(renderer, offset + 3, i3); + + _inlineIndexCount += 4; + } + + /// <summary> + /// Pushes two 16-bit index buffer elements. 
+ /// </summary> + /// <param name="renderer">Host renderer</param> + /// <param name="argument">Method call argument</param> + public void VbElementU16(IRenderer renderer, int argument) + { + ushort i0 = (ushort)argument; + ushort i1 = (ushort)(argument >> 16); + + int offset = _inlineIndexCount; + + PushData(renderer, offset, i0); + PushData(renderer, offset + 1, i1); + + _inlineIndexCount += 2; + } + + /// <summary> + /// Pushes one 32-bit index buffer element. + /// </summary> + /// <param name="renderer">Host renderer</param> + /// <param name="argument">Method call argument</param> + public void VbElementU32(IRenderer renderer, int argument) + { + uint i0 = (uint)argument; + + int offset = _inlineIndexCount++; + + PushData(renderer, offset, i0); + } + + /// <summary> + /// Pushes a 32-bit value to the index buffer. + /// </summary> + /// <param name="renderer">Host renderer</param> + /// <param name="offset">Offset where the data should be written, in 32-bit words</param> + /// <param name="value">Index value to be written</param> + private void PushData(IRenderer renderer, int offset, uint value) + { + if (_buffer == null) + { + _buffer = new uint[BufferCapacity]; + } + + // We upload data in chunks. + // If we are at the start of a chunk, then the buffer might be full, + // in that case we need to submit any existing data before overwriting the buffer. + int subOffset = offset & (BufferCapacity - 1); + + if (subOffset == 0 && offset != 0) + { + int baseOffset = (offset - BufferCapacity) * sizeof(uint); + BufferHandle buffer = GetInlineIndexBuffer(renderer, baseOffset, BufferCapacity * sizeof(uint)); + renderer.SetBufferData(buffer, baseOffset, MemoryMarshal.Cast<uint, byte>(_buffer)); + } + + _buffer[subOffset] = value; + } + + /// <summary> + /// Makes sure that any pending data is submitted to the GPU before the index buffer is used. 
+ /// </summary> + /// <param name="renderer">Host renderer</param> + private void UpdateRemaining(IRenderer renderer) + { + int offset = _inlineIndexCount; + if (offset == 0) + { + return; + } + + int count = offset & (BufferCapacity - 1); + if (count == 0) + { + count = BufferCapacity; + } + + int baseOffset = (offset - count) * sizeof(uint); + int length = count * sizeof(uint); + BufferHandle buffer = GetInlineIndexBuffer(renderer, baseOffset, length); + renderer.SetBufferData(buffer, baseOffset, MemoryMarshal.Cast<uint, byte>(_buffer).Slice(0, length)); + } + + /// <summary> + /// Gets the handle of a buffer large enough to hold the data that will be written to <paramref name="offset"/>. + /// </summary> + /// <param name="renderer">Host renderer</param> + /// <param name="offset">Offset where the data will be written</param> + /// <param name="length">Number of bytes that will be written</param> + /// <returns>Buffer handle</returns> + private BufferHandle GetInlineIndexBuffer(IRenderer renderer, int offset, int length) + { + // Calculate a reasonable size for the buffer that can fit all the data, + // and that also won't require frequent resizes if we need to push more data. 
+ int size = BitUtils.AlignUp(offset + length + 0x10, 0x200); + + if (_inlineIndexBuffer == BufferHandle.Null) + { + _inlineIndexBuffer = renderer.CreateBuffer(size); + _inlineIndexBufferSize = size; + } + else if (_inlineIndexBufferSize < size) + { + BufferHandle oldBuffer = _inlineIndexBuffer; + int oldSize = _inlineIndexBufferSize; + + _inlineIndexBuffer = renderer.CreateBuffer(size); + _inlineIndexBufferSize = size; + + renderer.Pipeline.CopyBuffer(oldBuffer, _inlineIndexBuffer, 0, 0, oldSize); + renderer.DeleteBuffer(oldBuffer); + } + + return _inlineIndexBuffer; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs new file mode 100644 index 00000000..d78aa498 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/IndirectDrawType.cs @@ -0,0 +1,38 @@ +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Indirect draw type, which can be indexed or non-indexed, with or without a draw count. + /// </summary> + enum IndirectDrawType + { + /// <summary> + /// Non-indexed draw without draw count. + /// </summary> + DrawIndirect = 0, + + /// <summary> + /// Indexed draw without draw count. + /// </summary> + DrawIndexedIndirect = Indexed, + + /// <summary> + /// Non-indexed draw with draw count. + /// </summary> + DrawIndirectCount = Count, + + /// <summary> + /// Indexed draw with draw count. + /// </summary> + DrawIndexedIndirectCount = Indexed | Count, + + /// <summary> + /// Indexed flag. + /// </summary> + Indexed = 1 << 0, + + /// <summary> + /// Draw count flag. 
+ /// </summary> + Count = 1 << 1 + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs new file mode 100644 index 00000000..cf2e818c --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs @@ -0,0 +1,41 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Flags indicating how the render targets should be updated. + /// </summary> + [Flags] + enum RenderTargetUpdateFlags + { + /// <summary> + /// No flags. + /// </summary> + None = 0, + + /// <summary> + /// Get render target index from the control register. + /// </summary> + UseControl = 1 << 0, + + /// <summary> + /// Indicates that all render targets are 2D array textures. + /// </summary> + Layered = 1 << 1, + + /// <summary> + /// Indicates that only a single color target will be used. + /// </summary> + SingleColor = 1 << 2, + + /// <summary> + /// Indicates that the depth-stencil target will be used. + /// </summary> + UpdateDepthStencil = 1 << 3, + + /// <summary> + /// Default update flags for draw. + /// </summary> + UpdateAll = UseControl | UpdateDepthStencil + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs new file mode 100644 index 00000000..63a2c841 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SemaphoreUpdater.cs @@ -0,0 +1,190 @@ +using Ryujinx.Graphics.GAL; +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Semaphore updater. + /// </summary> + class SemaphoreUpdater + { + /// <summary> + /// GPU semaphore operation. + /// </summary> + private enum SemaphoreOperation + { + Release = 0, + Acquire = 1, + Counter = 2 + } + + /// <summary> + /// Counter type for GPU counter reset. 
+ /// </summary> + private enum ResetCounterType + { + SamplesPassed = 1, + ZcullStats = 2, + TransformFeedbackPrimitivesWritten = 0x10, + InputVertices = 0x12, + InputPrimitives = 0x13, + VertexShaderInvocations = 0x15, + TessControlShaderInvocations = 0x16, + TessEvaluationShaderInvocations = 0x17, + TessEvaluationShaderPrimitives = 0x18, + GeometryShaderInvocations = 0x1a, + GeometryShaderPrimitives = 0x1b, + ClipperInputPrimitives = 0x1c, + ClipperOutputPrimitives = 0x1d, + FragmentShaderInvocations = 0x1e, + PrimitivesGenerated = 0x1f + } + + /// <summary> + /// Counter type for GPU counter reporting. + /// </summary> + private enum ReportCounterType + { + Payload = 0, + InputVertices = 1, + InputPrimitives = 3, + VertexShaderInvocations = 5, + GeometryShaderInvocations = 7, + GeometryShaderPrimitives = 9, + ZcullStats0 = 0xa, + TransformFeedbackPrimitivesWritten = 0xb, + ZcullStats1 = 0xc, + ZcullStats2 = 0xe, + ClipperInputPrimitives = 0xf, + ZcullStats3 = 0x10, + ClipperOutputPrimitives = 0x11, + PrimitivesGenerated = 0x12, + FragmentShaderInvocations = 0x13, + SamplesPassed = 0x15, + TransformFeedbackOffset = 0x1a, + TessControlShaderInvocations = 0x1b, + TessEvaluationShaderInvocations = 0x1d, + TessEvaluationShaderPrimitives = 0x1f + } + + private readonly GpuContext _context; + private readonly GpuChannel _channel; + private readonly DeviceStateWithShadow<ThreedClassState> _state; + + /// <summary> + /// Creates a new instance of the semaphore updater. + /// </summary> + /// <param name="context">GPU context</param> + /// <param name="channel">GPU channel</param> + /// <param name="state">Channel state</param> + public SemaphoreUpdater(GpuContext context, GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state) + { + _context = context; + _channel = channel; + _state = state; + } + + /// <summary> + /// Resets the value of an internal GPU counter back to zero. 
+ /// </summary> + /// <param name="argument">Method call argument</param> + public void ResetCounter(int argument) + { + ResetCounterType type = (ResetCounterType)argument; + + switch (type) + { + case ResetCounterType.SamplesPassed: + _context.Renderer.ResetCounter(CounterType.SamplesPassed); + break; + case ResetCounterType.PrimitivesGenerated: + _context.Renderer.ResetCounter(CounterType.PrimitivesGenerated); + break; + case ResetCounterType.TransformFeedbackPrimitivesWritten: + _context.Renderer.ResetCounter(CounterType.TransformFeedbackPrimitivesWritten); + break; + } + } + + /// <summary> + /// Writes a GPU counter to guest memory. + /// </summary> + /// <param name="argument">Method call argument</param> + public void Report(int argument) + { + SemaphoreOperation op = (SemaphoreOperation)(argument & 3); // Bits 0-1 of the argument select the operation. + ReportCounterType type = (ReportCounterType)((argument >> 23) & 0x1f); // Bits 23-27 of the argument select the counter. + + switch (op) + { + case SemaphoreOperation.Release: ReleaseSemaphore(); break; + case SemaphoreOperation.Counter: ReportCounter(type); break; + } + } + + /// <summary> + /// Writes (or Releases) a GPU semaphore value to guest memory. + /// </summary> + private void ReleaseSemaphore() + { + _channel.MemoryManager.Write(_state.State.SemaphoreAddress.Pack(), _state.State.SemaphorePayload); + + _context.AdvanceSequence(); + } + + /// <summary> + /// Packed GPU counter data (including GPU timestamp) in memory. + /// </summary> + private struct CounterData + { + public ulong Counter; + public ulong Timestamp; + } + + /// <summary> + /// Writes a GPU counter to guest memory. + /// This also writes the current timestamp value. 
+ /// </summary> + /// <param name="type">Counter to be written to memory</param> + private void ReportCounter(ReportCounterType type) + { + ulong gpuVa = _state.State.SemaphoreAddress.Pack(); + + ulong ticks = _context.GetTimestamp(); + + ICounterEvent counter = null; + + void resultHandler(object evt, ulong result) + { + CounterData counterData = new CounterData + { + Counter = result, + Timestamp = ticks + }; + + if (counter?.Invalid != true) + { + _channel.MemoryManager.Write(gpuVa, counterData); + } + } + + switch (type) + { + case ReportCounterType.Payload: + resultHandler(null, (ulong)_state.State.SemaphorePayload); + break; + case ReportCounterType.SamplesPassed: + counter = _context.Renderer.ReportCounter(CounterType.SamplesPassed, resultHandler, false); + break; + case ReportCounterType.PrimitivesGenerated: + counter = _context.Renderer.ReportCounter(CounterType.PrimitivesGenerated, resultHandler, false); + break; + case ReportCounterType.TransformFeedbackPrimitivesWritten: + counter = _context.Renderer.ReportCounter(CounterType.TransformFeedbackPrimitivesWritten, resultHandler, false); + break; + } + + _channel.MemoryManager.CounterCache.AddOrUpdate(gpuVa, counter); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs new file mode 100644 index 00000000..a8af5497 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs @@ -0,0 +1,346 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Shader; +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Maintains a "current" specialization state, and provides a flag to check if it has changed meaningfully. 
+ /// </summary> + internal class SpecializationStateUpdater + { + private readonly GpuContext _context; + private GpuChannelGraphicsState _graphics; + private GpuChannelPoolState _pool; + + private bool _usesDrawParameters; + private bool _usesTopology; + + private bool _changed; + + /// <summary> + /// Creates a new instance of the specialization state updater class. + /// </summary> + /// <param name="context">GPU context</param> + public SpecializationStateUpdater(GpuContext context) + { + _context = context; + } + + /// <summary> + /// Signal that the specialization state has changed. + /// </summary> + private void Signal() + { + _changed = true; + } + + /// <summary> + /// Checks if the specialization state has changed since the last check. + /// </summary> + /// <returns>True if it has changed, false otherwise</returns> + public bool HasChanged() + { + if (_changed) + { + _changed = false; + return true; + } + else + { + return false; + } + } + + /// <summary> + /// Sets the active shader, clearing the dirty state and recording if certain specializations are noteworthy. + /// </summary> + /// <param name="gs">The active shader</param> + public void SetShader(CachedShaderProgram gs) + { + _usesDrawParameters = gs.Shaders[1]?.Info.UsesDrawParameters ?? false; + _usesTopology = gs.SpecializationState.IsPrimitiveTopologyQueried(); + + _changed = false; + } + + /// <summary> + /// Get the current graphics state. + /// </summary> + /// <returns>GPU graphics state</returns> + public ref GpuChannelGraphicsState GetGraphicsState() + { + return ref _graphics; + } + + /// <summary> + /// Get the current pool state. + /// </summary> + /// <returns>GPU pool state</returns> + public ref GpuChannelPoolState GetPoolState() + { + return ref _pool; + } + + /// <summary> + /// Early Z force enable. 
+ /// </summary> + /// <param name="value">The new value</param> + public void SetEarlyZForce(bool value) + { + _graphics.EarlyZForce = value; + + Signal(); + } + + /// <summary> + /// Primitive topology of current draw. + /// </summary> + /// <param name="value">The new value</param> + public void SetTopology(PrimitiveTopology value) + { + if (value != _graphics.Topology) + { + _graphics.Topology = value; + + if (_usesTopology) + { + Signal(); + } + } + } + + /// <summary> + /// Tessellation mode. + /// </summary> + /// <param name="value">The new value</param> + public void SetTessellationMode(TessMode value) + { + if (value.Packed != _graphics.TessellationMode.Packed) + { + _graphics.TessellationMode = value; + + Signal(); + } + } + + /// <summary> + /// Updates alpha-to-coverage state, and sets it as changed. + /// </summary> + /// <param name="enable">Whether alpha-to-coverage is enabled</param> + /// <param name="ditherEnable">Whether alpha-to-coverage dithering is enabled</param> + public void SetAlphaToCoverageEnable(bool enable, bool ditherEnable) + { + _graphics.AlphaToCoverageEnable = enable; + _graphics.AlphaToCoverageDitherEnable = ditherEnable; + + Signal(); + } + + /// <summary> + /// Indicates whether the viewport transform is disabled. + /// </summary> + /// <param name="value">The new value</param> + public void SetViewportTransformDisable(bool value) + { + if (value != _graphics.ViewportTransformDisable) + { + _graphics.ViewportTransformDisable = value; + + Signal(); + } + } + + /// <summary> + /// Depth mode zero to one or minus one to one. + /// </summary> + /// <param name="value">The new value</param> + public void SetDepthMode(bool value) + { + if (value != _graphics.DepthMode) + { + _graphics.DepthMode = value; + + Signal(); + } + } + + /// <summary> + /// Indicates if the point size is set on the shader or is fixed. 
/// </summary>
        /// <param name="value">The new value</param>
        public void SetProgramPointSizeEnable(bool value)
        {
            if (value == _graphics.ProgramPointSizeEnable)
            {
                return;
            }

            _graphics.ProgramPointSizeEnable = value;

            Signal();
        }

        /// <summary>
        /// Point size used if <see cref="SetProgramPointSizeEnable" /> is provided false.
        /// </summary>
        /// <param name="value">The new value</param>
        public void SetPointSize(float value)
        {
            // Exact comparison is intended: this is change detection, not a numeric tolerance test.
            if (value == _graphics.PointSize)
            {
                return;
            }

            _graphics.PointSize = value;

            Signal();
        }

        /// <summary>
        /// Updates alpha test specialization state, and sets it as changed.
        /// </summary>
        /// <param name="enable">Whether alpha test is enabled</param>
        /// <param name="reference">The value to compare with the fragment output alpha</param>
        /// <param name="op">The comparison that decides if the fragment should be discarded</param>
        public void SetAlphaTest(bool enable, float reference, CompareOp op)
        {
            _graphics.AlphaTestCompare = op;
            _graphics.AlphaTestReference = reference;
            _graphics.AlphaTestEnable = enable;

            Signal();
        }

        /// <summary>
        /// Updates the type of the vertex attributes consumed by the shader.
/// </summary>
        /// <param name="state">The new state</param>
        public void SetAttributeTypes(ref Array32<VertexAttribState> state)
        {
            ref Array32<AttributeType> current = ref _graphics.AttributeTypes;
            bool anyChanged = false;

            for (int location = 0; location < state.Length; location++)
            {
                // Anything that is not explicitly a signed or unsigned integer is treated as float.
                AttributeType updated = state[location].UnpackType() switch
                {
                    VertexAttribType.Sint => AttributeType.Sint,
                    VertexAttribType.Uint => AttributeType.Uint,
                    _ => AttributeType.Float
                };

                if (current[location] == updated)
                {
                    continue;
                }

                current[location] = updated;
                anyChanged = true;
            }

            if (anyChanged)
            {
                Signal();
            }
        }

        /// <summary>
        /// Updates the type of the outputs produced by the fragment shader based on the current render target state.
        /// </summary>
        /// <param name="rtControl">The render target control register</param>
        /// <param name="state">The color attachment state</param>
        public void SetFragmentOutputTypes(RtControl rtControl, ref Array8<RtColorState> state)
        {
            int activeCount = rtControl.UnpackCount();
            bool anyChanged = false;

            for (int index = 0; index < Constants.TotalRenderTargets; index++)
            {
                int rtIndex = rtControl.UnpackPermutationIndex(index);

                var colorState = state[rtIndex];

                // Unused or disabled targets keep whatever output type was recorded previously.
                if (index >= activeCount || !StateUpdater.IsRtEnabled(colorState))
                {
                    continue;
                }

                Format format = colorState.Format.Convert().Format;

                AttributeType outputType;

                if (!format.IsInteger())
                {
                    outputType = AttributeType.Float;
                }
                else if (format.IsSint())
                {
                    outputType = AttributeType.Sint;
                }
                else
                {
                    outputType = AttributeType.Uint;
                }

                if (outputType == _graphics.FragmentOutputTypes[index])
                {
                    continue;
                }

                _graphics.FragmentOutputTypes[index] = outputType;
                anyChanged = true;
            }

            // The stored types are always updated, but the change is only signalled
            // when the host reports that it needs fragment output specialization.
            if (anyChanged && _context.Capabilities.NeedsFragmentOutputSpecialization)
            {
                Signal();
            }
        }

        /// <summary>
        /// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
/// </summary>
        /// <param name="value">The new value</param>
        public void SetHasConstantBufferDrawParameters(bool value)
        {
            if (value == _graphics.HasConstantBufferDrawParameters)
            {
                return;
            }

            _graphics.HasConstantBufferDrawParameters = value;

            // Only relevant to shaders that actually use draw parameters.
            if (_usesDrawParameters)
            {
                Signal();
            }
        }

        /// <summary>
        /// Indicates that any storage buffer use is unaligned.
        /// </summary>
        /// <param name="value">The new value</param>
        /// <returns>True if the unaligned state changed, false otherwise</returns>
        public bool SetHasUnalignedStorageBuffer(bool value)
        {
            bool stateChanged = value != _graphics.HasUnalignedStorageBuffer;

            if (stateChanged)
            {
                _graphics.HasUnalignedStorageBuffer = value;

                Signal();
            }

            return stateChanged;
        }

        /// <summary>
        /// Sets the GPU pool state.
        /// </summary>
        /// <param name="state">The new state</param>
        public void SetPoolState(GpuChannelPoolState state)
        {
            if (state.Equals(_pool))
            {
                return;
            }

            _pool = state;

            Signal();
        }

        /// <summary>
        /// Sets the dual-source blend enabled state.
/// </summary>
        /// <param name="enabled">True if blending is enabled and using dual-source blend</param>
        public void SetDualSourceBlendEnabled(bool enabled)
        {
            // Only flag the specialization state as modified when the value actually changes.
            if (enabled != _graphics.DualSourceBlendEnable)
            {
                _graphics.DualSourceBlendEnable = enabled;

                Signal();
            }
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs
new file mode 100644
index 00000000..7c730967
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdateTracker.cs
@@ -0,0 +1,177 @@
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
    /// <summary>
    /// State update callback entry, with the callback function and associated field names.
    /// </summary>
    readonly struct StateUpdateCallbackEntry
    {
        /// <summary>
        /// Callback function, to be called if the register was written as the state needs to be updated.
        /// </summary>
        public Action Callback { get; }

        /// <summary>
        /// Names of the state fields (registers) associated with the callback function.
        /// </summary>
        public string[] FieldNames { get; }

        /// <summary>
        /// Creates a new state update callback entry.
        /// </summary>
        /// <param name="callback">Callback function, to be called if the register was written as the state needs to be updated</param>
        /// <param name="fieldNames">Names of the state fields (registers) associated with the callback function</param>
        public StateUpdateCallbackEntry(Action callback, params string[] fieldNames)
        {
            Callback = callback;
            FieldNames = fieldNames;
        }
    }

    /// <summary>
    /// GPU state update tracker.
/// </summary>
    /// <typeparam name="TState">State type</typeparam>
    class StateUpdateTracker<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicFields)] TState>
    {
        private const int BlockSize = 0xe00;
        private const int RegisterSize = sizeof(uint);

        private readonly byte[] _registerToGroupMapping;
        private readonly Action[] _callbacks;
        private ulong _dirtyMask;

        /// <summary>
        /// Builds the register-to-group lookup table from the given callback entries.
        /// Each entry becomes one group; every register covered by one of the entry's fields
        /// is mapped to that group.
        /// </summary>
        /// <param name="entries">Update tracker callback entries</param>
        public StateUpdateTracker(StateUpdateCallbackEntry[] entries)
        {
            _registerToGroupMapping = new byte[BlockSize];
            _callbacks = new Action[entries.Length];

            var groupByFieldName = new Dictionary<string, int>();

            for (int groupIndex = 0; groupIndex < entries.Length; groupIndex++)
            {
                StateUpdateCallbackEntry current = entries[groupIndex];

                // Dictionary.Add throws if the same field name is listed by more than one entry.
                foreach (string name in current.FieldNames)
                {
                    groupByFieldName.Add(name, groupIndex);
                }

                _callbacks[groupIndex] = current.Callback;
            }

            int byteOffset = 0;

            // Field offsets are accumulated from the field sizes, in the order the fields are returned.
            foreach (var stateField in typeof(TState).GetFields())
            {
                int fieldSize = SizeCalculator.SizeOf(stateField.FieldType);

                if (groupByFieldName.TryGetValue(stateField.Name, out int groupIndex))
                {
                    // Stored as index + 1 so that 0 can mean "register not tracked".
                    byte groupId = (byte)(groupIndex + 1);

                    // Round the field size up to a whole number of registers.
                    int alignedSize = (fieldSize + 3) & ~3;

                    for (int delta = 0; delta < alignedSize; delta += RegisterSize)
                    {
                        _registerToGroupMapping[(byteOffset + delta) / RegisterSize] = groupId;
                    }
                }

                byteOffset += fieldSize;
            }

            Debug.Assert(byteOffset == Unsafe.SizeOf<TState>());
        }

        /// <summary>
        /// Sets a register as modified.
/// </summary>
        /// <param name="offset">Register offset in bytes</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetDirty(int offset)
        {
            // A negative offset becomes a very large unsigned value and fails the range check below.
            uint index = (uint)offset / RegisterSize;

            if (index < BlockSize)
            {
                // The mapping array has exactly BlockSize elements, so the unchecked access is in range.
                int groupIndex = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_registerToGroupMapping), (IntPtr)index);

                // Zero means the register is not tracked; otherwise the value is the group index plus one.
                if (groupIndex != 0)
                {
                    groupIndex--;
                    _dirtyMask |= 1UL << groupIndex;
                }
            }
        }

        /// <summary>
        /// Forces a register group as dirty, by index.
        /// </summary>
        /// <param name="groupIndex">Index of the group to be dirtied</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="groupIndex"/> is negative or not a valid group index</exception>
        public void ForceDirty(int groupIndex)
        {
            // The unsigned cast rejects negative values with the same comparison.
            if ((uint)groupIndex >= _callbacks.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(groupIndex));
            }

            _dirtyMask |= 1UL << groupIndex;
        }

        /// <summary>
        /// Forces all register groups as dirty, triggering a full update on the next call to <see cref="Update"/>.
        /// </summary>
        public void SetAllDirty()
        {
            int groupCount = _callbacks.Length;

            Debug.Assert(groupCount <= sizeof(ulong) * 8);

            // C# only uses the low 6 bits of a 64-bit shift count, so with zero groups the
            // shift by 64 would act as a shift by 0 and mark 64 non-existent groups as dirty.
            _dirtyMask = groupCount == 0
                ? 0UL
                : ulong.MaxValue >> ((sizeof(ulong) * 8) - groupCount);
        }

        /// <summary>
        /// Check if the given register group is dirty without clearing it.
        /// </summary>
        /// <param name="groupIndex">Index of the group to check</param>
        /// <returns>True if dirty, false otherwise (including when the index is not a valid group)</returns>
        public bool IsDirty(int groupIndex)
        {
            // Without the range check, an out-of-range index would wrap around (shift counts
            // are masked to 6 bits) and report the dirty bit of an unrelated group.
            return (uint)groupIndex < (uint)_callbacks.Length && (_dirtyMask & (1UL << groupIndex)) != 0;
        }

        /// <summary>
        /// Check all the groups specified by <paramref name="checkMask"/> for modification, and update if modified.
/// </summary>
        /// <param name="checkMask">Mask, where each bit set corresponds to a group index that should be checked</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Update(ulong checkMask)
        {
            ulong mask = _dirtyMask & checkMask;
            if (mask == 0)
            {
                return;
            }

            // Callbacks run from the lowest set bit upwards, i.e. in ascending group index order.
            do
            {
                int groupIndex = BitOperations.TrailingZeroCount(mask);

                _callbacks[groupIndex]();

                mask &= ~(1UL << groupIndex);
            }
            while (mask != 0);

            // The dirty bits are cleared only after all callbacks have run, so any bit inside
            // checkMask that gets set while the callbacks execute is cleared here as well.
            _dirtyMask &= ~checkMask;
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
new file mode 100644
index 00000000..00e09a31
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -0,0 +1,1448 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed.Blender;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Texture;
using System;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
    /// <summary>
    /// GPU state updater.
/// </summary>
    class StateUpdater
    {
        // Group indices used with the update tracker. Each value is the position of the
        // corresponding entry in the callback table built by the constructor, so these
        // constants must be kept in sync with that table whenever entries are added,
        // removed or reordered.
        public const int ShaderStateIndex = 26;
        public const int RasterizerStateIndex = 15;
        public const int ScissorStateIndex = 16;
        public const int VertexBufferStateIndex = 0;
        public const int PrimitiveRestartStateIndex = 12;
        public const int RenderTargetStateIndex = 27;

        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly DeviceStateWithShadow<ThreedClassState> _state;
        private readonly DrawState _drawState;
        private readonly AdvancedBlendManager _blendManager;

        private readonly StateUpdateTracker<ThreedClassState> _updateTracker;

        private readonly ShaderProgramInfo[] _currentProgramInfo;
        private ShaderSpecializationState _shaderSpecState;
        private SpecializationStateUpdater _currentSpecState;

        private ProgramPipelineState _pipeline;

        private bool _vsUsesDrawParameters;
        private bool _vtgWritesRtLayer;
        private byte _vsClipDistancesWritten;
        private uint _vbEnableMask;

        // Values observed on the previous update, used to detect changes
        // that are not signalled through register writes.
        private bool _prevDrawIndexed;
        private bool _prevDrawIndirect;
        private IndexType _prevIndexType;
        private uint _prevFirstVertex;
        private bool _prevTfEnable;

        private uint _prevRtNoAlphaMask;

        /// <summary>
        /// Creates a new instance of the state updater.
/// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="state">3D engine state</param>
        /// <param name="drawState">Draw state</param>
        /// <param name="blendManager">Advanced blend manager</param>
        /// <param name="spec">Specialization state updater</param>
        public StateUpdater(
            GpuContext context,
            GpuChannel channel,
            DeviceStateWithShadow<ThreedClassState> state,
            DrawState drawState,
            AdvancedBlendManager blendManager,
            SpecializationStateUpdater spec)
        {
            _context = context;
            _channel = channel;
            _state = state;
            _drawState = drawState;
            _blendManager = blendManager;
            _currentProgramInfo = new ShaderProgramInfo[Constants.ShaderStages];
            _currentSpecState = spec;

            // ShaderState must be updated after other state updates, as specialization/pipeline state is used when fetching shaders.
            // Render target state must appear after shader state as it depends on information from the currently bound shader.
            // Rasterizer and scissor states are checked by render target clear, their indexes
            // must be updated on the constants "RasterizerStateIndex" and "ScissorStateIndex" if modified.
            // The vertex buffer state may be forced dirty when an indexed draw starts, the "VertexBufferStateIndex"
            // constant must be updated if modified.
            // The order of the other state updates doesn't matter.
            _updateTracker = new StateUpdateTracker<ThreedClassState>(new[]
            {
                // Index 0: VertexBufferStateIndex.
                new StateUpdateCallbackEntry(UpdateVertexBufferState,
                    nameof(ThreedClassState.VertexBufferDrawState),
                    nameof(ThreedClassState.VertexBufferInstanced),
                    nameof(ThreedClassState.VertexBufferState),
                    nameof(ThreedClassState.VertexBufferEndAddress)),

                // Must be done after vertex buffer updates.
                new StateUpdateCallbackEntry(UpdateVertexAttribState, nameof(ThreedClassState.VertexAttribState)),

                new StateUpdateCallbackEntry(UpdateBlendState,
                    nameof(ThreedClassState.BlendUcodeEnable),
                    nameof(ThreedClassState.BlendUcodeSize),
                    nameof(ThreedClassState.BlendIndependent),
                    nameof(ThreedClassState.BlendConstant),
                    nameof(ThreedClassState.BlendStateCommon),
                    nameof(ThreedClassState.BlendEnableCommon),
                    nameof(ThreedClassState.BlendEnable),
                    nameof(ThreedClassState.BlendState)),

                new StateUpdateCallbackEntry(UpdateFaceState, nameof(ThreedClassState.FaceState)),

                new StateUpdateCallbackEntry(UpdateStencilTestState,
                    nameof(ThreedClassState.StencilBackMasks),
                    nameof(ThreedClassState.StencilTestState),
                    nameof(ThreedClassState.StencilBackTestState)),

                new StateUpdateCallbackEntry(UpdateDepthTestState,
                    nameof(ThreedClassState.DepthTestEnable),
                    nameof(ThreedClassState.DepthWriteEnable),
                    nameof(ThreedClassState.DepthTestFunc)),

                new StateUpdateCallbackEntry(UpdateTessellationState,
                    nameof(ThreedClassState.TessMode),
                    nameof(ThreedClassState.TessOuterLevel),
                    nameof(ThreedClassState.TessInnerLevel),
                    nameof(ThreedClassState.PatchVertices)),

                new StateUpdateCallbackEntry(UpdateViewportTransform,
                    nameof(ThreedClassState.DepthMode),
                    nameof(ThreedClassState.ViewportTransform),
                    nameof(ThreedClassState.ViewportExtents),
                    nameof(ThreedClassState.YControl),
                    nameof(ThreedClassState.ViewportTransformEnable)),

                new StateUpdateCallbackEntry(UpdateLogicOpState, nameof(ThreedClassState.LogicOpState)),

                new StateUpdateCallbackEntry(UpdateDepthClampState, nameof(ThreedClassState.ViewVolumeClipControl)),

                new StateUpdateCallbackEntry(UpdatePolygonMode,
                    nameof(ThreedClassState.PolygonModeFront),
                    nameof(ThreedClassState.PolygonModeBack)),

                new StateUpdateCallbackEntry(UpdateDepthBiasState,
                    nameof(ThreedClassState.DepthBiasState),
                    nameof(ThreedClassState.DepthBiasFactor),
                    nameof(ThreedClassState.DepthBiasUnits),
                    nameof(ThreedClassState.DepthBiasClamp)),

                // Index 12: PrimitiveRestartStateIndex.
                new StateUpdateCallbackEntry(UpdatePrimitiveRestartState, nameof(ThreedClassState.PrimitiveRestartState)),

                new StateUpdateCallbackEntry(UpdateLineState,
                    nameof(ThreedClassState.LineWidthSmooth),
                    nameof(ThreedClassState.LineSmoothEnable)),

                new StateUpdateCallbackEntry(UpdateRtColorMask,
                    nameof(ThreedClassState.RtColorMaskShared),
                    nameof(ThreedClassState.RtColorMask)),

                // Index 15: RasterizerStateIndex.
                new StateUpdateCallbackEntry(UpdateRasterizerState, nameof(ThreedClassState.RasterizeEnable)),

                // Index 16: ScissorStateIndex.
                new StateUpdateCallbackEntry(UpdateScissorState,
                    nameof(ThreedClassState.ScissorState),
                    nameof(ThreedClassState.ScreenScissorState)),

                new StateUpdateCallbackEntry(UpdateTfBufferState, nameof(ThreedClassState.TfBufferState)),
                new StateUpdateCallbackEntry(UpdateUserClipState, nameof(ThreedClassState.ClipDistanceEnable)),

                new StateUpdateCallbackEntry(UpdateAlphaTestState,
                    nameof(ThreedClassState.AlphaTestEnable),
                    nameof(ThreedClassState.AlphaTestRef),
                    nameof(ThreedClassState.AlphaTestFunc)),

                new StateUpdateCallbackEntry(UpdateSamplerPoolState,
                    nameof(ThreedClassState.SamplerPoolState),
                    nameof(ThreedClassState.SamplerIndex)),

                new StateUpdateCallbackEntry(UpdateTexturePoolState, nameof(ThreedClassState.TexturePoolState)),

                new StateUpdateCallbackEntry(UpdatePointState,
                    nameof(ThreedClassState.PointSize),
                    nameof(ThreedClassState.VertexProgramPointSize),
                    nameof(ThreedClassState.PointSpriteEnable),
                    nameof(ThreedClassState.PointCoordReplace)),

                new StateUpdateCallbackEntry(UpdateIndexBufferState,
                    nameof(ThreedClassState.IndexBufferState),
                    nameof(ThreedClassState.IndexBufferCount)),

                new StateUpdateCallbackEntry(UpdateMultisampleState,
                    nameof(ThreedClassState.AlphaToCoverageDitherEnable),
                    nameof(ThreedClassState.MultisampleControl)),

                new StateUpdateCallbackEntry(UpdateEarlyZState,
                    nameof(ThreedClassState.EarlyZForce)),

                // Index 26: ShaderStateIndex.
                new StateUpdateCallbackEntry(UpdateShaderState,
                    nameof(ThreedClassState.ShaderBaseAddress),
                    nameof(ThreedClassState.ShaderState)),

                // Index 27: RenderTargetStateIndex.
                new StateUpdateCallbackEntry(UpdateRenderTargetState,
                    nameof(ThreedClassState.RtColorState),
                    nameof(ThreedClassState.RtDepthStencilState),
                    nameof(ThreedClassState.RtControl),
                    nameof(ThreedClassState.RtDepthStencilSize),
                    nameof(ThreedClassState.RtDepthStencilEnable)),
            });
        }

        /// <summary>
        /// Sets a register at a specific offset as dirty.
        /// This must be called if the register value was modified.
        /// </summary>
        /// <param name="offset">Register offset</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetDirty(int offset)
        {
            _updateTracker.SetDirty(offset);
        }

        /// <summary>
        /// Force all the guest state to be marked as dirty.
        /// The next call to <see cref="Update"/> will update all the host state.
        /// </summary>
        public void SetAllDirty()
        {
            _updateTracker.SetAllDirty();
        }

        /// <summary>
        /// Updates host state for any modified guest state, since the last time this function was called.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Update()
        {
            // The vertex buffer size is calculated using a different
            // method when doing indexed draws, so we need to make sure
            // to update the vertex buffers if we are doing a regular
            // draw after an indexed one and vice-versa.
            if (_drawState.DrawIndexed != _prevDrawIndexed)
            {
                _updateTracker.ForceDirty(VertexBufferStateIndex);

                // If PrimitiveRestartDrawArrays is false and this is a non-indexed draw, we need to ensure primitive restart is disabled.
                // If PrimitiveRestartDrawArrays is false and this is an indexed draw, we need to ensure primitive restart enable matches GPU state.
                // If PrimitiveRestartDrawArrays is true, then primitive restart enable should always match GPU state.
                // That is because "PrimitiveRestartDrawArrays" is not configurable on the backend, it is always
                // true on OpenGL and always false on Vulkan.
                if (!_state.State.PrimitiveRestartDrawArrays && _state.State.PrimitiveRestartState.Enable)
                {
                    _updateTracker.ForceDirty(PrimitiveRestartStateIndex);
                }

                _prevDrawIndexed = _drawState.DrawIndexed;
            }

            // Some draw parameters are used to restrict the vertex buffer size,
            // but they can't be used on indirect draws because their values are unknown in this case.
            // When switching between indirect and non-indirect draw, we need to
            // make sure the vertex buffer sizes are still correct.
            if (_drawState.DrawIndirect != _prevDrawIndirect)
            {
                _updateTracker.ForceDirty(VertexBufferStateIndex);

                // Remember the new mode. Without this the comparison above stays true for every
                // indirect draw, forcing a vertex buffer update each time instead of only on a switch.
                _prevDrawIndirect = _drawState.DrawIndirect;
            }

            // In some cases, the index type is also used to guess the
            // vertex buffer size, so we must update it if the type changed too.
            if (_drawState.DrawIndexed &&
                (_prevIndexType != _state.State.IndexBufferState.Type ||
                 _prevFirstVertex != _state.State.FirstVertex))
            {
                _updateTracker.ForceDirty(VertexBufferStateIndex);
                _prevIndexType = _state.State.IndexBufferState.Type;
                _prevFirstVertex = _state.State.FirstVertex;
            }

            bool tfEnable = _state.State.TfEnable;

            // Transform feedback is ended before any state is updated and
            // (re)started only after all bindings have been committed.
            if (!tfEnable && _prevTfEnable)
            {
                _context.Renderer.Pipeline.EndTransformFeedback();
                _prevTfEnable = false;
            }

            // Must be checked before the tracker update below, which clears the dirty bits.
            if (_updateTracker.IsDirty(RenderTargetStateIndex))
            {
                UpdateRenderTargetSpecialization();
            }

            _updateTracker.Update(ulong.MaxValue);

            // If any state that the shader depends on changed,
            // then we may need to compile/bind a different version
            // of the shader for the new state.
            if (_shaderSpecState != null && _currentSpecState.HasChanged())
            {
                if (!_shaderSpecState.MatchesGraphics(_channel, ref _currentSpecState.GetPoolState(), ref _currentSpecState.GetGraphicsState(), _vsUsesDrawParameters, false))
                {
                    // Shader must be reloaded. _vtgWritesRtLayer should not change.
                    UpdateShaderState();
                }
            }

            CommitBindings();

            if (tfEnable && !_prevTfEnable)
            {
                _context.Renderer.Pipeline.BeginTransformFeedback(_drawState.Topology);
                _prevTfEnable = true;
            }
        }

        /// <summary>
        /// Updates the host state for any modified guest state group with the respective bit set on <paramref name="mask"/>.
        /// </summary>
        /// <param name="mask">Mask, where each bit set corresponds to a group index that should be checked and updated</param>
        public void Update(ulong mask)
        {
            _updateTracker.Update(mask);
        }

        /// <summary>
        /// Ensures that the bindings are visible to the host GPU.
        /// Note: this actually performs the binding using the host graphics API.
        /// </summary>
        private void CommitBindings()
        {
            UpdateStorageBuffers();

            bool unalignedChanged = _currentSpecState.SetHasUnalignedStorageBuffer(_channel.BufferManager.HasUnalignedStorageBuffers);

            if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || unalignedChanged)
            {
                // Shader must be reloaded. _vtgWritesRtLayer should not change.
                UpdateShaderState();
            }

            _channel.BufferManager.CommitGraphicsBindings();
        }

        /// <summary>
        /// Updates storage buffer bindings.
/// </summary>
        private void UpdateStorageBuffers()
        {
            for (int stage = 0; stage < Constants.ShaderStages; stage++)
            {
                ShaderProgramInfo stageInfo = _currentProgramInfo[stage];

                // Stages without a bound program have no storage buffers to update.
                if (stageInfo != null)
                {
                    for (int i = 0; i < stageInfo.SBuffers.Count; i++)
                    {
                        BufferDescriptor descriptor = stageInfo.SBuffers[i];

                        // The storage buffer descriptor is read from the stage's uniform buffer 0,
                        // at a fixed offset derived from the stage and the buffer slot.
                        ulong uniformBufferAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, 0);

                        int descriptorOffset = 0x110 + stage * 0x100 + descriptor.Slot * 0x10;

                        ulong descriptorAddress = uniformBufferAddress + (ulong)descriptorOffset;

                        SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

                        _channel.BufferManager.SetGraphicsStorageBuffer(
                            stage,
                            descriptor.Slot,
                            sbDescriptor.PackAddress(),
                            (uint)sbDescriptor.Size,
                            descriptor.Flags);
                    }
                }
            }
        }

        /// <summary>
        /// Updates tessellation state based on the guest GPU state.
        /// </summary>
        private void UpdateTessellationState()
        {
            var patchVertices = _state.State.PatchVertices;

            _pipeline.PatchControlPoints = (uint)patchVertices;

            _context.Renderer.Pipeline.SetPatchParameters(
                patchVertices,
                _state.State.TessOuterLevel.AsSpan(),
                _state.State.TessInnerLevel.AsSpan());

            _currentSpecState.SetTessellationMode(_state.State.TessMode);
        }

        /// <summary>
        /// Updates transform feedback buffer state based on the guest GPU state.
        /// </summary>
        private void UpdateTfBufferState()
        {
            for (int index = 0; index < Constants.TotalTransformFeedbackBuffers; index++)
            {
                TfBufferState tfb = _state.State.TfBufferState[index];

                if (tfb.Enable)
                {
                    _channel.BufferManager.SetTransformFeedbackBuffer(index, tfb.Address.Pack(), (uint)tfb.Size);
                }
                else
                {
                    // Disabled buffers are bound with a zero address and size.
                    _channel.BufferManager.SetTransformFeedbackBuffer(index, 0, 0);
                }
            }
        }

        /// <summary>
        /// Updates Rasterizer primitive discard state based on guest gpu state.
/// </summary>
        private void UpdateRasterizerState()
        {
            // The host setting is a "discard" flag, i.e. the inverse of the guest enable register.
            bool enable = _state.State.RasterizeEnable;
            _pipeline.RasterizerDiscard = !enable;
            _context.Renderer.Pipeline.SetRasterizerDiscard(!enable);
        }

        /// <summary>
        /// Updates render targets (color and depth-stencil buffers) based on current render target state.
        /// </summary>
        private void UpdateRenderTargetState()
        {
            UpdateRenderTargetState(RenderTargetUpdateFlags.UpdateAll);
        }

        /// <summary>
        /// Updates render targets (color and depth-stencil buffers) based on current render target state.
        /// </summary>
        /// <param name="updateFlags">Flags indicating which render targets should be updated and how</param>
        /// <param name="singleUse">If this is not -1, it indicates that only the given indexed target will be used.</param>
        public void UpdateRenderTargetState(RenderTargetUpdateFlags updateFlags, int singleUse = -1)
        {
            var memoryManager = _channel.MemoryManager;
            var rtControl = _state.State.RtControl;

            bool useControl = updateFlags.HasFlag(RenderTargetUpdateFlags.UseControl);
            bool layered = updateFlags.HasFlag(RenderTargetUpdateFlags.Layered);
            bool singleColor = updateFlags.HasFlag(RenderTargetUpdateFlags.SingleColor);

            // With UseControl, the target count and permutation come from the RtControl register;
            // otherwise every target is considered, in index order.
            int count = useControl ? rtControl.UnpackCount() : Constants.TotalRenderTargets;

            var msaaMode = _state.State.RtMsaaMode;

            int samplesInX = msaaMode.SamplesInX();
            int samplesInY = msaaMode.SamplesInY();

            var scissor = _state.State.ScreenScissorState;
            Size sizeHint = new Size((scissor.X + scissor.Width) * samplesInX, (scissor.Y + scissor.Height) * samplesInY, 1);

            // Shrinks to the smallest bound target below; stays at int.MaxValue if nothing is bound.
            int clipRegionWidth = int.MaxValue;
            int clipRegionHeight = int.MaxValue;

            bool changedScale = false;
            uint rtNoAlphaMask = 0;

            for (int index = 0; index < Constants.TotalRenderTargets; index++)
            {
                int rtIndex = useControl ? rtControl.UnpackPermutationIndex(index) : index;

                var colorState = _state.State.RtColorState[rtIndex];

                // Targets past the count, disabled targets, and (with SingleColor) every target
                // other than singleUse are unbound.
                if (index >= count || !IsRtEnabled(colorState) || (singleColor && index != singleUse))
                {
                    changedScale |= _channel.TextureManager.SetRenderTargetColor(index, null);

                    continue;
                }

                // One bit per bound target whose format reports having no alpha component.
                if (colorState.Format.NoAlpha())
                {
                    rtNoAlphaMask |= 1u << index;
                }

                Image.Texture color = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                    memoryManager,
                    colorState,
                    _vtgWritesRtLayer || layered,
                    samplesInX,
                    samplesInY,
                    sizeHint);

                changedScale |= _channel.TextureManager.SetRenderTargetColor(index, color);

                if (color != null)
                {
                    // Texture dimensions are divided by the sample counts before being compared.
                    if (clipRegionWidth > color.Width / samplesInX)
                    {
                        clipRegionWidth = color.Width / samplesInX;
                    }

                    if (clipRegionHeight > color.Height / samplesInY)
                    {
                        clipRegionHeight = color.Height / samplesInY;
                    }
                }
            }

            bool dsEnable = _state.State.RtDepthStencilEnable;

            Image.Texture depthStencil = null;

            // Depth-stencil is unbound (null) unless it is enabled and the caller asked for it to be updated.
            if (dsEnable && updateFlags.HasFlag(RenderTargetUpdateFlags.UpdateDepthStencil))
            {
                var dsState = _state.State.RtDepthStencilState;
                var dsSize = _state.State.RtDepthStencilSize;

                depthStencil = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                    memoryManager,
                    dsState,
                    dsSize,
                    _vtgWritesRtLayer || layered,
                    samplesInX,
                    samplesInY,
                    sizeHint);

                if (depthStencil != null)
                {
                    if (clipRegionWidth > depthStencil.Width / samplesInX)
                    {
                        clipRegionWidth = depthStencil.Width / samplesInX;
                    }

                    if (clipRegionHeight > depthStencil.Height / samplesInY)
                    {
                        clipRegionHeight = depthStencil.Height / samplesInY;
                    }
                }
            }

            changedScale |= _channel.TextureManager.SetRenderTargetDepthStencil(depthStencil);

            if (changedScale)
            {
                float oldScale = _channel.TextureManager.RenderTargetScale;
                _channel.TextureManager.UpdateRenderTargetScale(singleUse);

                if (oldScale != _channel.TextureManager.RenderTargetScale)
                {
                    _context.Renderer.Pipeline.SetRenderTargetScale(_channel.TextureManager.RenderTargetScale);

                    // Viewports and scissors are both computed from the render target scale.
                    UpdateViewportTransform();
                    UpdateScissorState();
                }
            }

            _channel.TextureManager.SetClipRegion(clipRegionWidth, clipRegionHeight);

            // NOTE(review): presumably blending depends on whether the destination has alpha;
            // confirm against UpdateBlendState. Only tracked when the control register is in use.
            if (useControl && _prevRtNoAlphaMask != rtNoAlphaMask)
            {
                _prevRtNoAlphaMask = rtNoAlphaMask;

                UpdateBlendState();
            }
        }

        /// <summary>
        /// Updates specialization state based on render target state.
        /// </summary>
        public void UpdateRenderTargetSpecialization()
        {
            _currentSpecState.SetFragmentOutputTypes(_state.State.RtControl, ref _state.State.RtColorState);
        }

        /// <summary>
        /// Checks if a render target color buffer is used.
        /// </summary>
        /// <param name="colorState">Color buffer information</param>
        /// <returns>True if the specified buffer is enabled/used, false otherwise</returns>
        internal static bool IsRtEnabled(RtColorState colorState)
        {
            // Colors are disabled by writing 0 to the format.
            // A target with a zero width/stride is also treated as disabled.
            return colorState.Format != 0 && colorState.WidthOrStride != 0;
        }

        /// <summary>
        /// Updates host scissor test state based on current GPU state.
/// </summary>
        public void UpdateScissorState()
        {
            const int MinX = 0;
            const int MinY = 0;
            const int MaxW = 0xffff;
            const int MaxH = 0xffff;

            Span<Rectangle<int>> regions = stackalloc Rectangle<int>[Constants.TotalViewports];

            for (int index = 0; index < Constants.TotalViewports; index++)
            {
                ScissorState scissor = _state.State.ScissorState[index];

                // An enabled scissor that covers the full range is treated the same as a disabled one.
                bool enable = scissor.Enable && (scissor.X1 != MinX ||
                                                 scissor.Y1 != MinY ||
                                                 scissor.X2 != MaxW ||
                                                 scissor.Y2 != MaxH);

                if (enable)
                {
                    int x = scissor.X1;
                    int y = scissor.Y1;
                    int width = scissor.X2 - x;
                    int height = scissor.Y2 - y;

                    if (_state.State.YControl.HasFlag(YControl.NegateY))
                    {
                        // Flip the rectangle vertically, relative to the screen scissor height.
                        ref var screenScissor = ref _state.State.ScreenScissorState;
                        y = screenScissor.Height - height - y;

                        // If the flipped rectangle starts above the top edge, clip the part that is outside.
                        if (y < 0)
                        {
                            height += y;
                            y = 0;
                        }
                    }

                    float scale = _channel.TextureManager.RenderTargetScale;
                    if (scale != 1f)
                    {
                        // The origin is truncated while the size is rounded up.
                        x = (int)(x * scale);
                        y = (int)(y * scale);
                        width = (int)MathF.Ceiling(width * scale);
                        height = (int)MathF.Ceiling(height * scale);
                    }

                    regions[index] = new Rectangle<int>(x, y, width, height);
                }
                else
                {
                    // Full-range rectangle; the render target scale is not applied in this case.
                    regions[index] = new Rectangle<int>(MinX, MinY, MaxW, MaxH);
                }
            }

            _context.Renderer.Pipeline.SetScissors(regions);
        }

        /// <summary>
        /// Updates host depth clamp state based on current GPU state.
        /// </summary>
        private void UpdateDepthClampState()
        {
            // The guest register carries a "disabled" flag, so clamping is on when that flag is clear.
            ViewVolumeClipControl clip = _state.State.ViewVolumeClipControl;
            bool clamp = (clip & ViewVolumeClipControl.DepthClampDisabled) == 0;

            _pipeline.DepthClampEnable = clamp;
            _context.Renderer.Pipeline.SetDepthClamp(clamp);
        }

        /// <summary>
        /// Updates host alpha test state based on current GPU state.
/// </summary>
        private void UpdateAlphaTestState()
        {
            var enable = _state.State.AlphaTestEnable;
            var reference = _state.State.AlphaTestRef;
            var func = _state.State.AlphaTestFunc;

            // The same values go to the host pipeline and to the shader specialization state.
            _context.Renderer.Pipeline.SetAlphaTest(enable, reference, func);

            _currentSpecState.SetAlphaTest(enable, reference, func);
        }

        /// <summary>
        /// Updates host depth test state based on current GPU state.
        /// </summary>
        private void UpdateDepthTestState()
        {
            var depthTest = new DepthTestDescriptor(
                _state.State.DepthTestEnable,
                _state.State.DepthWriteEnable,
                _state.State.DepthTestFunc);

            _pipeline.DepthTest = depthTest;
            _context.Renderer.Pipeline.SetDepthTest(depthTest);
        }

        /// <summary>
        /// Updates host viewport transform and clipping state based on current GPU state.
        /// </summary>
        private void UpdateViewportTransform()
        {
            var yControl = _state.State.YControl;
            var face = _state.State.FaceState;

            bool disableTransform = _state.State.ViewportTransformEnable == 0;

            UpdateFrontFace(yControl, face.FrontFace);
            UpdateDepthMode();

            bool flipY = yControl.HasFlag(YControl.NegateY);

            Span<Viewport> viewports = stackalloc Viewport[Constants.TotalViewports];

            for (int index = 0; index < Constants.TotalViewports; index++)
            {
                if (disableTransform)
                {
                    // With the transform disabled, every viewport covers the (scaled) screen scissor
                    // area with identity swizzles and a 0 to 1 depth range.
                    ref var screenScissor = ref _state.State.ScreenScissorState;

                    float rtScale = _channel.TextureManager.RenderTargetScale;
                    var screenRect = new Rectangle<float>(0, 0, (screenScissor.X + screenScissor.Width) * rtScale, (screenScissor.Y + screenScissor.Height) * rtScale);

                    viewports[index] = new Viewport(screenRect, ViewportSwizzle.PositiveX, ViewportSwizzle.PositiveY, ViewportSwizzle.PositiveZ, ViewportSwizzle.PositiveW, 0, 1);
                }
                else
                {
                    ref var transform = ref _state.State.ViewportTransform[index];
                    ref var extents = ref _state.State.ViewportExtents[index];

                    float scaleX = MathF.Abs(transform.ScaleX);
                    float scaleY = transform.ScaleY;

                    if (flipY)
                    {
                        scaleY = -scaleY;
                    }

                    // Without host swizzle support, a negative Y swizzle is emulated by flipping the viewport.
                    if (!_context.Capabilities.SupportsViewportSwizzle && transform.UnpackSwizzleY() == ViewportSwizzle.NegativeY)
                    {
                        scaleY = -scaleY;
                    }

                    // The guest transform is centre/half-extent based; convert it to origin/size.
                    float x = transform.TranslateX - scaleX;
                    float y = transform.TranslateY - scaleY;

                    float width = scaleX * 2;
                    float height = scaleY * 2;

                    float rtScale = _channel.TextureManager.RenderTargetScale;
                    if (rtScale != 1f)
                    {
                        x *= rtScale;
                        y *= rtScale;
                        width *= rtScale;
                        height *= rtScale;
                    }

                    var region = new Rectangle<float>(x, y, width, height);

                    ViewportSwizzle swizzleX = transform.UnpackSwizzleX();
                    ViewportSwizzle swizzleY = transform.UnpackSwizzleY();
                    ViewportSwizzle swizzleZ = transform.UnpackSwizzleZ();
                    ViewportSwizzle swizzleW = transform.UnpackSwizzleW();

                    float depthNear = extents.DepthNear;
                    float depthFar = extents.DepthFar;

                    // A negative Z scale swaps the near and far depth values.
                    if (transform.ScaleZ < 0)
                    {
                        (depthNear, depthFar) = (depthFar, depthNear);
                    }

                    viewports[index] = new Viewport(region, swizzleX, swizzleY, swizzleZ, swizzleW, depthNear, depthFar);
                }
            }

            _context.Renderer.Pipeline.SetDepthMode(GetDepthMode());
            _context.Renderer.Pipeline.SetViewports(viewports, disableTransform);

            _currentSpecState.SetViewportTransformDisable(_state.State.ViewportTransformEnable == 0);
            _currentSpecState.SetDepthMode(GetDepthMode() == DepthMode.MinusOneToOne);
        }

        /// <summary>
        /// Updates the depth mode (0 to 1 or -1 to 1) based on the current viewport and depth mode register state.
        /// </summary>
        private void UpdateDepthMode()
        {
            var depthMode = GetDepthMode();

            _context.Renderer.Pipeline.SetDepthMode(depthMode);
        }

        /// <summary>
        /// Updates polygon mode state based on current GPU state.
        /// </summary>
        private void UpdatePolygonMode()
        {
            var frontMode = _state.State.PolygonModeFront;
            var backMode = _state.State.PolygonModeBack;

            _context.Renderer.Pipeline.SetPolygonMode(frontMode, backMode);
        }

        /// <summary>
        /// Updates host depth bias (also called polygon offset) state based on current GPU state.
/// </summary>
private void UpdateDepthBiasState()
{
    var biasState = _state.State.DepthBiasState;

    // Build the mask of primitive modes that have depth bias enabled.
    PolygonModeMask enabledModes = 0;

    if (biasState.PointEnable)
    {
        enabledModes |= PolygonModeMask.Point;
    }

    if (biasState.LineEnable)
    {
        enabledModes |= PolygonModeMask.Line;
    }

    if (biasState.FillEnable)
    {
        enabledModes |= PolygonModeMask.Fill;
    }

    float biasFactor = _state.State.DepthBiasFactor;
    float biasUnits = _state.State.DepthBiasUnits;
    float biasClamp = _state.State.DepthBiasClamp;

    _pipeline.BiasEnable = enabledModes;

    // The host receives half of the guest units value.
    _context.Renderer.Pipeline.SetDepthBias(enabledModes, biasFactor, biasUnits / 2f, biasClamp);
}

/// <summary>
/// Updates host stencil test state based on current GPU state.
/// When the back test is not two sided, the front face values are used for the back face as well.
/// </summary>
private void UpdateStencilTestState()
{
    var backMasks = _state.State.StencilBackMasks;
    var test = _state.State.StencilTestState;
    var backTest = _state.State.StencilBackTestState;

    StencilTestDescriptor descriptor = backTest.TwoSided
        ? new StencilTestDescriptor(
            test.Enable,
            test.FrontFunc,
            test.FrontSFail,
            test.FrontDpPass,
            test.FrontDpFail,
            test.FrontFuncRef,
            test.FrontFuncMask,
            test.FrontMask,
            backTest.BackFunc,
            backTest.BackSFail,
            backTest.BackDpPass,
            backTest.BackDpFail,
            backMasks.FuncRef,
            backMasks.FuncMask,
            backMasks.Mask)
        : new StencilTestDescriptor(
            test.Enable,
            test.FrontFunc,
            test.FrontSFail,
            test.FrontDpPass,
            test.FrontDpFail,
            test.FrontFuncRef,
            test.FrontFuncMask,
            test.FrontMask,
            test.FrontFunc,
            test.FrontSFail,
            test.FrontDpPass,
            test.FrontDpFail,
            test.FrontFuncRef,
            test.FrontFuncMask,
            test.FrontMask);

    _pipeline.StencilTest = descriptor;
    _context.Renderer.Pipeline.SetStencilTest(descriptor);
}

/// <summary>
/// Updates
/// user-defined clipping based on the guest GPU state.
/// </summary>
private void UpdateUserClipState()
{
    // Only distances that are both enabled by the guest and written by the vertex shader are turned on.
    uint enabledDistances = _state.State.ClipDistanceEnable & _vsClipDistancesWritten;

    for (int distance = 0; distance < Constants.TotalClipDistances; distance++)
    {
        bool isEnabled = ((enabledDistances >> distance) & 1u) != 0;

        _context.Renderer.Pipeline.SetUserClipDistance(distance, isEnabled);
    }
}

/// <summary>
/// Updates current sampler pool address and size based on guest GPU state.
/// </summary>
private void UpdateSamplerPoolState()
{
    var texturePool = _state.State.TexturePoolState;
    var samplerPool = _state.State.SamplerPoolState;
    var samplerIndex = _state.State.SamplerIndex;

    // With "via header index", the maximum ID comes from the texture pool rather than the sampler pool.
    int maximumId = samplerPool.MaximumId;

    if (samplerIndex == SamplerIndex.ViaHeaderIndex)
    {
        maximumId = texturePool.MaximumId;
    }

    _channel.TextureManager.SetGraphicsSamplerPool(samplerPool.Address.Pack(), maximumId, samplerIndex);
}

/// <summary>
/// Updates current texture pool address and size based on guest GPU state.
/// </summary>
private void UpdateTexturePoolState()
{
    var pool = _state.State.TexturePoolState;
    var textureManager = _channel.TextureManager;

    textureManager.SetGraphicsTexturePool(pool.Address.Pack(), pool.MaximumId);
    textureManager.SetGraphicsTextureBufferIndex((int)_state.State.TextureBufferIndex);

    _currentSpecState.SetPoolState(GetPoolState());
}

/// <summary>
/// Updates host vertex attributes based on guest GPU state.
/// </summary>
private void UpdateVertexAttribState()
{
    uint enabledBuffers = _vbEnableMask;

    Span<VertexAttribDescriptor> descriptors = stackalloc VertexAttribDescriptor[Constants.TotalVertexAttribs];

    for (int attribIndex = 0; attribIndex < Constants.TotalVertexAttribs; attribIndex++)
    {
        var attrib = _state.State.VertexAttribState[attribIndex];

        int bufferIndex = attrib.UnpackBufferIndex();
        bool bufferEnabled = (enabledBuffers & (1u << bufferIndex)) != 0;

        if (!bufferEnabled)
        {
            // Using a vertex buffer that doesn't exist is invalid, so a dummy attribute is used for those cases.
            descriptors[attribIndex] = new VertexAttribDescriptor(0, 0, true, Format.R32G32B32A32Float);

            continue;
        }

        if (!FormatTable.TryGetAttribFormat(attrib.UnpackFormat(), out Format format))
        {
            Logger.Debug?.Print(LogClass.Gpu, $"Invalid attribute format 0x{attrib.UnpackFormat():X}.");

            // Fall back to a four component 32-bit format matching the attribute type.
            format = attrib.UnpackType() switch
            {
                VertexAttribType.Sint => Format.R32G32B32A32Sint,
                VertexAttribType.Uint => Format.R32G32B32A32Uint,
                _ => Format.R32G32B32A32Float
            };
        }

        descriptors[attribIndex] = new VertexAttribDescriptor(
            bufferIndex,
            attrib.UnpackOffset(),
            attrib.UnpackIsConstant(),
            format);
    }

    _pipeline.SetVertexAttribs(descriptors);
    _context.Renderer.Pipeline.SetVertexAttribs(descriptors);
    _currentSpecState.SetAttributeTypes(ref _state.State.VertexAttribState);
}

/// <summary>
/// Updates host line width based on guest GPU state.
/// </summary>
private void UpdateLineState()
{
    float lineWidth = _state.State.LineWidthSmooth;

    _pipeline.LineWidth = lineWidth;
    _context.Renderer.Pipeline.SetLineParameters(lineWidth, _state.State.LineSmoothEnable);
}

/// <summary>
/// Updates host point size based on guest GPU state.
/// </summary>
private void UpdatePointState()
{
    float pointSize = _state.State.PointSize;
    bool programPointSize = _state.State.VertexProgramPointSize;
    bool pointSprite = _state.State.PointSpriteEnable;

    // TODO: Need to figure out a way to map PointCoordReplace enable bit.
    Origin origin = (_state.State.PointCoordReplace & 4) != 0 ? Origin.UpperLeft : Origin.LowerLeft;

    _context.Renderer.Pipeline.SetPointParameters(pointSize, programPointSize, pointSprite, origin);

    _currentSpecState.SetProgramPointSizeEnable(programPointSize);
    _currentSpecState.SetPointSize(pointSize);
}

/// <summary>
/// Updates host primitive restart based on guest GPU state.
/// </summary>
private void UpdatePrimitiveRestartState()
{
    PrimitiveRestartState primitiveRestart = _state.State.PrimitiveRestartState;

    // Primitive restart only applies to indexed draws, unless it is explicitly enabled for array draws.
    bool enable = primitiveRestart.Enable && (_drawState.DrawIndexed || _state.State.PrimitiveRestartDrawArrays);

    _pipeline.PrimitiveRestartEnable = enable;
    _context.Renderer.Pipeline.SetPrimitiveRestart(enable, primitiveRestart.Index);
}

/// <summary>
/// Updates host index buffer binding based on guest GPU state.
/// Nothing is bound when the current draw has an index count of zero.
/// </summary>
private void UpdateIndexBufferState()
{
    if (_drawState.IndexCount == 0)
    {
        return;
    }

    var indexBuffer = _state.State.IndexBufferState;

    ulong gpuVa = indexBuffer.Address.Pack();

    // Do not use the end address to calculate the size, because
    // the result may be much larger than the real size of the index buffer.
    // The sum is done in 64-bit so that it cannot wrap around before being widened.
    ulong indexCount = (ulong)((long)_drawState.FirstIndex + _drawState.IndexCount);

    ulong bytesPerIndex = indexBuffer.Type switch
    {
        IndexType.UShort => 2UL,
        IndexType.UInt => 4UL,
        _ => 1UL
    };

    _channel.BufferManager.SetIndexBuffer(gpuVa, indexCount * bytesPerIndex, indexBuffer.Type);
}

/// <summary>
/// Updates host vertex buffer bindings based on guest GPU state.
/// </summary>
private void UpdateVertexBufferState()
{
    IndexType indexType = _state.State.IndexBufferState.Type;
    bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort;

    _drawState.IsAnyVbInstanced = false;

    bool drawIndexed = _drawState.DrawIndexed;
    bool drawIndirect = _drawState.DrawIndirect;
    int drawFirstVertex = _drawState.DrawFirstVertex;
    int drawVertexCount = _drawState.DrawVertexCount;
    uint vbEnableMask = 0;

    for (int index = 0; index < Constants.TotalVertexBuffers; index++)
    {
        var vertexBuffer = _state.State.VertexBufferState[index];

        if (!vertexBuffer.UnpackEnable())
        {
            _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(false, 0, 0);
            _channel.BufferManager.SetVertexBuffer(index, 0, 0, 0, 0);

            continue;
        }

        GpuVa endAddress = _state.State.VertexBufferEndAddress[index];

        ulong address = vertexBuffer.Address.Pack();

        // Query the mapping once; the result feeds both the enable mask and the pipeline descriptor.
        bool isMapped = _channel.MemoryManager.IsMapped(address);

        if (isMapped)
        {
            vbEnableMask |= 1u << index;
        }

        int stride = vertexBuffer.UnpackStride();

        bool instanced = _state.State.VertexBufferInstanced[index];

        int divisor = instanced ? vertexBuffer.Divisor : 0;

        _drawState.IsAnyVbInstanced |= divisor != 0;

        ulong vbSize = endAddress.Pack() - address + 1;
        ulong size;

        if (_drawState.IbStreamer.HasInlineIndexData || drawIndexed || stride == 0 || instanced)
        {
            // This size may be (much) larger than the real vertex buffer size.
            // Avoid calculating it this way, unless we don't have any other option.

            size = vbSize;

            if (stride > 0 && indexTypeSmall && drawIndexed && !drawIndirect && !instanced)
            {
                // If the index type is a small integer type, then we might be still able
                // to reduce the vertex buffer size based on the maximum possible index value.

                ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL;

                maxVertexBufferSize += _state.State.FirstVertex;
                maxVertexBufferSize *= (uint)stride;

                size = Math.Min(size, maxVertexBufferSize);
            }
        }
        else
        {
            // For non-indexed draws, we can guess the size from the vertex count
            // and stride.

            int firstInstance = (int)_state.State.FirstInstance;

            // Computed in 64-bit so that the sum and the multiplication cannot wrap around
            // to a small bogus size before the comparison with the buffer size.
            long vertexCount = (long)firstInstance + drawFirstVertex + drawVertexCount;

            size = Math.Min(vbSize, (ulong)(vertexCount * stride));
        }

        _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(isMapped, stride, divisor);
        _channel.BufferManager.SetVertexBuffer(index, address, size, stride, divisor);
    }

    if (_vbEnableMask != vbEnableMask)
    {
        // Attributes that reference a buffer whose enable bit changed must be rebuilt.
        _vbEnableMask = vbEnableMask;
        UpdateVertexAttribState();
    }
}

/// <summary>
/// Updates host face culling and orientation based on guest GPU state.
/// </summary>
private void UpdateFaceState()
{
    var yControl = _state.State.YControl;
    var face = _state.State.FaceState;

    _pipeline.CullEnable = face.CullEnable;
    _pipeline.CullMode = face.CullFace;
    _context.Renderer.Pipeline.SetFaceCulling(face.CullEnable, face.CullFace);

    UpdateFrontFace(yControl, face.FrontFace);
}

/// <summary>
/// Updates the front face based on the current front face and the origin.
/// The winding is inverted when the <see cref="YControl.TriangleRastFlip"/> flag is not set.
/// </summary>
/// <param name="yControl">Y control register value, where the origin is located</param>
/// <param name="frontFace">Front face</param>
private void UpdateFrontFace(YControl yControl, FrontFace frontFace)
{
    bool isUpperLeftOrigin = !yControl.HasFlag(YControl.TriangleRastFlip);

    if (isUpperLeftOrigin)
    {
        frontFace = frontFace == FrontFace.CounterClockwise ? FrontFace.Clockwise : FrontFace.CounterClockwise;
    }

    _pipeline.FrontFace = frontFace;
    _context.Renderer.Pipeline.SetFrontFace(frontFace);
}

/// <summary>
/// Updates host render target color masks, based on guest GPU state.
/// This defines which color channels are written to each color buffer.
/// </summary>
private void UpdateRtColorMask()
{
    bool rtColorMaskShared = _state.State.RtColorMaskShared;

    Span<uint> componentMasks = stackalloc uint[Constants.TotalRenderTargets];

    for (int index = 0; index < Constants.TotalRenderTargets; index++)
    {
        // When the mask is shared, the mask of the first target applies to every target.
        var colorMask = _state.State.RtColorMask[rtColorMaskShared ? 0 : index];

        // Bit 0 = red, bit 1 = green, bit 2 = blue, bit 3 = alpha.
        uint componentMask = 0;

        componentMask |= colorMask.UnpackRed() ? 1u : 0u;
        componentMask |= colorMask.UnpackGreen() ? 2u : 0u;
        componentMask |= colorMask.UnpackBlue() ? 4u : 0u;
        componentMask |= colorMask.UnpackAlpha() ? 8u : 0u;

        componentMasks[index] = componentMask;
        _pipeline.ColorWriteMask[index] = componentMask;
    }

    _context.Renderer.Pipeline.SetRenderTargetColorMasks(componentMasks);
}

/// <summary>
/// Updates host render target color buffer blending state, based on guest state.
/// </summary>
private void UpdateBlendState()
{
    if (_state.State.BlendUcodeEnable != BlendUcodeEnable.Disabled &&
        _context.Capabilities.SupportsBlendEquationAdvanced &&
        _blendManager.TryGetAdvancedBlend(out var blendDescriptor))
    {
        // Try to HLE it using advanced blend on the host if we can.
        _context.Renderer.Pipeline.SetBlendState(blendDescriptor);

        return;
    }

    // TODO: Blend emulation fallback.
    // Until then, blend microcode that cannot be mapped to host advanced blend
    // falls through to the regular blend state below.

    bool blendIndependent = _state.State.BlendIndependent;
    ColorF blendConstant = _state.State.BlendConstant;

    bool dualSourceBlendEnabled = false;

    if (blendIndependent)
    {
        // Each render target has its own enable bit and blend state.
        for (int index = 0; index < Constants.TotalRenderTargets; index++)
        {
            bool enable = _state.State.BlendEnable[index];
            var blend = _state.State.BlendState[index];

            var descriptor = new BlendDescriptor(
                enable,
                blendConstant,
                blend.ColorOp,
                FilterBlendFactor(blend.ColorSrcFactor, index),
                FilterBlendFactor(blend.ColorDstFactor, index),
                blend.AlphaOp,
                FilterBlendFactor(blend.AlphaSrcFactor, index),
                FilterBlendFactor(blend.AlphaDstFactor, index));

            dualSourceBlendEnabled |= enable && UsesDualSource(
                blend.ColorSrcFactor,
                blend.ColorDstFactor,
                blend.AlphaSrcFactor,
                blend.AlphaDstFactor);

            _pipeline.BlendDescriptors[index] = descriptor;
            _context.Renderer.Pipeline.SetBlendState(index, descriptor);
        }
    }
    else
    {
        // A single common blend state, enabled by the first enable bit, is applied to every render target.
        bool enable = _state.State.BlendEnable[0];
        var blend = _state.State.BlendStateCommon;

        var descriptor = new BlendDescriptor(
            enable,
            blendConstant,
            blend.ColorOp,
            FilterBlendFactor(blend.ColorSrcFactor, 0),
            FilterBlendFactor(blend.ColorDstFactor, 0),
            blend.AlphaOp,
            FilterBlendFactor(blend.AlphaSrcFactor, 0),
            FilterBlendFactor(blend.AlphaDstFactor, 0));

        dualSourceBlendEnabled = enable && UsesDualSource(
            blend.ColorSrcFactor,
            blend.ColorDstFactor,
            blend.AlphaSrcFactor,
            blend.AlphaDstFactor);

        for (int index = 0; index < Constants.TotalRenderTargets; index++)
        {
            _pipeline.BlendDescriptors[index] = descriptor;
            _context.Renderer.Pipeline.SetBlendState(index, descriptor);
        }
    }

    _currentSpecState.SetDualSourceBlendEnabled(dualSourceBlendEnabled);

    // Checks whether any of the four (unfiltered) blend factors is a dual source factor.
    static bool UsesDualSource(BlendFactor colorSrc, BlendFactor colorDst, BlendFactor alphaSrc, BlendFactor alphaDst)
    {
        return colorSrc.IsDualSource() ||
               colorDst.IsDualSource() ||
               alphaSrc.IsDualSource() ||
               alphaDst.IsDualSource();
    }
}

/// <summary>
/// Gets a blend factor for the color target currently.
/// This will return <paramref name="factor"/> unless the target format has no alpha component,
/// in which case it will replace destination alpha factor with a constant factor of one or zero.
/// </summary>
/// <param name="factor">Input factor</param>
/// <param name="index">Color target index</param>
/// <returns>New blend factor</returns>
private BlendFactor FilterBlendFactor(BlendFactor factor, int index)
{
    // RGBX formats are emulated using host RGBA formats, so for a color target format
    // without alpha, an active blend must not use destination alpha as a factor.
    if (!_state.State.RtColorState[index].Format.NoAlpha())
    {
        return factor;
    }

    return factor switch
    {
        BlendFactor.DstAlpha => BlendFactor.One,
        BlendFactor.DstAlphaGl => BlendFactor.One,
        BlendFactor.OneMinusDstAlpha => BlendFactor.Zero,
        BlendFactor.OneMinusDstAlphaGl => BlendFactor.Zero,
        _ => factor
    };
}

/// <summary>
/// Updates host logical operation state, based on guest state.
/// </summary>
private void UpdateLogicOpState()
{
    LogicalOpState guestLogicOp = _state.State.LogicOpState;

    var enable = guestLogicOp.Enable;
    var operation = guestLogicOp.LogicalOp;

    _pipeline.SetLogicOpState(enable, operation);
    _context.Renderer.Pipeline.SetLogicOpState(enable, operation);
}

/// <summary>
/// Updates multisample state, based on guest state.
/// </summary>
private void UpdateMultisampleState()
{
    var control = _state.State.MultisampleControl;
    var ditherEnable = _state.State.AlphaToCoverageDitherEnable;

    // Bit 0 of the control register enables alpha to coverage, bit 4 enables alpha to one.
    bool alphaToCoverageEnable = (control & 1) != 0;
    bool alphaToOneEnable = (control & 0x10) != 0;

    var descriptor = new MultisampleDescriptor(alphaToCoverageEnable, ditherEnable, alphaToOneEnable);

    _context.Renderer.Pipeline.SetMultisampleState(descriptor);

    _currentSpecState.SetAlphaToCoverageEnable(alphaToCoverageEnable, ditherEnable);
}

/// <summary>
/// Updates the early z flag, based on guest state.
/// </summary>
private void UpdateEarlyZState()
{
    _currentSpecState.SetEarlyZForce(_state.State.EarlyZForce);
}

/// <summary>
/// Updates host shaders based on the guest GPU state.
/// </summary>
private void UpdateShaderState()
{
    var shaderCache = _channel.MemoryManager.Physical.ShaderCache;

    _vtgWritesRtLayer = false;

    ShaderAddresses addresses = new ShaderAddresses();
    Span<ulong> addressesSpan = addresses.AsSpan();

    ulong baseAddress = _state.State.ShaderBaseAddress.Pack();

    for (int index = 0; index < 6; index++)
    {
        var shader = _state.State.ShaderState[index];

        // The address at index 1 is always filled in, even when the enable bit is clear.
        if (shader.UnpackEnable() || index == 1)
        {
            addressesSpan[index] = baseAddress + shader.Offset;
        }
    }

    CachedShaderProgram program = shaderCache.GetGraphicsShader(
        ref _state.State,
        ref _pipeline,
        _channel,
        ref _currentSpecState.GetPoolState(),
        ref _currentSpecState.GetGraphicsState(),
        addresses);

    // Consume the modified flag for spec state so that it isn't checked again.
    _currentSpecState.SetShader(program);

    _shaderSpecState = program.SpecializationState;

    byte previousClipDistancesWritten = _vsClipDistancesWritten;

    var vertexShader = program.Shaders[1];

    _drawState.VsUsesInstanceId = vertexShader?.Info.UsesInstanceId ?? false;
    _vsUsesDrawParameters = vertexShader?.Info.UsesDrawParameters ?? false;
    _vsClipDistancesWritten = vertexShader?.Info.ClipDistancesWritten ?? 0;

    if (previousClipDistancesWritten != _vsClipDistancesWritten)
    {
        UpdateUserClipState();
    }

    UpdateShaderBindings(program.Bindings);

    for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
    {
        // Stage information is stored starting at index 1 of the shader array.
        ShaderProgramInfo stageInfo = program.Shaders[stageIndex + 1]?.Info;

        if (stageInfo?.UsesRtLayer == true)
        {
            _vtgWritesRtLayer = true;
        }

        _currentProgramInfo[stageIndex] = stageInfo;
    }

    _context.Renderer.Pipeline.SetProgram(program.HostProgram);
}

/// <summary>
/// Updates bindings consumed by the shader on the texture and buffer managers.
/// </summary>
/// <param name="bindings">Bindings for the active shader</param>
private void UpdateShaderBindings(CachedShaderBindings bindings)
{
    _channel.TextureManager.SetGraphicsBindings(bindings);
    _channel.BufferManager.SetGraphicsBufferBindings(bindings);
}

/// <summary>
/// Gets the current texture pool state.
/// </summary>
/// <returns>Texture pool state</returns>
private GpuChannelPoolState GetPoolState()
{
    ulong poolAddress = _state.State.TexturePoolState.Address.Pack();
    var maximumId = _state.State.TexturePoolState.MaximumId;
    int textureBufferIndex = (int)_state.State.TextureBufferIndex;

    return new GpuChannelPoolState(poolAddress, maximumId, textureBufferIndex);
}

/// <summary>
/// Gets the depth mode that is currently being used (zero to one or minus one to one).
/// </summary>
/// <returns>Current depth mode</returns>
private DepthMode GetDepthMode()
{
    ref var transform = ref _state.State.ViewportTransform[0];
    ref var extents = ref _state.State.ViewportExtents[0];

    bool canGuessFromViewport = !float.IsInfinity(extents.DepthNear) &&
                                !float.IsInfinity(extents.DepthFar) &&
                                (extents.DepthFar - extents.DepthNear) != 0;

    if (!canGuessFromViewport)
    {
        // If we can't guess from the viewport transform, then just use the depth mode register.
        return (DepthMode)(_state.State.DepthMode & 1);
    }

    // Try to guess the depth mode being used on the high level API
    // based on current transform.
    // It is set up like so by said APIs:
    // If depth mode is ZeroToOne:
    //  TranslateZ = Near
    //  ScaleZ = Far - Near
    // If depth mode is MinusOneToOne:
    //  TranslateZ = (Near + Far) / 2
    //  ScaleZ = (Far - Near) / 2
    // DepthNear/Far are sorted such that Near is always less than Far.
    bool translateMatchesExtent = extents.DepthNear == transform.TranslateZ ||
                                  extents.DepthFar == transform.TranslateZ;

    return translateMatchesExtent ? DepthMode.ZeroToOne : DepthMode.MinusOneToOne;
}

/// <summary>
/// Forces the shaders to be rebound on the next draw.
/// </summary>
public void ForceShaderUpdate()
{
    _updateTracker.ForceDirty(ShaderStateIndex);
}
}
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs new file mode 100644 index 00000000..caeee18e --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -0,0 +1,620 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using Ryujinx.Graphics.Gpu.Engine.Threed.Blender;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
    /// <summary>
    /// Represents a 3D engine class.
/// </summary>
class ThreedClass : IDeviceState
{
    private readonly GpuContext _context;
    private readonly GPFifoClass _fifoClass;
    private readonly DeviceStateWithShadow<ThreedClassState> _state;

    private readonly InlineToMemoryClass _i2mClass;
    private readonly AdvancedBlendManager _blendManager;
    private readonly DrawManager _drawManager;
    private readonly SemaphoreUpdater _semaphoreUpdater;
    private readonly ConstantBufferUpdater _cbUpdater;
    private readonly StateUpdater _stateUpdater;

    /// <summary>
    /// Creates a new instance of the 3D engine class.
    /// </summary>
    /// <param name="context">GPU context</param>
    /// <param name="channel">GPU channel</param>
    /// <param name="fifoClass">GPFifo class, used to create pending syncs before a state update</param>
    public ThreedClass(GpuContext context, GpuChannel channel, GPFifoClass fifoClass)
    {
        _context = context;
        _fifoClass = fifoClass;

        // Maps register names to the handlers invoked when those registers are written (or read).
        var callbacks = new Dictionary<string, RwCallback>
        {
            { nameof(ThreedClassState.LaunchDma), new RwCallback(LaunchDma, null) },
            { nameof(ThreedClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
            { nameof(ThreedClassState.SyncpointAction), new RwCallback(IncrementSyncpoint, null) },
            { nameof(ThreedClassState.InvalidateSamplerCacheNoWfi), new RwCallback(InvalidateSamplerCacheNoWfi, null) },
            { nameof(ThreedClassState.InvalidateTextureHeaderCacheNoWfi), new RwCallback(InvalidateTextureHeaderCacheNoWfi, null) },
            { nameof(ThreedClassState.TextureBarrier), new RwCallback(TextureBarrier, null) },
            { nameof(ThreedClassState.LoadBlendUcodeStart), new RwCallback(LoadBlendUcodeStart, null) },
            { nameof(ThreedClassState.LoadBlendUcodeInstruction), new RwCallback(LoadBlendUcodeInstruction, null) },
            { nameof(ThreedClassState.TextureBarrierTiled), new RwCallback(TextureBarrierTiled, null) },
            { nameof(ThreedClassState.DrawTextureSrcY), new RwCallback(DrawTexture, null) },
            { nameof(ThreedClassState.DrawVertexArrayBeginEndInstanceFirst), new RwCallback(DrawVertexArrayBeginEndInstanceFirst, null) },
            { nameof(ThreedClassState.DrawVertexArrayBeginEndInstanceSubsequent), new RwCallback(DrawVertexArrayBeginEndInstanceSubsequent, null) },
            { nameof(ThreedClassState.VbElementU8), new RwCallback(VbElementU8, null) },
            { nameof(ThreedClassState.VbElementU16), new RwCallback(VbElementU16, null) },
            { nameof(ThreedClassState.VbElementU32), new RwCallback(VbElementU32, null) },
            { nameof(ThreedClassState.ResetCounter), new RwCallback(ResetCounter, null) },
            { nameof(ThreedClassState.RenderEnableCondition), new RwCallback(null, Zero) },
            { nameof(ThreedClassState.DrawEnd), new RwCallback(DrawEnd, null) },
            { nameof(ThreedClassState.DrawBegin), new RwCallback(DrawBegin, null) },
            { nameof(ThreedClassState.DrawIndexBuffer32BeginEndInstanceFirst), new RwCallback(DrawIndexBuffer32BeginEndInstanceFirst, null) },
            { nameof(ThreedClassState.DrawIndexBuffer16BeginEndInstanceFirst), new RwCallback(DrawIndexBuffer16BeginEndInstanceFirst, null) },
            { nameof(ThreedClassState.DrawIndexBuffer8BeginEndInstanceFirst), new RwCallback(DrawIndexBuffer8BeginEndInstanceFirst, null) },
            { nameof(ThreedClassState.DrawIndexBuffer32BeginEndInstanceSubsequent), new RwCallback(DrawIndexBuffer32BeginEndInstanceSubsequent, null) },
            { nameof(ThreedClassState.DrawIndexBuffer16BeginEndInstanceSubsequent), new RwCallback(DrawIndexBuffer16BeginEndInstanceSubsequent, null) },
            { nameof(ThreedClassState.DrawIndexBuffer8BeginEndInstanceSubsequent), new RwCallback(DrawIndexBuffer8BeginEndInstanceSubsequent, null) },
            { nameof(ThreedClassState.IndexBufferCount), new RwCallback(SetIndexBufferCount, null) },
            { nameof(ThreedClassState.Clear), new RwCallback(Clear, null) },
            { nameof(ThreedClassState.SemaphoreControl), new RwCallback(Report, null) },
            { nameof(ThreedClassState.SetFalcon04), new RwCallback(SetFalcon04, null) },
            { nameof(ThreedClassState.UniformBufferUpdateData), new RwCallback(ConstantBufferUpdate, null) },
            { nameof(ThreedClassState.UniformBufferBindVertex), new RwCallback(ConstantBufferBindVertex, null) },
            { nameof(ThreedClassState.UniformBufferBindTessControl), new RwCallback(ConstantBufferBindTessControl, null) },
            { nameof(ThreedClassState.UniformBufferBindTessEvaluation), new RwCallback(ConstantBufferBindTessEvaluation, null) },
            { nameof(ThreedClassState.UniformBufferBindGeometry), new RwCallback(ConstantBufferBindGeometry, null) },
            { nameof(ThreedClassState.UniformBufferBindFragment), new RwCallback(ConstantBufferBindFragment, null) }
        };

        _state = new DeviceStateWithShadow<ThreedClassState>(callbacks);

        _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);

        // The draw manager and the state updater share the same specialization and draw state objects.
        var specStateUpdater = new SpecializationStateUpdater(context);
        var sharedDrawState = new DrawState();

        _drawManager = new DrawManager(context, channel, _state, sharedDrawState, specStateUpdater);
        _blendManager = new AdvancedBlendManager(_state);
        _semaphoreUpdater = new SemaphoreUpdater(context, channel, _state);
        _cbUpdater = new ConstantBufferUpdater(channel, _state);
        _stateUpdater = new StateUpdater(context, channel, _state, sharedDrawState, _blendManager, specStateUpdater);

        // This defaults to "always", even without any register write.
        // Reads just return 0, regardless of what was set there.
        _state.State.RenderEnableCondition = Condition.Always;
    }

    /// <summary>
    /// Reads data from the class registers.
    /// </summary>
    /// <param name="offset">Register byte offset</param>
    /// <returns>Data at the specified offset</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <summary>
    /// Writes data to the class registers.
/// </summary>
/// <param name="offset">Register byte offset</param>
/// <param name="data">Data to be written</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Write(int offset, int data)
{
    _state.WriteWithRedundancyCheck(offset, data, out bool valueChanged);

    // Writes that do not change the register value do not mark any state as dirty.
    if (!valueChanged)
    {
        return;
    }

    _stateUpdater.SetDirty(offset);
}

/// <summary>
/// Sets the shadow ram control value of all sub-channels.
/// </summary>
/// <param name="control">New shadow ram control value</param>
public void SetShadowRamControl(int control) => _state.State.SetMmeShadowRamControl = (uint)control;

/// <summary>
/// Creates any pending syncs, flushes queued constant buffer updates, and then updates
/// current host state for all registers modified since the last call to this method.
/// </summary>
public void UpdateState()
{
    _fifoClass.CreatePendingSyncs();
    _cbUpdater.FlushUboDirty();
    _stateUpdater.Update();
}

/// <summary>
/// Updates current host state for all registers modified since the last call to this method.
/// </summary>
/// <param name="mask">Mask where each bit set indicates that the respective state group index should be checked</param>
public void UpdateState(ulong mask) => _stateUpdater.Update(mask);

/// <summary>
/// Updates render targets (color and depth-stencil buffers) based on current render target state.
/// </summary>
/// <param name="updateFlags">Flags indicating which render targets should be updated and how</param>
/// <param name="singleUse">If this is not -1, it indicates that only the given indexed target will be used.</param>
public void UpdateRenderTargetState(RenderTargetUpdateFlags updateFlags, int singleUse = -1) =>
    _stateUpdater.UpdateRenderTargetState(updateFlags, singleUse);

/// <summary>
/// Updates scissor based on current render target state.
/// </summary>
public void UpdateScissorState() => _stateUpdater.UpdateScissorState();

/// <summary>
/// Marks the entire state as dirty, forcing a full host state update before the next draw.
/// </summary>
public void ForceStateDirty()
{
    _drawManager.ForceStateDirty();
    _stateUpdater.SetAllDirty();
}

/// <summary>
/// Marks the specified register offset as dirty, forcing the associated state to update on the next draw.
/// </summary>
/// <param name="offset">Register offset</param>
public void ForceStateDirty(int offset) => _stateUpdater.SetDirty(offset);

/// <summary>
/// Forces the shaders to be rebound on the next draw.
/// </summary>
public void ForceShaderUpdate() => _stateUpdater.ForceShaderUpdate();

/// <summary>
/// Create any syncs from WaitForIdle command that are currently pending.
/// </summary>
public void CreatePendingSyncs() => _fifoClass.CreatePendingSyncs();

/// <summary>
/// Flushes any queued UBO updates.
/// </summary>
public void FlushUboDirty() => _cbUpdater.FlushUboDirty();

/// <summary>
/// Perform any deferred draws.
/// </summary>
public void PerformDeferredDraws() => _drawManager.PerformDeferredDraws();

/// <summary>
/// Updates the currently bound constant buffer.
/// </summary>
/// <param name="data">Data to be written to the buffer</param>
public void ConstantBufferUpdate(ReadOnlySpan<int> data) => _cbUpdater.Update(data);

/// <summary>
/// Launches the Inline-to-Memory DMA copy operation.
/// </summary>
/// <param name="argument">Method call argument</param>
private void LaunchDma(int argument)
{
    // The 3D class state is reinterpreted as Inline-to-Memory class state for the I2M engine.
    ref InlineToMemoryClassState i2mState = ref Unsafe.As<ThreedClassState, InlineToMemoryClassState>(ref _state.State);

    _i2mClass.LaunchDma(ref i2mState, argument);
}

/// <summary>
/// Pushes a block of data to the Inline-to-Memory engine.
/// </summary>
/// <param name="data">Data to push</param>
public void LoadInlineData(ReadOnlySpan<int> data) => _i2mClass.LoadInlineData(data);

/// <summary>
/// Pushes a single word of data to the Inline-to-Memory engine.
/// </summary>
/// <param name="argument">Method call argument</param>
private void LoadInlineData(int argument) => _i2mClass.LoadInlineData(argument);

/// <summary>
/// Increments a syncpoint.
/// </summary>
/// <param name="argument">Method call argument; the low 16 bits hold the syncpoint ID</param>
public void IncrementSyncpoint(int argument)
{
    _context.AdvanceSequence();
    _context.CreateHostSyncIfNeeded(true, true);

    // Poll the query counters, the game may want an updated result.
    _context.Renderer.UpdateCounters();

    uint id = (uint)argument & 0xFFFF;
    _context.Synchronization.IncrementSyncpoint(id);
}

/// <summary>
/// Invalidates the cache with the sampler descriptors from the sampler pool.
/// </summary>
/// <param name="argument">Method call argument (unused)</param>
private void InvalidateSamplerCacheNoWfi(int argument) => _context.AdvanceSequence();

/// <summary>
/// Invalidates the cache with the texture descriptors from the texture pool.
/// </summary>
/// <param name="argument">Method call argument (unused)</param>
private void InvalidateTextureHeaderCacheNoWfi(int argument) => _context.AdvanceSequence();

/// <summary>
/// Issues a texture barrier.
/// This waits for previous texture writes from the GPU to finish before
/// new operations are performed with said textures.
/// </summary>
/// <param name="argument">Method call argument (unused)</param>
private void TextureBarrier(int argument) => _context.Renderer.Pipeline.TextureBarrier();

/// <summary>
/// Sets the start offset of the blend microcode in memory.
/// </summary>
/// <param name="argument">Method call argument</param>
private void LoadBlendUcodeStart(int argument) => _blendManager.LoadBlendUcodeStart(argument);

/// <summary>
/// Pushes a single word of blend microcode.
/// </summary>
/// <param name="argument">Method call argument</param>
private void LoadBlendUcodeInstruction(int argument) => _blendManager.LoadBlendUcodeInstruction(argument);

/// <summary>
/// Issues a tiled texture barrier.
/// This waits for previous texture writes from the GPU to finish before
/// new operations are performed with said textures.
/// The wait is performed per tile, so it is only valid if both the previous write
/// and the current access have the same access pattern.
/// This may be faster than the regular barrier on tile-based rasterizers.
/// </summary>
/// <param name="argument">Method call argument (unused)</param>
private void TextureBarrierTiled(int argument) => _context.Renderer.Pipeline.TextureBarrierTiled();

/// <summary>
/// Draws a texture, without needing to specify shader programs.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawTexture(int argument) => _drawManager.DrawTexture(this, argument);

/// <summary>
/// Performs a non-indexed draw with the specified topology, index and count.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawVertexArrayBeginEndInstanceFirst(int argument)
    => _drawManager.DrawVertexArrayBeginEndInstanceFirst(this, argument);

/// <summary>
/// Performs a non-indexed draw with the specified topology, index and count,
/// while incrementing the current instance.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawVertexArrayBeginEndInstanceSubsequent(int argument)
    => _drawManager.DrawVertexArrayBeginEndInstanceSubsequent(this, argument);

/// <summary>
/// Pushes four index buffer elements of 8 bits each.
/// </summary>
/// <param name="argument">Method call argument</param>
private void VbElementU8(int argument) => _drawManager.VbElementU8(argument);

/// <summary>
/// Pushes two index buffer elements of 16 bits each.
/// </summary>
/// <param name="argument">Method call argument</param>
private void VbElementU16(int argument) => _drawManager.VbElementU16(argument);

/// <summary>
/// Pushes a single 32-bit index buffer element.
/// </summary>
/// <param name="argument">Method call argument</param>
private void VbElementU32(int argument) => _drawManager.VbElementU32(argument);

/// <summary>
/// Resets the value of an internal GPU counter back to zero.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ResetCounter(int argument) => _semaphoreUpdater.ResetCounter(argument);

/// <summary>
/// Finishes the draw call.
/// This draws geometry on the bound buffers based on the current GPU state.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawEnd(int argument) => _drawManager.DrawEnd(this, argument);

/// <summary>
/// Starts a draw.
/// This sets the primitive type and the instanced draw parameters.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawBegin(int argument) => _drawManager.DrawBegin(argument);

/// <summary>
/// Sets the index buffer count.
/// This also sets internal state that indicates that the next draw is an indexed draw.
/// </summary>
/// <param name="argument">Method call argument</param>
private void SetIndexBufferCount(int argument) => _drawManager.SetIndexBufferCount(argument);

/// <summary>
/// Performs an indexed draw with 8-bit index buffer elements.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer8BeginEndInstanceFirst(int argument)
    => _drawManager.DrawIndexBuffer8BeginEndInstanceFirst(this, argument);

/// <summary>
/// Performs an indexed draw with 16-bit index buffer elements.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer16BeginEndInstanceFirst(int argument)
    => _drawManager.DrawIndexBuffer16BeginEndInstanceFirst(this, argument);

/// <summary>
/// Performs an indexed draw with 32-bit index buffer elements.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer32BeginEndInstanceFirst(int argument)
    => _drawManager.DrawIndexBuffer32BeginEndInstanceFirst(this, argument);

/// <summary>
/// Performs an indexed draw with 8-bit index buffer elements,
/// while also pre-incrementing the current instance value.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer8BeginEndInstanceSubsequent(int argument)
    => _drawManager.DrawIndexBuffer8BeginEndInstanceSubsequent(this, argument);

/// <summary>
/// Performs an indexed draw with 16-bit index buffer elements,
/// while also pre-incrementing the current instance value.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer16BeginEndInstanceSubsequent(int argument)
    => _drawManager.DrawIndexBuffer16BeginEndInstanceSubsequent(this, argument);

/// <summary>
/// Performs an indexed draw with 32-bit index buffer elements,
/// while also pre-incrementing the current instance value.
/// </summary>
/// <param name="argument">Method call argument</param>
private void DrawIndexBuffer32BeginEndInstanceSubsequent(int argument)
    => _drawManager.DrawIndexBuffer32BeginEndInstanceSubsequent(this, argument);

/// <summary>
/// Clears the current color and depth-stencil buffers.
/// The argument also specifies which buffers should be cleared.
/// </summary>
/// <param name="argument">Method call argument</param>
private void Clear(int argument) => _drawManager.Clear(this, argument);

/// <summary>
/// Writes a GPU counter to guest memory.
/// </summary>
/// <param name="argument">Method call argument</param>
private void Report(int argument) => _semaphoreUpdater.Report(argument);

/// <summary>
/// Performs high-level emulation of Falcon microcode function number "4".
/// The emulation consists of writing 1 to the first MME shadow scratch register.
/// </summary>
/// <param name="argument">Method call argument (not read by this method)</param>
private void SetFalcon04(int argument) => _state.State.SetMmeShadowScratch[0] = 1;

/// <summary>
/// Updates the uniform buffer data with one word of inline data.
/// </summary>
/// <param name="argument">New uniform buffer data word</param>
private void ConstantBufferUpdate(int argument) => _cbUpdater.Update(argument);

/// <summary>
/// Binds a uniform buffer for the vertex shader stage.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ConstantBufferBindVertex(int argument) => _cbUpdater.BindVertex(argument);

/// <summary>
/// Binds a uniform buffer for the tessellation control shader stage.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ConstantBufferBindTessControl(int argument) => _cbUpdater.BindTessControl(argument);

/// <summary>
/// Binds a uniform buffer for the tessellation evaluation shader stage.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ConstantBufferBindTessEvaluation(int argument) => _cbUpdater.BindTessEvaluation(argument);

/// <summary>
/// Binds a uniform buffer for the geometry shader stage.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ConstantBufferBindGeometry(int argument) => _cbUpdater.BindGeometry(argument);

/// <summary>
/// Binds a uniform buffer for the fragment shader stage.
/// </summary>
/// <param name="argument">Method call argument</param>
private void ConstantBufferBindFragment(int argument) => _cbUpdater.BindFragment(argument);

/// <summary>
/// Generic register read function that always returns 0.
/// </summary>
/// <returns>Zero</returns>
private static int Zero() => 0;

/// <summary>
/// Performs an indexed or non-indexed draw.
/// </summary>
/// <param name="topology">Primitive topology</param>
/// <param name="count">Index count for indexed draws, vertex count for non-indexed draws</param>
/// <param name="instanceCount">Instance count</param>
/// <param name="firstIndex">First index on the index buffer for indexed draws, ignored for non-indexed draws</param>
/// <param name="firstVertex">First vertex on the vertex buffer</param>
/// <param name="firstInstance">First instance</param>
/// <param name="indexed">True if the draw is indexed, false otherwise</param>
public void Draw(
    PrimitiveTopology topology,
    int count,
    int instanceCount,
    int firstIndex,
    int firstVertex,
    int firstInstance,
    bool indexed)
    => _drawManager.Draw(
        this,
        topology,
        count,
        instanceCount,
        firstIndex,
        firstVertex,
        firstInstance,
        indexed);

/// <summary>
/// Performs an indirect draw, with parameters from a GPU buffer.
/// </summary>
/// <param name="topology">Primitive topology</param>
/// <param name="indirectBufferAddress">Address of the buffer with the draw parameters, such as count, first index, etc</param>
/// <param name="parameterBufferAddress">Address of the buffer with the draw count</param>
/// <param name="maxDrawCount">Maximum number of draws that can be made</param>
/// <param name="stride">Distance in bytes between each entry on the data pointed to by <paramref name="indirectBufferAddress"/></param>
/// <param name="indexCount">Maximum number of indices that the draw can consume</param>
/// <param name="drawType">Type of the indirect draw, which can be indexed or non-indexed, with or without a draw count</param>
public void DrawIndirect(
    PrimitiveTopology topology,
    ulong indirectBufferAddress,
    ulong parameterBufferAddress,
    int maxDrawCount,
    int stride,
    int indexCount,
    IndirectDrawType drawType)
    => _drawManager.DrawIndirect(
        this,
        topology,
        indirectBufferAddress,
        parameterBufferAddress,
        maxDrawCount,
        stride,
        indexCount,
        drawType);

/// <summary>
/// Clears the current color and depth-stencil buffers.
/// Which buffers should be cleared can also be specified with the arguments.
+ /// </summary> + /// <param name="argument">Method call argument</param> + /// <param name="layerCount">For array and 3D textures, indicates how many layers should be cleared</param> + public void Clear(int argument, int layerCount) + { + _drawManager.Clear(this, argument, layerCount); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs new file mode 100644 index 00000000..8f26f38f --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs @@ -0,0 +1,1048 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.InlineToMemory; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Shader; +using System; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed +{ + /// <summary> + /// Shader stage name. + /// </summary> + enum ShaderType + { + Vertex, + TessellationControl, + TessellationEvaluation, + Geometry, + Fragment + } + + /// <summary> + /// Tessellation mode. + /// </summary> + struct TessMode + { +#pragma warning disable CS0649 + public uint Packed; +#pragma warning restore CS0649 + + /// <summary> + /// Unpacks the tessellation abstract patch type. + /// </summary> + /// <returns>Abstract patch type</returns> + public TessPatchType UnpackPatchType() + { + return (TessPatchType)(Packed & 3); + } + + /// <summary> + /// Unpacks the spacing between tessellated vertices of the patch. + /// </summary> + /// <returns>Spacing between tessellated vertices</returns> + public TessSpacing UnpackSpacing() + { + return (TessSpacing)((Packed >> 4) & 3); + } + + /// <summary> + /// Unpacks the primitive winding order. + /// </summary> + /// <returns>True if clockwise, false if counter-clockwise</returns> + public bool UnpackCw() + { + return (Packed & (1 << 8)) != 0; + } + } + + /// <summary> + /// Transform feedback buffer state.
/// </summary>
struct TfBufferState
{
    // NOTE(review): field order presumably mirrors the GPU register layout - confirm before reordering.
#pragma warning disable CS0649
    public Boolean32 Enable;
    public GpuVa Address;
    public int Size;
    public int Offset;
    public uint Padding0;
    public uint Padding1;
    public uint Padding2;
#pragma warning restore CS0649
}

/// <summary>
/// State of transform feedback.
/// </summary>
struct TfState
{
#pragma warning disable CS0649
    public int BufferIndex;
    public int VaryingsCount;
    public int Stride;
    public uint Padding;
#pragma warning restore CS0649
}

/// <summary>
/// State of a render target color buffer.
/// </summary>
struct RtColorState
{
#pragma warning disable CS0649
    public GpuVa Address;
    public int WidthOrStride;
    public int Height;
    public ColorFormat Format;
    public MemoryLayout MemoryLayout;
    public int Depth;
    public int LayerSize;
    public int BaseLayer;
    public int Unknown0x24;
    public int Padding0;
    public int Padding1;
    public int Padding2;
    public int Padding3;
    public int Padding4;
    public int Padding5;
#pragma warning restore CS0649
}

/// <summary>
/// Parameters of the viewport transformation.
/// </summary>
struct ViewportTransform
{
#pragma warning disable CS0649
    public float ScaleX;
    public float ScaleY;
    public float ScaleZ;
    public float TranslateX;
    public float TranslateY;
    public float TranslateZ;
    public uint Swizzle;
    public uint SubpixelPrecisionBias;
#pragma warning restore CS0649

    /// <summary>
    /// Unpacks the viewport swizzle of the position X component (bits 0 to 2 of <see cref="Swizzle"/>).
    /// </summary>
    /// <returns>Swizzle enum value</returns>
    public ViewportSwizzle UnpackSwizzleX() => (ViewportSwizzle)(Swizzle & 7);

    /// <summary>
    /// Unpacks the viewport swizzle of the position Y component (bits 4 to 6 of <see cref="Swizzle"/>).
    /// </summary>
    /// <returns>Swizzle enum value</returns>
    public ViewportSwizzle UnpackSwizzleY() => (ViewportSwizzle)((Swizzle >> 4) & 7);

    /// <summary>
    /// Unpacks the viewport swizzle of the position Z component (bits 8 to 10 of <see cref="Swizzle"/>).
    /// </summary>
    /// <returns>Swizzle enum value</returns>
    public ViewportSwizzle UnpackSwizzleZ() => (ViewportSwizzle)((Swizzle >> 8) & 7);

    /// <summary>
    /// Unpacks the viewport swizzle of the position W component (bits 12 to 14 of <see cref="Swizzle"/>).
    /// </summary>
    /// <returns>Swizzle enum value</returns>
    public ViewportSwizzle UnpackSwizzleW() => (ViewportSwizzle)((Swizzle >> 12) & 7);
}

/// <summary>
/// Viewport extents used for viewport clipping; the depth range is included as well.
/// </summary>
struct ViewportExtents
{
#pragma warning disable CS0649
    public ushort X;
    public ushort Width;
    public ushort Y;
    public ushort Height;
    public float DepthNear;
    public float DepthFar;
#pragma warning restore CS0649
}

/// <summary>
/// Draw state used by non-indexed draws.
/// </summary>
struct VertexBufferDrawState
{
#pragma warning disable CS0649
    public int First;
    public int Count;
#pragma warning restore CS0649
}

/// <summary>
/// Clear color of the color buffer.
/// </summary>
struct ClearColors
{
#pragma warning disable CS0649
    public float Red;
    public float Green;
    public float Blue;
    public float Alpha;
#pragma warning restore CS0649
}

/// <summary>
/// Parameters of the depth bias (also called polygon offset).
/// </summary>
struct DepthBiasState
{
#pragma warning disable CS0649
    public Boolean32 PointEnable;
    public Boolean32 LineEnable;
    public Boolean32 FillEnable;
#pragma warning restore CS0649
}

/// <summary>
/// Indicates whether the blend microcode processes the RGB and alpha components.
/// </summary>
enum BlendUcodeEnable
{
    Disabled = 0,
    EnableRGB = 1,
    EnableAlpha = 2,
    EnableRGBA = 3
}

/// <summary>
/// Scissor state.
/// </summary>
struct ScissorState
{
    // NOTE(review): field order presumably mirrors the GPU register layout - confirm before reordering.
#pragma warning disable CS0649
    public Boolean32 Enable;
    public ushort X1;
    public ushort X2;
    public ushort Y1;
    public ushort Y2;
    public uint Padding;
#pragma warning restore CS0649
}

/// <summary>
/// Masks of the stencil test for back tests.
/// </summary>
struct StencilBackMasks
{
#pragma warning disable CS0649
    public int FuncRef;
    public int Mask;
    public int FuncMask;
#pragma warning restore CS0649
}

/// <summary>
/// State of the render target depth-stencil buffer.
/// </summary>
struct RtDepthStencilState
{
#pragma warning disable CS0649
    public GpuVa Address;
    public ZetaFormat Format;
    public MemoryLayout MemoryLayout;
    public int LayerSize;
#pragma warning restore CS0649
}

/// <summary>
/// State of the screen scissor.
/// </summary>
struct ScreenScissorState
{
#pragma warning disable CS0649
    public ushort X;
    public ushort Width;
    public ushort Y;
    public ushort Height;
#pragma warning restore CS0649
}

/// <summary>
/// Vector and component size of a vertex attribute.
/// </summary>
enum VertexAttribSize
{
    Size32x4 = 1,
    Size32x3 = 2,
    Size16x4 = 3,
    Size32x2 = 4,
    Size16x3 = 5,
    Size8x4 = 0xa,
    Size16x2 = 0xf,
    Size32 = 0x12,
    Size8x3 = 0x13,
    Size8x2 = 0x18,
    Size16 = 0x1b,
    Size8 = 0x1d,
    Rgb10A2 = 0x30,
    Rg11B10 = 0x31
}

/// <summary>
/// Component type of a vertex attribute.
/// </summary>
enum VertexAttribType
{
    Snorm = 1,
    Unorm = 2,
    Sint = 3,
    Uint = 4,
    Uscaled = 5,
    Sscaled = 6,
    Float = 7
}

/// <summary>
/// State of a vertex buffer attribute.
/// </summary>
struct VertexAttribState
{
#pragma warning disable CS0649
    public uint Attribute;
#pragma warning restore CS0649

    /// <summary>
    /// Unpacks the index of the vertex buffer this attribute belongs to (bits 0 to 4).
    /// </summary>
    /// <returns>Vertex buffer index</returns>
    public int UnpackBufferIndex() => (int)(Attribute & 0x1f);

    /// <summary>
    /// Unpacks the attribute constant flag (bit 6).
    /// </summary>
    /// <returns>True if the attribute is constant, false otherwise</returns>
    public bool UnpackIsConstant() => (Attribute & 0x40) != 0;

    /// <summary>
    /// Unpacks the offset, in bytes, of the attribute on the vertex buffer (bits 7 to 20).
    /// </summary>
    /// <returns>Attribute offset in bytes</returns>
    public int UnpackOffset() => (int)((Attribute >> 7) & 0x3fff);

    /// <summary>
    /// Unpacks the Maxwell attribute format integer.
    /// The format bits (21 to 29) are masked but left in place, not shifted down.
    /// </summary>
    /// <returns>Attribute format integer</returns>
    public uint UnpackFormat() => Attribute & 0x3fe00000;

    /// <summary>
    /// Unpacks the Maxwell attribute size (bits 21 to 26).
    /// </summary>
    /// <returns>Attribute size</returns>
    public VertexAttribSize UnpackSize() => (VertexAttribSize)((Attribute >> 21) & 0x3f);

    /// <summary>
    /// Unpacks the Maxwell attribute component type (bits 27 to 29).
    /// </summary>
    /// <returns>Attribute component type</returns>
    public VertexAttribType UnpackType() => (VertexAttribType)((Attribute >> 27) & 7);
}

/// <summary>
/// Control of the render target draw buffers.
/// </summary>
struct RtControl
{
#pragma warning disable CS0649
    public uint Packed;
#pragma warning restore CS0649

    /// <summary>
    /// Unpacks the number of active draw buffers (low 4 bits).
    /// </summary>
    /// <returns>Number of active draw buffers</returns>
    public int UnpackCount() => (int)(Packed & 0xf);

    /// <summary>
    /// Unpacks the color attachment index for a given draw buffer.
    /// Each draw buffer uses a 3-bit field, starting at bit 4.
    /// </summary>
    /// <param name="index">Index of the draw buffer</param>
    /// <returns>Attachment index</returns>
    public int UnpackPermutationIndex(int index)
    {
        int shift = 4 + index * 3;

        return (int)((Packed >> shift) & 7);
    }
}

/// <summary>
/// Size of a 3D, 2D or 1D texture.
/// </summary>
struct Size3D
{
#pragma warning disable CS0649
    public int Width;
    public int Height;
    public int Depth;
#pragma warning restore CS0649
}

/// <summary>
/// State and masks of the stencil front test.
/// </summary>
struct StencilTestState
{
#pragma warning disable CS0649
    public Boolean32 Enable;
    public StencilOp FrontSFail;
    public StencilOp FrontDpFail;
    public StencilOp FrontDpPass;
    public CompareOp FrontFunc;
    public int FrontFuncRef;
    public int FrontFuncMask;
    public int FrontMask;
#pragma warning restore CS0649
}

/// <summary>
/// Flags of the screen Y control register.
/// </summary>
[Flags]
enum YControl
{
    NegateY = 1 << 0,
    TriangleRastFlip = 1 << 4
}

/// <summary>
/// RGB color components, packed as 16-bit float values.
/// </summary>
struct RgbHalf
{
#pragma warning disable CS0649
    public uint R;
    public uint G;
    public uint B;
    public uint Padding;
#pragma warning restore CS0649

    /// <summary>
    /// Unpacks the red color component as a 16-bit float value.
    /// </summary>
    /// <returns>The component value</returns>
    public Half UnpackR()
    {
        // Only the low 16 bits carry the value; they are reinterpreted as a half float.
        ushort bits = (ushort)R;
        return Unsafe.As<ushort, Half>(ref bits);
    }

    /// <summary>
    /// Unpacks the green color component as a 16-bit float value.
    /// </summary>
    /// <returns>The component value</returns>
    public Half UnpackG()
    {
        // Only the low 16 bits carry the value; they are reinterpreted as a half float.
        ushort bits = (ushort)G;
        return Unsafe.As<ushort, Half>(ref bits);
    }

    /// <summary>
    /// Unpacks the blue color component as a 16-bit float value.
    /// </summary>
    /// <returns>The component value</returns>
    public Half UnpackB()
    {
        // Only the low 16 bits carry the value; they are reinterpreted as a half float.
        ushort bits = (ushort)B;
        return Unsafe.As<ushort, Half>(ref bits);
    }
}

/// <summary>
/// Condition used for conditional rendering.
/// </summary>
enum Condition
{
    Never,
    Always,
    ResultNonZero,
    Equal,
    NotEqual
}

/// <summary>
/// State of a texture or sampler pool.
/// </summary>
struct PoolState
{
#pragma warning disable CS0649
    public GpuVa Address;
    public int MaximumId;
#pragma warning restore CS0649
}

/// <summary>
/// State of the stencil back test.
/// </summary>
struct StencilBackTestState
{
#pragma warning disable CS0649
    public Boolean32 TwoSided;
    public StencilOp BackSFail;
    public StencilOp BackDpFail;
    public StencilOp BackDpPass;
    public CompareOp BackFunc;
#pragma warning restore CS0649
}

/// <summary>
/// State of primitive restart.
/// </summary>
struct PrimitiveRestartState
{
#pragma warning disable CS0649
    public Boolean32 Enable;
    public int Index;
#pragma warning restore CS0649
}

/// <summary>
/// State of the GPU index buffer.
/// This is used on indexed draws.
/// </summary>
struct IndexBufferState
{
#pragma warning disable CS0649
    public GpuVa Address;
    public GpuVa EndAddress;
    public IndexType Type;
    public int First;
#pragma warning restore CS0649
}

/// <summary>
/// Parameters of face culling and orientation.
/// </summary>
struct FaceState
{
#pragma warning disable CS0649
    public Boolean32 CullEnable;
    public FrontFace FrontFace;
    public Face CullFace;
#pragma warning restore CS0649
}

/// <summary>
/// Flags of the view volume clip control.
/// </summary>
[Flags]
enum ViewVolumeClipControl
{
    ForceDepthRangeZeroToOne = 1 << 0,
    DepthClampDisabled = 1 << 11
}

/// <summary>
/// Logical operation state.
+ /// </summary> + struct LogicalOpState + { +#pragma warning disable CS0649 + public Boolean32 Enable; + public LogicalOp LogicalOp; +#pragma warning restore CS0649 + } + + /// <summary> + /// Render target color buffer mask. + /// This defines which color channels are written to the color buffer. + /// </summary> + struct RtColorMask + { +#pragma warning disable CS0649 + public uint Packed; +#pragma warning restore CS0649 + + /// <summary> + /// Unpacks red channel enable. + /// </summary> + /// <returns>True to write the new red channel color, false to keep the old value</returns> + public bool UnpackRed() + { + return (Packed & 0x1) != 0; + } + + /// <summary> + /// Unpacks green channel enable. + /// </summary> + /// <returns>True to write the new green channel color, false to keep the old value</returns> + public bool UnpackGreen() + { + return (Packed & 0x10) != 0; + } + + /// <summary> + /// Unpacks blue channel enable. + /// </summary> + /// <returns>True to write the new blue channel color, false to keep the old value</returns> + public bool UnpackBlue() + { + return (Packed & 0x100) != 0; + } + + /// <summary> + /// Unpacks alpha channel enable. + /// </summary> + /// <returns>True to write the new alpha channel color, false to keep the old value</returns> + public bool UnpackAlpha() + { + return (Packed & 0x1000) != 0; + } + } + + /// <summary> + /// Vertex buffer state. + /// </summary> + struct VertexBufferState + { +#pragma warning disable CS0649 + public uint Control; + public GpuVa Address; + public int Divisor; +#pragma warning restore CS0649 + + /// <summary> + /// Vertex buffer stride, defined as the number of bytes occupied by each vertex in memory. + /// </summary> + /// <returns>Vertex buffer stride</returns> + public int UnpackStride() + { + return (int)(Control & 0xfff); + } + + /// <summary> + /// Vertex buffer enable. 
+ /// </summary> + /// <returns>True if the vertex buffer is enabled, false otherwise</returns> + public bool UnpackEnable() + { + return (Control & (1 << 12)) != 0; + } + } + + /// <summary> + /// Color buffer blending parameters, shared by all color buffers. + /// </summary> + struct BlendStateCommon + { +#pragma warning disable CS0649 + public Boolean32 SeparateAlpha; + public BlendOp ColorOp; + public BlendFactor ColorSrcFactor; + public BlendFactor ColorDstFactor; + public BlendOp AlphaOp; + public BlendFactor AlphaSrcFactor; + public uint Unknown0x1354; + public BlendFactor AlphaDstFactor; +#pragma warning restore CS0649 + } + + /// <summary> + /// Color buffer blending parameters. + /// </summary> + struct BlendState + { +#pragma warning disable CS0649 + public Boolean32 SeparateAlpha; + public BlendOp ColorOp; + public BlendFactor ColorSrcFactor; + public BlendFactor ColorDstFactor; + public BlendOp AlphaOp; + public BlendFactor AlphaSrcFactor; + public BlendFactor AlphaDstFactor; + public uint Padding; +#pragma warning restore CS0649 + } + + /// <summary> + /// Graphics shader stage state. + /// </summary> + struct ShaderState + { +#pragma warning disable CS0649 + public uint Control; + public uint Offset; + public uint Unknown0x8; + public int MaxRegisters; + public ShaderType Type; + public uint Unknown0x14; + public uint Unknown0x18; + public uint Unknown0x1c; + public uint Unknown0x20; + public uint Unknown0x24; + public uint Unknown0x28; + public uint Unknown0x2c; + public uint Unknown0x30; + public uint Unknown0x34; + public uint Unknown0x38; + public uint Unknown0x3c; +#pragma warning restore CS0649 + + /// <summary> + /// Unpacks shader enable information. + /// Must be ignored for vertex shaders, those are always enabled. 
+ /// </summary> + /// <returns>True if the stage is enabled, false otherwise</returns> + public bool UnpackEnable() + { + return (Control & 1) != 0; + } + } + + /// <summary> + /// Uniform buffer state for the uniform buffer currently being modified. + /// </summary> + struct UniformBufferState + { +#pragma warning disable CS0649 + public int Size; + public GpuVa Address; + public int Offset; +#pragma warning restore CS0649 + } + + unsafe struct ThreedClassState : IShadowState + { +#pragma warning disable CS0649 + public uint SetObject; + public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF); + public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F); + public fixed uint Reserved04[63]; + public uint NoOperation; + public uint SetNotifyA; + public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF); + public uint SetNotifyB; + public uint Notify; + public NotifyType NotifyType => (NotifyType)(Notify); + public uint WaitForIdle; + public uint LoadMmeInstructionRamPointer; + public uint LoadMmeInstructionRam; + public uint LoadMmeStartAddressRamPointer; + public uint LoadMmeStartAddressRam; + public uint SetMmeShadowRamControl; + public SetMmeShadowRamControlMode SetMmeShadowRamControlMode => (SetMmeShadowRamControlMode)((SetMmeShadowRamControl >> 0) & 0x3); + public fixed uint Reserved128[2]; + public uint SetGlobalRenderEnableA; + public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF); + public uint SetGlobalRenderEnableB; + public uint SetGlobalRenderEnableC; + public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7); + public uint SendGoIdle; + public uint PmTrigger; + public uint PmTriggerWfi; + public fixed uint Reserved148[2]; + public uint SetInstrumentationMethodHeader; + public uint SetInstrumentationMethodData; + public fixed uint Reserved158[10]; + public uint LineLengthIn; + public uint LineCount; + public uint OffsetOutUpper; + public int OffsetOutUpperValue => 
(int)((OffsetOutUpper >> 0) & 0xFF); + public uint OffsetOut; + public uint PitchOut; + public uint SetDstBlockSize; + public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF); + public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF); + public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF); + public uint SetDstWidth; + public uint SetDstHeight; + public uint SetDstDepth; + public uint SetDstLayer; + public uint SetDstOriginBytesX; + public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF); + public uint SetDstOriginSamplesY; + public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF); + public uint LaunchDma; + public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1); + public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3); + public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3); + public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1); + public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0; + public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7); + public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3); + public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0; + public uint LoadInlineData; + public fixed uint Reserved1B8[22]; + public Boolean32 EarlyZForce; + public fixed uint Reserved214[45]; + public uint SyncpointAction; + public fixed uint Reserved2CC[10]; + public uint BlendUcodeNormalizedDst; + public fixed uint Reserved2F8[10]; + public TessMode TessMode; + public Array4<float> TessOuterLevel; + public Array2<float> 
TessInnerLevel; + public fixed uint Reserved33C[16]; + public Boolean32 RasterizeEnable; + public Array4<TfBufferState> TfBufferState; + public fixed uint Reserved400[192]; + public Array4<TfState> TfState; + public fixed uint Reserved740[1]; + public Boolean32 TfEnable; + public fixed uint Reserved748[46]; + public Array8<RtColorState> RtColorState; + public Array16<ViewportTransform> ViewportTransform; + public Array16<ViewportExtents> ViewportExtents; + public fixed uint ReservedD00[29]; + public VertexBufferDrawState VertexBufferDrawState; + public uint DepthMode; + public ClearColors ClearColors; + public float ClearDepthValue; + public fixed uint ReservedD94[3]; + public uint ClearStencilValue; + public fixed uint ReservedDA4[2]; + public PolygonMode PolygonModeFront; + public PolygonMode PolygonModeBack; + public Boolean32 PolygonSmoothEnable; + public fixed uint ReservedDB8[2]; + public DepthBiasState DepthBiasState; + public int PatchVertices; + public BlendUcodeEnable BlendUcodeEnable; + public uint BlendUcodeSize; + public fixed uint ReservedDD8[2]; + public uint TextureBarrier; + public uint WatchdogTimer; + public Boolean32 PrimitiveRestartDrawArrays; + public uint ReservedDEC; + public uint LoadBlendUcodeStart; + public uint LoadBlendUcodeInstruction; + public fixed uint ReservedDF8[2]; + public Array16<ScissorState> ScissorState; + public fixed uint ReservedF00[21]; + public StencilBackMasks StencilBackMasks; + public fixed uint ReservedF60[5]; + public uint InvalidateTextures; + public fixed uint ReservedF78[1]; + public uint TextureBarrierTiled; + public fixed uint ReservedF80[4]; + public Boolean32 RtColorMaskShared; + public fixed uint ReservedF94[19]; + public RtDepthStencilState RtDepthStencilState; + public ScreenScissorState ScreenScissorState; + public fixed uint ReservedFFC[33]; + public int DrawTextureDstX; + public int DrawTextureDstY; + public int DrawTextureDstWidth; + public int DrawTextureDstHeight; + public long DrawTextureDuDx; + 
public long DrawTextureDvDy; + public int DrawTextureSamplerId; + public int DrawTextureTextureId; + public int DrawTextureSrcX; + public int DrawTextureSrcY; + public fixed uint Reserved10B0[18]; + public uint ClearFlags; + public fixed uint Reserved10FC[25]; + public Array32<VertexAttribState> VertexAttribState; + public fixed uint Reserved11E0[13]; + public uint DrawVertexArrayBeginEndInstanceFirst; + public uint DrawVertexArrayBeginEndInstanceSubsequent; + public RtControl RtControl; + public fixed uint Reserved1220[2]; + public Size3D RtDepthStencilSize; + public SamplerIndex SamplerIndex; + public fixed uint Reserved1238[37]; + public Boolean32 DepthTestEnable; + public fixed uint Reserved12D0[4]; + public Boolean32 AlphaToCoverageDitherEnable; + public Boolean32 BlendIndependent; + public Boolean32 DepthWriteEnable; + public Boolean32 AlphaTestEnable; + public fixed uint Reserved12F0[5]; + public uint VbElementU8; + public uint Reserved1308; + public CompareOp DepthTestFunc; + public float AlphaTestRef; + public CompareOp AlphaTestFunc; + public uint Reserved1318; + public ColorF BlendConstant; + public fixed uint Reserved132C[4]; + public BlendStateCommon BlendStateCommon; + public Boolean32 BlendEnableCommon; + public Array8<Boolean32> BlendEnable; + public StencilTestState StencilTestState; + public fixed uint Reserved13A0[3]; + public YControl YControl; + public float LineWidthSmooth; + public float LineWidthAliased; + public fixed uint Reserved13B8[27]; + public uint InvalidateSamplerCacheNoWfi; + public uint InvalidateTextureHeaderCacheNoWfi; + public fixed uint Reserved142C[2]; + public uint FirstVertex; + public uint FirstInstance; + public fixed uint Reserved143C[17]; + public Array8<RgbHalf> BlendUcodeConstants; + public fixed uint Reserved1500[4]; + public uint ClipDistanceEnable; + public uint Reserved1514; + public float PointSize; + public uint Reserved151C; + public Boolean32 PointSpriteEnable; + public fixed uint Reserved1524[3]; + public 
uint ResetCounter; + public Boolean32 MultisampleEnable; + public Boolean32 RtDepthStencilEnable; + public uint MultisampleControl; + public fixed uint Reserved1540[4]; + public GpuVa RenderEnableAddress; + public Condition RenderEnableCondition; + public PoolState SamplerPoolState; + public uint Reserved1568; + public float DepthBiasFactor; + public Boolean32 LineSmoothEnable; + public PoolState TexturePoolState; + public fixed uint Reserved1580[5]; + public StencilBackTestState StencilBackTestState; + public fixed uint Reserved15A8[5]; + public float DepthBiasUnits; + public fixed uint Reserved15C0[4]; + public TextureMsaaMode RtMsaaMode; + public fixed uint Reserved15D4[5]; + public uint VbElementU32; + public uint Reserved15EC; + public uint VbElementU16; + public fixed uint Reserved15F4[4]; + public uint PointCoordReplace; + public GpuVa ShaderBaseAddress; + public uint Reserved1610; + public uint DrawEnd; + public uint DrawBegin; + public fixed uint Reserved161C[10]; + public PrimitiveRestartState PrimitiveRestartState; + public fixed uint Reserved164C[95]; + public IndexBufferState IndexBufferState; + public uint IndexBufferCount; + public uint DrawIndexBuffer32BeginEndInstanceFirst; + public uint DrawIndexBuffer16BeginEndInstanceFirst; + public uint DrawIndexBuffer8BeginEndInstanceFirst; + public uint DrawIndexBuffer32BeginEndInstanceSubsequent; + public uint DrawIndexBuffer16BeginEndInstanceSubsequent; + public uint DrawIndexBuffer8BeginEndInstanceSubsequent; + public fixed uint Reserved17FC[32]; + public float DepthBiasClamp; + public Array16<Boolean32> VertexBufferInstanced; + public fixed uint Reserved18C0[20]; + public Boolean32 VertexProgramPointSize; + public uint Reserved1914; + public FaceState FaceState; + public fixed uint Reserved1924[2]; + public uint ViewportTransformEnable; + public fixed uint Reserved1930[3]; + public ViewVolumeClipControl ViewVolumeClipControl; + public fixed uint Reserved1940[2]; + public Boolean32 
PrimitiveTypeOverrideEnable; + public fixed uint Reserved194C[9]; + public PrimitiveTypeOverride PrimitiveTypeOverride; + public fixed uint Reserved1974[20]; + public LogicalOpState LogicOpState; + public uint Reserved19CC; + public uint Clear; + public fixed uint Reserved19D4[11]; + public Array8<RtColorMask> RtColorMask; + public fixed uint Reserved1A20[56]; + public GpuVa SemaphoreAddress; + public int SemaphorePayload; + public uint SemaphoreControl; + public fixed uint Reserved1B10[60]; + public Array16<VertexBufferState> VertexBufferState; + public fixed uint Reserved1D00[64]; + public Array8<BlendState> BlendState; + public Array16<GpuVa> VertexBufferEndAddress; + public fixed uint Reserved1F80[32]; + public Array6<ShaderState> ShaderState; + public fixed uint Reserved2180[96]; + public uint SetFalcon00; + public uint SetFalcon01; + public uint SetFalcon02; + public uint SetFalcon03; + public uint SetFalcon04; + public uint SetFalcon05; + public uint SetFalcon06; + public uint SetFalcon07; + public uint SetFalcon08; + public uint SetFalcon09; + public uint SetFalcon10; + public uint SetFalcon11; + public uint SetFalcon12; + public uint SetFalcon13; + public uint SetFalcon14; + public uint SetFalcon15; + public uint SetFalcon16; + public uint SetFalcon17; + public uint SetFalcon18; + public uint SetFalcon19; + public uint SetFalcon20; + public uint SetFalcon21; + public uint SetFalcon22; + public uint SetFalcon23; + public uint SetFalcon24; + public uint SetFalcon25; + public uint SetFalcon26; + public uint SetFalcon27; + public uint SetFalcon28; + public uint SetFalcon29; + public uint SetFalcon30; + public uint SetFalcon31; + public UniformBufferState UniformBufferState; + public Array16<uint> UniformBufferUpdateData; + public fixed uint Reserved23D0[16]; + public uint UniformBufferBindVertex; + public fixed uint Reserved2414[7]; + public uint UniformBufferBindTessControl; + public fixed uint Reserved2434[7]; + public uint UniformBufferBindTessEvaluation; + 
public fixed uint Reserved2454[7]; + public uint UniformBufferBindGeometry; + public fixed uint Reserved2474[7]; + public uint UniformBufferBindFragment; + public fixed uint Reserved2494[93]; + public uint TextureBufferIndex; + public fixed uint Reserved260C[125]; + public Array4<Array32<uint>> TfVaryingLocations; + public fixed uint Reserved2A00[640]; + public MmeShadowScratch SetMmeShadowScratch; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs new file mode 100644 index 00000000..4ce53e78 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs @@ -0,0 +1,379 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Texture; +using Ryujinx.Memory; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Graphics.Gpu.Engine.Twod +{ + /// <summary> + /// Represents a 2D engine class. + /// </summary> + class TwodClass : IDeviceState + { + private readonly GpuChannel _channel; + private readonly DeviceState<TwodClassState> _state; + + /// <summary> + /// Creates a new instance of the 2D engine class. + /// </summary> + /// <param name="channel">The channel that will make use of the engine</param> + public TwodClass(GpuChannel channel) + { + _channel = channel; + _state = new DeviceState<TwodClassState>(new Dictionary<string, RwCallback> + { + { nameof(TwodClassState.PixelsFromMemorySrcY0Int), new RwCallback(PixelsFromMemorySrcY0Int, null) } + }); + } + + /// <summary> + /// Reads data from the class registers. 
/// </summary>
/// <param name="offset">Byte offset of the register to read</param>
/// <returns>Value stored at the given register offset</returns>
public int Read(int offset)
{
    return _state.Read(offset);
}

/// <summary>
/// Stores a value into the class registers.
/// </summary>
/// <param name="offset">Byte offset of the register to write</param>
/// <param name="data">Value to store</param>
public void Write(int offset, int data)
{
    _state.Write(offset, data);
}

/// <summary>
/// Checks whether the raw data of two textures can be copied from one to the other as-is.
/// Bytes per pixel, height, depth, linear layout flag and packed memory layout must all match;
/// linear textures must additionally share the same stride, other textures the same width.
/// </summary>
/// <param name="lhs">Info for the first texture</param>
/// <param name="rhs">Info for the second texture</param>
/// <param name="lhsFormat">Format of the first texture</param>
/// <param name="rhsFormat">Format of the second texture</param>
/// <returns>True if the data is compatible, false otherwise</returns>
private bool IsDataCompatible(TwodTexture lhs, TwodTexture rhs, FormatInfo lhsFormat, FormatInfo rhsFormat)
{
    bool sameShape =
        lhsFormat.BytesPerPixel == rhsFormat.BytesPerPixel &&
        lhs.Height == rhs.Height &&
        lhs.Depth == rhs.Depth &&
        lhs.LinearLayout == rhs.LinearLayout &&
        lhs.MemoryLayout.Packed == rhs.MemoryLayout.Packed;

    if (!sameShape)
    {
        return false;
    }

    // Linear textures are compared by row pitch, everything else by width in pixels.
    return lhs.LinearLayout
        ? lhs.Stride == rhs.Stride
        : lhs.Width == rhs.Width;
}

/// <summary>
/// Determine if the given region covers the full texture, also considering width alignment.
/// </summary>
/// <param name="texture">The texture to check</param>
/// <param name="formatInfo">Format of the texture; its bytes per pixel converts the stride and GOB alignment into pixels</param>
/// <param name="x1">Region start x</param>
/// <param name="y1">Region start y</param>
/// <param name="x2">Region end x</param>
/// <param name="y2">Region end y</param>
/// <returns>True if the region covers the full texture, false otherwise</returns>
private bool IsCopyRegionComplete(TwodTexture texture, FormatInfo formatInfo, int x1, int y1, int x2, int y2)
{
    // The region has to start at the origin and span every row of the texture.
    bool startsAtOrigin = x1 == 0 && y1 == 0;

    if (!startsAtOrigin || y2 != texture.Height)
    {
        return false;
    }

    int bytesPerPixel = formatInfo.BytesPerPixel;
    bool isLinear = texture.LinearLayout;

    // Linear textures are measured by their stride (in pixels) with no extra alignment,
    // other textures by their width, with the end x rounded up to the GOB alignment (in pixels).
    int widthAlignment = isLinear ? 1 : Constants.GobAlignment / bytesPerPixel;
    int expectedWidth = isLinear ? texture.Stride / bytesPerPixel : texture.Width;

    return BitUtils.AlignUp(x2, widthAlignment) == expectedWidth;
}

/// <summary>
/// Performs a full data copy between two textures, reading and writing guest memory directly.
/// The textures must have a matching layout, size, and bytes per pixel.
/// </summary>
/// <remarks>
/// When the copy rectangle matches the whole source texture the data is written in a single call.
/// Otherwise only the <paramref name="w"/> x <paramref name="h"/> rectangle is copied, row by row for
/// linear textures and through an <see cref="OffsetCalculator"/> for the remaining layouts.
/// </remarks>
/// <param name="src">The source texture</param>
/// <param name="dst">The destination texture</param>
/// <param name="w">Copy width</param>
/// <param name="h">Copy height</param>
/// <param name="bpp">Bytes per pixel</param>
private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
{
    var srcCalculator = new OffsetCalculator(
        w,
        h,
        src.Stride,
        src.LinearLayout,
        src.MemoryLayout.UnpackGobBlocksInY(),
        src.MemoryLayout.UnpackGobBlocksInZ(),
        bpp);

    // Only the size of the w x h rectangle is needed; the start offset is discarded.
    (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);

    var memoryManager = _channel.MemoryManager;

    ulong srcGpuVa = src.Address.Pack();
    ulong dstGpuVa = dst.Address.Pack();

    ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);

    int width;
    int height = src.Height;
    if (src.LinearLayout)
    {
        width = src.Stride / bpp;
    }
    else
    {
        width = src.Width;
    }

    // If the copy is not equal to the width and height of the texture, we will need to copy partially.
    // It's worth noting that it has already been established that the src and dst are the same size.

    if (w == width && h == height)
    {
        memoryManager.Write(dstGpuVa, srcSpan);
    }
    else
    {
        using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
        Span<byte> dstSpan = dstRegion.Memory.Span;

        if (src.LinearLayout)
        {
            // Both spans were sized from the w x h rectangle above, so only that rectangle is walked here
            // (same extents as the non-linear path below). Walking the full texture width/height instead
            // could slice past the end of the spans when the copy is genuinely partial.
            int stride = src.Stride;
            int offset = 0;
            int lineSize = w * bpp;

            for (int y = 0; y < h; y++)
            {
                srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));

                offset += stride;
            }
        }
        else
        {
            // Copy with the block linear layout in mind.
            // Recreate the offset calculator with bpp 1 for copy.

            int stride = w * bpp;

            srcCalculator = new OffsetCalculator(
                stride,
                h,
                0,
                false,
                src.MemoryLayout.UnpackGobBlocksInY(),
                src.MemoryLayout.UnpackGobBlocksInZ(),
                1);

            // Portion of each row that can be moved 16 bytes at a time.
            int strideTrunc = BitUtils.AlignDown(stride, 16);

            ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
            Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);

            for (int y = 0; y < h; y++)
            {
                int x = 0;

                srcCalculator.SetY(y);

                // Byte offsets are converted to 16-byte vector indices with the shift.
                for (; x < strideTrunc; x += 16)
                {
                    int offset = srcCalculator.GetOffset(x) >> 4;

                    dstVec[offset] = srcVec[offset];
                }

                // Remaining bytes of the row that do not fill a whole vector.
                for (; x < stride; x++)
                {
                    int offset = srcCalculator.GetOffset(x);

                    dstSpan[offset] = srcSpan[offset];
                }
            }
        }
    }
}

/// <summary>
/// Performs the blit operation, triggered by the register write.
/// </summary>
/// <remarks>
/// The source region is derived from the 32.32 fixed point source origin and per-pixel deltas
/// (the "Int" register is the upper 32 bits, the "Frac" register the lower 32 bits), and the
/// destination region from the destination origin and size registers.
/// If no source texture is returned by the cache and a direct copy is possible, the data is copied
/// on the CPU with <see cref="UnscaledFullCopy"/>; otherwise the copy is issued between host textures.
/// </remarks>
/// <param name="argument">Method call argument (not read here; the values are taken from the register state)</param>
private void PixelsFromMemorySrcY0Int(int argument)
{
    var memoryManager = _channel.MemoryManager;

    var dstCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetDstFormat);
    var srcCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetSrcFormat);

    long srcX = ((long)_state.State.SetPixelsFromMemorySrcX0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcX0Frac;
    long srcY = ((long)_state.State.PixelsFromMemorySrcY0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcY0Frac;

    long duDx = ((long)_state.State.SetPixelsFromMemoryDuDxInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDuDxFrac;
    long dvDy = ((long)_state.State.SetPixelsFromMemoryDvDyInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDvDyFrac;

    bool originCorner = _state.State.SetPixelsFromMemorySampleModeOrigin == SetPixelsFromMemorySampleModeOrigin.Corner;

    if (originCorner)
    {
        // If the origin is corner, it is assumed that the guest API
        // is manually centering the origin by adding a offset to the
        // source region X/Y coordinates.
        // Here we attempt to remove such offset to ensure we have the correct region.
        // The offset is calculated as FactorXY / 2.0, where FactorXY = SrcXY / DstXY,
        // so we do the same here by dividing the fixed point value by 2, while
        // throwing away the fractional part to avoid rounding errors.
        srcX -= (duDx >> 33) << 32;
        srcY -= (dvDy >> 33) << 32;
    }

    int srcX1 = (int)(srcX >> 32);
    int srcY1 = (int)(srcY >> 32);

    // Adding uint.MaxValue before the shift rounds the fixed point extent up to the next integer.
    int srcX2 = srcX1 + (int)((duDx * _state.State.SetPixelsFromMemoryDstWidth + uint.MaxValue) >> 32);
    int srcY2 = srcY1 + (int)((dvDy * _state.State.SetPixelsFromMemoryDstHeight + uint.MaxValue) >> 32);

    int dstX1 = (int)_state.State.SetPixelsFromMemoryDstX0;
    int dstY1 = (int)_state.State.SetPixelsFromMemoryDstY0;

    int dstX2 = dstX1 + (int)_state.State.SetPixelsFromMemoryDstWidth;
    int dstY2 = dstY1 + (int)_state.State.SetPixelsFromMemoryDstHeight;

    // The source and destination textures should at least be as big as the region being requested.
    // The hints will only resize within alignment constraints, so out of bound copies won't resize in most cases.
    var srcHint = new Size(srcX2, srcY2, 1);
    var dstHint = new Size(dstX2, dstY2, 1);

    var srcCopyTextureFormat = srcCopyTexture.Format.Convert();

    int srcWidthAligned = srcCopyTexture.Stride / srcCopyTextureFormat.BytesPerPixel;

    ulong offset = 0;

    // For an out of bounds copy, we must ensure that the copy wraps to the next line,
    // so for a copy from a 64x64 texture, in the region [32, 96[, there are 32 pixels that are
    // outside the bounds of the texture. We fill the destination with the first 32 pixels
    // of the next line on the source texture.
    // This can be done by simply adding an offset to the texture address, so that the initial
    // gap is skipped and the copy is inside bounds again.
    // This is required by the proprietary guest OpenGL driver.
    if (srcCopyTexture.LinearLayout && srcCopyTexture.Width == srcX2 && srcX2 > srcWidthAligned && srcX1 > 0)
    {
        offset = (ulong)(srcX1 * srcCopyTextureFormat.BytesPerPixel);
        srcCopyTexture.Width -= srcX1;
        srcX2 -= srcX1;
        srcX1 = 0;
    }

    FormatInfo dstCopyTextureFormat = dstCopyTexture.Format.Convert();

    // A direct (CPU) copy requires identical end coordinates, compatible data on both sides,
    // and both regions covering their whole texture.
    bool canDirectCopy = GraphicsConfig.Fast2DCopy &&
        srcX2 == dstX2 && srcY2 == dstY2 &&
        IsDataCompatible(srcCopyTexture, dstCopyTexture, srcCopyTextureFormat, dstCopyTextureFormat) &&
        IsCopyRegionComplete(srcCopyTexture, srcCopyTextureFormat, srcX1, srcY1, srcX2, srcY2) &&
        IsCopyRegionComplete(dstCopyTexture, dstCopyTextureFormat, dstX1, dstY1, dstX2, dstY2);

    var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
        memoryManager,
        srcCopyTexture,
        offset,
        srcCopyTextureFormat,
        !canDirectCopy,
        false,
        srcHint);

    if (srcTexture == null)
    {
        if (canDirectCopy)
        {
            // Directly copy the data on CPU.
            UnscaledFullCopy(srcCopyTexture, dstCopyTexture, srcX2, srcY2, srcCopyTextureFormat.BytesPerPixel);
        }

        return;
    }

    memoryManager.Physical.TextureCache.Lift(srcTexture);

    // When the source texture that was found has a depth format,
    // we must enforce the target texture also has a depth format,
    // as copies between depth and color formats are not allowed.

    if (srcTexture.Format.IsDepthOrStencil())
    {
        dstCopyTextureFormat = srcTexture.Info.FormatInfo;
    }
    else
    {
        dstCopyTextureFormat = dstCopyTexture.Format.Convert();
    }

    var dstTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
        memoryManager,
        dstCopyTexture,
        0,
        dstCopyTextureFormat,
        true,
        srcTexture.ScaleMode == TextureScaleMode.Scaled,
        dstHint);

    if (dstTexture == null)
    {
        return;
    }

    if (srcTexture.Info.Samples > 1 || dstTexture.Info.Samples > 1)
    {
        srcTexture.PropagateScale(dstTexture);
    }

    float scale = srcTexture.ScaleFactor;
    float dstScale = dstTexture.ScaleFactor;

    // Guest coordinates are divided by the per-axis sample counts and then multiplied
    // by each texture's scale factor to obtain the host regions.
    Extents2D srcRegion = new Extents2D(
        (int)Math.Ceiling(scale * (srcX1 / srcTexture.Info.SamplesInX)),
        (int)Math.Ceiling(scale * (srcY1 / srcTexture.Info.SamplesInY)),
        (int)Math.Ceiling(scale * (srcX2 / srcTexture.Info.SamplesInX)),
        (int)Math.Ceiling(scale * (srcY2 / srcTexture.Info.SamplesInY)));

    Extents2D dstRegion = new Extents2D(
        (int)Math.Ceiling(dstScale * (dstX1 / dstTexture.Info.SamplesInX)),
        (int)Math.Ceiling(dstScale * (dstY1 / dstTexture.Info.SamplesInY)),
        (int)Math.Ceiling(dstScale * (dstX2 / dstTexture.Info.SamplesInX)),
        (int)Math.Ceiling(dstScale * (dstY2 / dstTexture.Info.SamplesInY)));

    bool linearFilter = _state.State.SetPixelsFromMemorySampleModeFilter == SetPixelsFromMemorySampleModeFilter.Bilinear;

    srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);

    dstTexture.SignalModified();
}
}
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
new file mode 100644
index 00000000..46fddb04
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
@@ -0,0 +1,816 @@
// This file was auto-generated from NVIDIA official Maxwell definitions.

using Ryujinx.Common.Memory;

namespace Ryujinx.Graphics.Gpu.Engine.Twod
{
/// <summary>
/// Notify type.
/// </summary>
/// <remarks>Exposed by <c>TwodClassState.NotifyType</c>, which casts the whole <c>Notify</c> register.</remarks>
enum NotifyType
{
    WriteOnly = 0,
    WriteThenAwaken = 1,
}

/// <summary>
/// Format of the destination texture.
/// </summary>
/// <remarks>Decoded from the low 8 bits of the <c>SetDstFormat</c> register.</remarks>
enum SetDstFormatV
{
    A8r8g8b8 = 207,
    A8rl8gl8bl8 = 208,
    A2r10g10b10 = 223,
    A8b8g8r8 = 213,
    A8bl8gl8rl8 = 214,
    A2b10g10r10 = 209,
    X8r8g8b8 = 230,
    X8rl8gl8bl8 = 231,
    X8b8g8r8 = 249,
    X8bl8gl8rl8 = 250,
    R5g6b5 = 232,
    A1r5g5b5 = 233,
    X1r5g5b5 = 248,
    Y8 = 243,
    Y16 = 238,
    Y32 = 255,
    Z1r5g5b5 = 251,
    O1r5g5b5 = 252,
    Z8r8g8b8 = 253,
    O8r8g8b8 = 254,
    Y18x8 = 28,
    Rf16 = 242,
    Rf32 = 229,
    Rf32Gf32 = 203,
    Rf16Gf16Bf16Af16 = 202,
    Rf16Gf16Bf16X16 = 206,
    Rf32Gf32Bf32Af32 = 192,
    Rf32Gf32Bf32X32 = 195,
    R16G16B16A16 = 198,
    Rn16Gn16Bn16An16 = 199,
    Bf10gf11rf11 = 224,
    An8bn8gn8rn8 = 215,
    Rf16Gf16 = 222,
    R16G16 = 218,
    Rn16Gn16 = 219,
    G8r8 = 234,
    Gn8rn8 = 235,
    Rn16 = 239,
    Rn8 = 244,
    A8 = 247,
}

/// <summary>
/// Memory layout of the destination texture.
/// </summary>
/// <remarks>Decoded from bit 0 of the <c>SetDstMemoryLayout</c> register.</remarks>
enum SetDstMemoryLayoutV
{
    Blocklinear = 0,
    Pitch = 1,
}

/// <summary>
/// Height in GOBs of the destination texture.
/// </summary>
/// <remarks>
/// Decoded from bits 4-6 of the <c>SetDstBlockSize</c> register.
/// Each value is the base-2 logarithm of the GOB count named by the member.
/// </remarks>
enum SetDstBlockSizeHeight
{
    OneGob = 0,
    TwoGobs = 1,
    FourGobs = 2,
    EightGobs = 3,
    SixteenGobs = 4,
    ThirtytwoGobs = 5,
}

/// <summary>
/// Depth in GOBs of the destination texture.
/// </summary>
/// <remarks>
/// Decoded from bits 8-10 of the <c>SetDstBlockSize</c> register.
/// Each value is the base-2 logarithm of the GOB count named by the member.
/// </remarks>
enum SetDstBlockSizeDepth
{
    OneGob = 0,
    TwoGobs = 1,
    FourGobs = 2,
    EightGobs = 3,
    SixteenGobs = 4,
    ThirtytwoGobs = 5,
}

/// <summary>
/// Format of the source texture.
/// </summary>
/// <remarks>Decoded from the low 8 bits of the <c>SetSrcFormat</c> register.</remarks>
enum SetSrcFormatV
{
    A8r8g8b8 = 207,
    A8rl8gl8bl8 = 208,
    A2r10g10b10 = 223,
    A8b8g8r8 = 213,
    A8bl8gl8rl8 = 214,
    A2b10g10r10 = 209,
    X8r8g8b8 = 230,
    X8rl8gl8bl8 = 231,
    X8b8g8r8 = 249,
    X8bl8gl8rl8 = 250,
    R5g6b5 = 232,
    A1r5g5b5 = 233,
    X1r5g5b5 = 248,
    Y8 = 243,
    Ay8 = 29,
    Y16 = 238,
    Y32 = 255,
    Z1r5g5b5 = 251,
    O1r5g5b5 = 252,
    Z8r8g8b8 = 253,
    O8r8g8b8 = 254,
    Y18x8 = 28,
    Rf16 = 242,
    Rf32 = 229,
    Rf32Gf32 = 203,
    Rf16Gf16Bf16Af16 = 202,
    Rf16Gf16Bf16X16 = 206,
    Rf32Gf32Bf32Af32 = 192,
    Rf32Gf32Bf32X32 = 195,
    R16G16B16A16 = 198,
    Rn16Gn16Bn16An16 = 199,
    Bf10gf11rf11 = 224,
    An8bn8gn8rn8 = 215,
    Rf16Gf16 = 222,
    R16G16 = 218,
    Rn16Gn16 = 219,
    G8r8 = 234,
    Gn8rn8 = 235,
    Rn16 = 239,
    Rn8 = 244,
    A8 = 247,
}

/// <summary>
/// Memory layout of the source texture.
/// </summary>
/// <remarks>Decoded from bit 0 of the <c>SetSrcMemoryLayout</c> register.</remarks>
enum SetSrcMemoryLayoutV
{
    Blocklinear = 0,
    Pitch = 1,
}

/// <summary>
/// Height in GOBs of the source texture.
/// </summary>
/// <remarks>
/// Decoded from bits 4-6 of the <c>SetSrcBlockSize</c> register.
/// Each value is the base-2 logarithm of the GOB count named by the member.
/// </remarks>
enum SetSrcBlockSizeHeight
{
    OneGob = 0,
    TwoGobs = 1,
    FourGobs = 2,
    EightGobs = 3,
    SixteenGobs = 4,
    ThirtytwoGobs = 5,
}

/// <summary>
/// Depth in GOBs of the source texture.
/// </summary>
/// <remarks>
/// Decoded from bits 8-10 of the <c>SetSrcBlockSize</c> register.
/// Each value is the base-2 logarithm of the GOB count named by the member.
/// </remarks>
enum SetSrcBlockSizeDepth
{
    OneGob = 0,
    TwoGobs = 1,
    FourGobs = 2,
    EightGobs = 3,
    SixteenGobs = 4,
    ThirtytwoGobs = 5,
}

/// <summary>
/// Texture data caches to invalidate.
/// </summary>
/// <remarks>Decoded from the low 2 bits of the <c>TwodInvalidateTextureDataCache</c> register.</remarks>
enum TwodInvalidateTextureDataCacheV
{
    L1Only = 0,
    L2Only = 1,
    L1AndL2 = 2,
}

/// <summary>
/// Sector promotion parameters.
/// </summary>
/// <remarks>Decoded from the low 2 bits of the <c>SetPixelsFromMemorySectorPromotion</c> register.</remarks>
enum SetPixelsFromMemorySectorPromotionV
{
    NoPromotion = 0,
    PromoteTo2V = 1,
    PromoteTo2H = 2,
    PromoteTo4 = 3,
}

/// <summary>
/// Number of processing clusters.
/// </summary>
/// <remarks>Decoded from bit 0 of the <c>SetNumProcessingClusters</c> register.</remarks>
enum SetNumProcessingClustersV
{
    All = 0,
    One = 1,
}

/// <summary>
/// Color key format.
/// </summary>
/// <remarks>Decoded from the low 3 bits of the <c>SetColorKeyFormat</c> register.</remarks>
enum SetColorKeyFormatV
{
    A16r5g6b5 = 0,
    A1r5g5b5 = 1,
    A8r8g8b8 = 2,
    A2r10g10b10 = 3,
    Y8 = 4,
    Y16 = 5,
    Y32 = 6,
}

/// <summary>
/// Color blit operation.
/// </summary>
/// <remarks>Decoded from the low 3 bits of the <c>SetOperation</c> register.</remarks>
enum SetOperationV
{
    SrccopyAnd = 0,
    RopAnd = 1,
    BlendAnd = 2,
    Srccopy = 3,
    Rop = 4,
    SrccopyPremult = 5,
    BlendPremult = 6,
}

/// <summary>
/// Texture pattern selection.
/// </summary>
/// <remarks>Decoded from the low 2 bits of the <c>SetPatternSelect</c> register.</remarks>
enum SetPatternSelectV
{
    Monochrome8x8 = 0,
    Monochrome64x1 = 1,
    Monochrome1x64 = 2,
    Color = 3,
}

/// <summary>
/// Render enable override mode.
/// </summary>
/// <remarks>Decoded from the low 2 bits of the <c>SetRenderEnableOverride</c> register.</remarks>
enum SetRenderEnableOverrideMode
{
    UseRenderEnable = 0,
    AlwaysRender = 1,
    NeverRender = 2,
}

/// <summary>
/// Pixels from memory horizontal direction.
/// </summary>
/// <remarks>Decoded from bits 0-1 of the <c>SetPixelsFromMemoryDirection</c> register.</remarks>
enum SetPixelsFromMemoryDirectionHorizontal
{
    HwDecides = 0,
    LeftToRight = 1,
    RightToLeft = 2,
}

/// <summary>
/// Pixels from memory vertical direction.
/// </summary>
/// <remarks>Decoded from bits 4-5 of the <c>SetPixelsFromMemoryDirection</c> register.</remarks>
enum SetPixelsFromMemoryDirectionVertical
{
    HwDecides = 0,
    TopToBottom = 1,
    BottomToTop = 2,
}

/// <summary>
/// Color format of the monochrome pattern.
/// </summary>
/// <remarks>Decoded from the low 3 bits of the <c>SetMonochromePatternColorFormat</c> register.</remarks>
enum SetMonochromePatternColorFormatV
{
    A8x8r5g6b5 = 0,
    A1r5g5b5 = 1,
    A8r8g8b8 = 2,
    A8y8 = 3,
    A8x8y16 = 4,
    Y32 = 5,
    ByteExpand = 6,
}

/// <summary>
/// Format of the monochrome pattern.
/// </summary>
/// <remarks>Decoded from bit 0 of the <c>SetMonochromePatternFormat</c> register.</remarks>
enum SetMonochromePatternFormatV
{
    Cga6M1 = 0,
    LeM1 = 1,
}

/// <summary>
/// DMA semaphore reduction operation.
/// </summary>
enum MmeDmaReductionReductionOp
{
    RedAdd = 0,
    RedMin = 1,
    RedMax = 2,
    RedInc = 3,
    RedDec = 4,
    RedAnd = 5,
    RedOr = 6,
    RedXor = 7,
}

/// <summary>
/// DMA semaphore reduction format.
/// </summary>
enum MmeDmaReductionReductionFormat
{
    Unsigned = 0,
    Signed = 1,
}

/// <summary>
/// DMA semaphore reduction size.
/// </summary>
enum MmeDmaReductionReductionSize
{
    FourBytes = 0,
    EightBytes = 1,
}

/// <summary>
/// Data FIFO size.
/// </summary>
/// <remarks>Member names carry the size in kilobytes; values increase in 4 KB steps.</remarks>
enum SetMmeDataFifoConfigFifoSize
{
    Size0kb = 0,
    Size4kb = 1,
    Size8kb = 2,
    Size12kb = 3,
    Size16kb = 4,
}

/// <summary>
/// Render solid primitive mode.
/// </summary>
enum RenderSolidPrimModeV
{
    Points = 0,
    Lines = 1,
    Polyline = 2,
    Triangles = 3,
    Rects = 4,
}

/// <summary>
/// Render solid primitive color format.
/// </summary>
/// <remarks>Members use the same numeric values as the identically named members of <see cref="SetDstFormatV"/>.</remarks>
enum SetRenderSolidPrimColorFormatV
{
    Rf32Gf32Bf32Af32 = 192,
    Rf16Gf16Bf16Af16 = 202,
    Rf32Gf32 = 203,
    A8r8g8b8 = 207,
    A2r10g10b10 = 223,
    A8b8g8r8 = 213,
    A2b10g10r10 = 209,
    X8r8g8b8 = 230,
    X8b8g8r8 = 249,
    R5g6b5 = 232,
    A1r5g5b5 = 233,
    X1r5g5b5 = 248,
    Y8 = 243,
    Y16 = 238,
    Y32 = 255,
    Z1r5g5b5 = 251,
    O1r5g5b5 = 252,
    Z8r8g8b8 = 253,
    O8r8g8b8 = 254,
}

/// <summary>
/// Pixels from CPU data type.
/// </summary>
enum SetPixelsFromCpuDataTypeV
{
    Color = 0,
    Index = 1,
}

/// <summary>
/// Pixels from CPU color format.
/// </summary>
/// <remarks>Members use the same numeric values as the identically named members of <see cref="SetDstFormatV"/>.</remarks>
enum SetPixelsFromCpuColorFormatV
{
    A8r8g8b8 = 207,
    A2r10g10b10 = 223,
    A8b8g8r8 = 213,
    A2b10g10r10 = 209,
    X8r8g8b8 = 230,
    X8b8g8r8 = 249,
    R5g6b5 = 232,
    A1r5g5b5 = 233,
    X1r5g5b5 = 248,
    Y8 = 243,
    Y16 = 238,
    Y32 = 255,
    Z1r5g5b5 = 251,
    O1r5g5b5 = 252,
    Z8r8g8b8 = 253,
    O8r8g8b8 = 254,
}

/// <summary>
/// Pixels from CPU palette index format.
/// </summary>
enum SetPixelsFromCpuIndexFormatV
{
    I1 = 0,
    I4 = 1,
    I8 = 2,
}

/// <summary>
/// Pixels from CPU monochrome format.
/// </summary>
enum SetPixelsFromCpuMonoFormatV
{
    Cga6M1 = 0,
    LeM1 = 1,
}

/// <summary>
/// Pixels from CPU wrap mode.
/// </summary>
enum SetPixelsFromCpuWrapV
{
    WrapPixel = 0,
    WrapByte = 1,
    WrapDword = 2,
}

/// <summary>
/// Pixels from CPU monochrome opacity.
+ /// </summary> + enum SetPixelsFromCpuMonoOpacityV + { + Transparent = 0, + Opaque = 1, + } + + /// <summary> + /// Pixels from memory block shape. + /// </summary> + enum SetPixelsFromMemoryBlockShapeV + { + Auto = 0, + Shape8x8 = 1, + Shape16x4 = 2, + } + + /// <summary> + /// Pixels from memory origin. + /// </summary> + enum SetPixelsFromMemorySampleModeOrigin + { + Center = 0, + Corner = 1, + } + + /// <summary> + /// Pixels from memory filter mode. + /// </summary> + enum SetPixelsFromMemorySampleModeFilter + { + Point = 0, + Bilinear = 1, + } + + /// <summary> + /// Render solid primitive point coordinates. + /// </summary> + struct RenderSolidPrimPoint + { +#pragma warning disable CS0649 + public uint SetX; + public uint Y; +#pragma warning restore CS0649 + } + + /// <summary> + /// 2D class state. + /// </summary> + unsafe struct TwodClassState : IShadowState + { +#pragma warning disable CS0649 + public uint SetObject; + public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF); + public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F); + public fixed uint Reserved04[63]; + public uint NoOperation; + public uint SetNotifyA; + public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0x1FFFFFF); + public uint SetNotifyB; + public uint Notify; + public NotifyType NotifyType => (NotifyType)(Notify); + public uint WaitForIdle; + public uint LoadMmeInstructionRamPointer; + public uint LoadMmeInstructionRam; + public uint LoadMmeStartAddressRamPointer; + public uint LoadMmeStartAddressRam; + public uint SetMmeShadowRamControl; + public SetMmeShadowRamControlMode SetMmeShadowRamControlMode => (SetMmeShadowRamControlMode)((SetMmeShadowRamControl >> 0) & 0x3); + public fixed uint Reserved128[2]; + public uint SetGlobalRenderEnableA; + public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF); + public uint SetGlobalRenderEnableB; + public uint SetGlobalRenderEnableC; + public int SetGlobalRenderEnableCMode => 
(int)((SetGlobalRenderEnableC >> 0) & 0x7); + public uint SendGoIdle; + public uint PmTrigger; + public fixed uint Reserved144[3]; + public uint SetInstrumentationMethodHeader; + public uint SetInstrumentationMethodData; + public fixed uint Reserved158[37]; + public uint SetMmeSwitchState; + public bool SetMmeSwitchStateValid => (SetMmeSwitchState & 0x1) != 0; + public int SetMmeSwitchStateSaveMacro => (int)((SetMmeSwitchState >> 4) & 0xFF); + public int SetMmeSwitchStateRestoreMacro => (int)((SetMmeSwitchState >> 12) & 0xFF); + public fixed uint Reserved1F0[4]; + public uint SetDstFormat; + public SetDstFormatV SetDstFormatV => (SetDstFormatV)((SetDstFormat >> 0) & 0xFF); + public uint SetDstMemoryLayout; + public SetDstMemoryLayoutV SetDstMemoryLayoutV => (SetDstMemoryLayoutV)((SetDstMemoryLayout >> 0) & 0x1); + public uint SetDstBlockSize; + public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0x7); + public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0x7); + public uint SetDstDepth; + public uint SetDstLayer; + public uint SetDstPitch; + public uint SetDstWidth; + public uint SetDstHeight; + public uint SetDstOffsetUpper; + public int SetDstOffsetUpperV => (int)((SetDstOffsetUpper >> 0) & 0xFF); + public uint SetDstOffsetLower; + public uint FlushAndInvalidateRopMiniCache; + public bool FlushAndInvalidateRopMiniCacheV => (FlushAndInvalidateRopMiniCache & 0x1) != 0; + public uint SetSpareNoop06; + public uint SetSrcFormat; + public SetSrcFormatV SetSrcFormatV => (SetSrcFormatV)((SetSrcFormat >> 0) & 0xFF); + public uint SetSrcMemoryLayout; + public SetSrcMemoryLayoutV SetSrcMemoryLayoutV => (SetSrcMemoryLayoutV)((SetSrcMemoryLayout >> 0) & 0x1); + public uint SetSrcBlockSize; + public SetSrcBlockSizeHeight SetSrcBlockSizeHeight => (SetSrcBlockSizeHeight)((SetSrcBlockSize >> 4) & 0x7); + public SetSrcBlockSizeDepth SetSrcBlockSizeDepth => 
(SetSrcBlockSizeDepth)((SetSrcBlockSize >> 8) & 0x7); + public uint SetSrcDepth; + public uint TwodInvalidateTextureDataCache; + public TwodInvalidateTextureDataCacheV TwodInvalidateTextureDataCacheV => (TwodInvalidateTextureDataCacheV)((TwodInvalidateTextureDataCache >> 0) & 0x3); + public uint SetSrcPitch; + public uint SetSrcWidth; + public uint SetSrcHeight; + public uint SetSrcOffsetUpper; + public int SetSrcOffsetUpperV => (int)((SetSrcOffsetUpper >> 0) & 0xFF); + public uint SetSrcOffsetLower; + public uint SetPixelsFromMemorySectorPromotion; + public SetPixelsFromMemorySectorPromotionV SetPixelsFromMemorySectorPromotionV => (SetPixelsFromMemorySectorPromotionV)((SetPixelsFromMemorySectorPromotion >> 0) & 0x3); + public uint SetSpareNoop12; + public uint SetNumProcessingClusters; + public SetNumProcessingClustersV SetNumProcessingClustersV => (SetNumProcessingClustersV)((SetNumProcessingClusters >> 0) & 0x1); + public uint SetRenderEnableA; + public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF); + public uint SetRenderEnableB; + public uint SetRenderEnableC; + public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7); + public uint SetSpareNoop08; + public uint SetSpareNoop01; + public uint SetSpareNoop11; + public uint SetSpareNoop07; + public uint SetClipX0; + public uint SetClipY0; + public uint SetClipWidth; + public uint SetClipHeight; + public uint SetClipEnable; + public bool SetClipEnableV => (SetClipEnable & 0x1) != 0; + public uint SetColorKeyFormat; + public SetColorKeyFormatV SetColorKeyFormatV => (SetColorKeyFormatV)((SetColorKeyFormat >> 0) & 0x7); + public uint SetColorKey; + public uint SetColorKeyEnable; + public bool SetColorKeyEnableV => (SetColorKeyEnable & 0x1) != 0; + public uint SetRop; + public int SetRopV => (int)((SetRop >> 0) & 0xFF); + public uint SetBeta1; + public uint SetBeta4; + public int SetBeta4B => (int)((SetBeta4 >> 0) & 0xFF); + public int SetBeta4G => (int)((SetBeta4 >> 8) & 
0xFF); + public int SetBeta4R => (int)((SetBeta4 >> 16) & 0xFF); + public int SetBeta4A => (int)((SetBeta4 >> 24) & 0xFF); + public uint SetOperation; + public SetOperationV SetOperationV => (SetOperationV)((SetOperation >> 0) & 0x7); + public uint SetPatternOffset; + public int SetPatternOffsetX => (int)((SetPatternOffset >> 0) & 0x3F); + public int SetPatternOffsetY => (int)((SetPatternOffset >> 8) & 0x3F); + public uint SetPatternSelect; + public SetPatternSelectV SetPatternSelectV => (SetPatternSelectV)((SetPatternSelect >> 0) & 0x3); + public uint SetDstColorRenderToZetaSurface; + public bool SetDstColorRenderToZetaSurfaceV => (SetDstColorRenderToZetaSurface & 0x1) != 0; + public uint SetSpareNoop04; + public uint SetSpareNoop15; + public uint SetSpareNoop13; + public uint SetSpareNoop03; + public uint SetSpareNoop14; + public uint SetSpareNoop02; + public uint SetCompression; + public bool SetCompressionEnable => (SetCompression & 0x1) != 0; + public uint SetSpareNoop09; + public uint SetRenderEnableOverride; + public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3); + public uint SetPixelsFromMemoryDirection; + public SetPixelsFromMemoryDirectionHorizontal SetPixelsFromMemoryDirectionHorizontal => (SetPixelsFromMemoryDirectionHorizontal)((SetPixelsFromMemoryDirection >> 0) & 0x3); + public SetPixelsFromMemoryDirectionVertical SetPixelsFromMemoryDirectionVertical => (SetPixelsFromMemoryDirectionVertical)((SetPixelsFromMemoryDirection >> 4) & 0x3); + public uint SetSpareNoop10; + public uint SetMonochromePatternColorFormat; + public SetMonochromePatternColorFormatV SetMonochromePatternColorFormatV => (SetMonochromePatternColorFormatV)((SetMonochromePatternColorFormat >> 0) & 0x7); + public uint SetMonochromePatternFormat; + public SetMonochromePatternFormatV SetMonochromePatternFormatV => (SetMonochromePatternFormatV)((SetMonochromePatternFormat >> 0) & 0x1); + public uint 
SetMonochromePatternColor0; + public uint SetMonochromePatternColor1; + public uint SetMonochromePattern0; + public uint SetMonochromePattern1; + public Array64<uint> ColorPatternX8r8g8b8; + public int ColorPatternX8r8g8b8B0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 0) & 0xFF); + public int ColorPatternX8r8g8b8G0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 8) & 0xFF); + public int ColorPatternX8r8g8b8R0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 16) & 0xFF); + public int ColorPatternX8r8g8b8Ignore0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 24) & 0xFF); + public Array32<uint> ColorPatternR5g6b5; + public int ColorPatternR5g6b5B0(int i) => (int)((ColorPatternR5g6b5[i] >> 0) & 0x1F); + public int ColorPatternR5g6b5G0(int i) => (int)((ColorPatternR5g6b5[i] >> 5) & 0x3F); + public int ColorPatternR5g6b5R0(int i) => (int)((ColorPatternR5g6b5[i] >> 11) & 0x1F); + public int ColorPatternR5g6b5B1(int i) => (int)((ColorPatternR5g6b5[i] >> 16) & 0x1F); + public int ColorPatternR5g6b5G1(int i) => (int)((ColorPatternR5g6b5[i] >> 21) & 0x3F); + public int ColorPatternR5g6b5R1(int i) => (int)((ColorPatternR5g6b5[i] >> 27) & 0x1F); + public Array32<uint> ColorPatternX1r5g5b5; + public int ColorPatternX1r5g5b5B0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 0) & 0x1F); + public int ColorPatternX1r5g5b5G0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 5) & 0x1F); + public int ColorPatternX1r5g5b5R0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 10) & 0x1F); + public bool ColorPatternX1r5g5b5Ignore0(int i) => (ColorPatternX1r5g5b5[i] & 0x8000) != 0; + public int ColorPatternX1r5g5b5B1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 16) & 0x1F); + public int ColorPatternX1r5g5b5G1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 21) & 0x1F); + public int ColorPatternX1r5g5b5R1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 26) & 0x1F); + public bool ColorPatternX1r5g5b5Ignore1(int i) => (ColorPatternX1r5g5b5[i] & 0x80000000) != 0; + public Array16<uint> ColorPatternY8; + public int 
ColorPatternY8Y0(int i) => (int)((ColorPatternY8[i] >> 0) & 0xFF); + public int ColorPatternY8Y1(int i) => (int)((ColorPatternY8[i] >> 8) & 0xFF); + public int ColorPatternY8Y2(int i) => (int)((ColorPatternY8[i] >> 16) & 0xFF); + public int ColorPatternY8Y3(int i) => (int)((ColorPatternY8[i] >> 24) & 0xFF); + public uint SetRenderSolidPrimColor0; + public uint SetRenderSolidPrimColor1; + public uint SetRenderSolidPrimColor2; + public uint SetRenderSolidPrimColor3; + public uint SetMmeMemAddressA; + public int SetMmeMemAddressAUpper => (int)((SetMmeMemAddressA >> 0) & 0x1FFFFFF); + public uint SetMmeMemAddressB; + public uint SetMmeDataRamAddress; + public uint MmeDmaRead; + public uint MmeDmaReadFifoed; + public uint MmeDmaWrite; + public uint MmeDmaReduction; + public MmeDmaReductionReductionOp MmeDmaReductionReductionOp => (MmeDmaReductionReductionOp)((MmeDmaReduction >> 0) & 0x7); + public MmeDmaReductionReductionFormat MmeDmaReductionReductionFormat => (MmeDmaReductionReductionFormat)((MmeDmaReduction >> 4) & 0x3); + public MmeDmaReductionReductionSize MmeDmaReductionReductionSize => (MmeDmaReductionReductionSize)((MmeDmaReduction >> 8) & 0x1); + public uint MmeDmaSysmembar; + public bool MmeDmaSysmembarV => (MmeDmaSysmembar & 0x1) != 0; + public uint MmeDmaSync; + public uint SetMmeDataFifoConfig; + public SetMmeDataFifoConfigFifoSize SetMmeDataFifoConfigFifoSize => (SetMmeDataFifoConfigFifoSize)((SetMmeDataFifoConfig >> 0) & 0x7); + public fixed uint Reserved578[2]; + public uint RenderSolidPrimMode; + public RenderSolidPrimModeV RenderSolidPrimModeV => (RenderSolidPrimModeV)((RenderSolidPrimMode >> 0) & 0x7); + public uint SetRenderSolidPrimColorFormat; + public SetRenderSolidPrimColorFormatV SetRenderSolidPrimColorFormatV => (SetRenderSolidPrimColorFormatV)((SetRenderSolidPrimColorFormat >> 0) & 0xFF); + public uint SetRenderSolidPrimColor; + public uint SetRenderSolidLineTieBreakBits; + public bool SetRenderSolidLineTieBreakBitsXmajXincYinc => 
(SetRenderSolidLineTieBreakBits & 0x1) != 0; + public bool SetRenderSolidLineTieBreakBitsXmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x10) != 0; + public bool SetRenderSolidLineTieBreakBitsYmajXincYinc => (SetRenderSolidLineTieBreakBits & 0x100) != 0; + public bool SetRenderSolidLineTieBreakBitsYmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x1000) != 0; + public fixed uint Reserved590[20]; + public uint RenderSolidPrimPointXY; + public int RenderSolidPrimPointXYX => (int)((RenderSolidPrimPointXY >> 0) & 0xFFFF); + public int RenderSolidPrimPointXYY => (int)((RenderSolidPrimPointXY >> 16) & 0xFFFF); + public fixed uint Reserved5E4[7]; + public Array64<RenderSolidPrimPoint> RenderSolidPrimPoint; + public uint SetPixelsFromCpuDataType; + public SetPixelsFromCpuDataTypeV SetPixelsFromCpuDataTypeV => (SetPixelsFromCpuDataTypeV)((SetPixelsFromCpuDataType >> 0) & 0x1); + public uint SetPixelsFromCpuColorFormat; + public SetPixelsFromCpuColorFormatV SetPixelsFromCpuColorFormatV => (SetPixelsFromCpuColorFormatV)((SetPixelsFromCpuColorFormat >> 0) & 0xFF); + public uint SetPixelsFromCpuIndexFormat; + public SetPixelsFromCpuIndexFormatV SetPixelsFromCpuIndexFormatV => (SetPixelsFromCpuIndexFormatV)((SetPixelsFromCpuIndexFormat >> 0) & 0x3); + public uint SetPixelsFromCpuMonoFormat; + public SetPixelsFromCpuMonoFormatV SetPixelsFromCpuMonoFormatV => (SetPixelsFromCpuMonoFormatV)((SetPixelsFromCpuMonoFormat >> 0) & 0x1); + public uint SetPixelsFromCpuWrap; + public SetPixelsFromCpuWrapV SetPixelsFromCpuWrapV => (SetPixelsFromCpuWrapV)((SetPixelsFromCpuWrap >> 0) & 0x3); + public uint SetPixelsFromCpuColor0; + public uint SetPixelsFromCpuColor1; + public uint SetPixelsFromCpuMonoOpacity; + public SetPixelsFromCpuMonoOpacityV SetPixelsFromCpuMonoOpacityV => (SetPixelsFromCpuMonoOpacityV)((SetPixelsFromCpuMonoOpacity >> 0) & 0x1); + public fixed uint Reserved820[6]; + public uint SetPixelsFromCpuSrcWidth; + public uint SetPixelsFromCpuSrcHeight; + public uint 
SetPixelsFromCpuDxDuFrac; + public uint SetPixelsFromCpuDxDuInt; + public uint SetPixelsFromCpuDyDvFrac; + public uint SetPixelsFromCpuDyDvInt; + public uint SetPixelsFromCpuDstX0Frac; + public uint SetPixelsFromCpuDstX0Int; + public uint SetPixelsFromCpuDstY0Frac; + public uint SetPixelsFromCpuDstY0Int; + public uint PixelsFromCpuData; + public fixed uint Reserved864[3]; + public uint SetBigEndianControl; + public bool SetBigEndianControlX32Swap1 => (SetBigEndianControl & 0x1) != 0; + public bool SetBigEndianControlX32Swap4 => (SetBigEndianControl & 0x2) != 0; + public bool SetBigEndianControlX32Swap8 => (SetBigEndianControl & 0x4) != 0; + public bool SetBigEndianControlX32Swap16 => (SetBigEndianControl & 0x8) != 0; + public bool SetBigEndianControlX16Swap1 => (SetBigEndianControl & 0x10) != 0; + public bool SetBigEndianControlX16Swap4 => (SetBigEndianControl & 0x20) != 0; + public bool SetBigEndianControlX16Swap8 => (SetBigEndianControl & 0x40) != 0; + public bool SetBigEndianControlX16Swap16 => (SetBigEndianControl & 0x80) != 0; + public bool SetBigEndianControlX8Swap1 => (SetBigEndianControl & 0x100) != 0; + public bool SetBigEndianControlX8Swap4 => (SetBigEndianControl & 0x200) != 0; + public bool SetBigEndianControlX8Swap8 => (SetBigEndianControl & 0x400) != 0; + public bool SetBigEndianControlX8Swap16 => (SetBigEndianControl & 0x800) != 0; + public bool SetBigEndianControlI1X8Cga6Swap1 => (SetBigEndianControl & 0x1000) != 0; + public bool SetBigEndianControlI1X8Cga6Swap4 => (SetBigEndianControl & 0x2000) != 0; + public bool SetBigEndianControlI1X8Cga6Swap8 => (SetBigEndianControl & 0x4000) != 0; + public bool SetBigEndianControlI1X8Cga6Swap16 => (SetBigEndianControl & 0x8000) != 0; + public bool SetBigEndianControlI1X8LeSwap1 => (SetBigEndianControl & 0x10000) != 0; + public bool SetBigEndianControlI1X8LeSwap4 => (SetBigEndianControl & 0x20000) != 0; + public bool SetBigEndianControlI1X8LeSwap8 => (SetBigEndianControl & 0x40000) != 0; + public bool 
SetBigEndianControlI1X8LeSwap16 => (SetBigEndianControl & 0x80000) != 0; + public bool SetBigEndianControlI4Swap1 => (SetBigEndianControl & 0x100000) != 0; + public bool SetBigEndianControlI4Swap4 => (SetBigEndianControl & 0x200000) != 0; + public bool SetBigEndianControlI4Swap8 => (SetBigEndianControl & 0x400000) != 0; + public bool SetBigEndianControlI4Swap16 => (SetBigEndianControl & 0x800000) != 0; + public bool SetBigEndianControlI8Swap1 => (SetBigEndianControl & 0x1000000) != 0; + public bool SetBigEndianControlI8Swap4 => (SetBigEndianControl & 0x2000000) != 0; + public bool SetBigEndianControlI8Swap8 => (SetBigEndianControl & 0x4000000) != 0; + public bool SetBigEndianControlI8Swap16 => (SetBigEndianControl & 0x8000000) != 0; + public bool SetBigEndianControlOverride => (SetBigEndianControl & 0x10000000) != 0; + public fixed uint Reserved874[3]; + public uint SetPixelsFromMemoryBlockShape; + public SetPixelsFromMemoryBlockShapeV SetPixelsFromMemoryBlockShapeV => (SetPixelsFromMemoryBlockShapeV)((SetPixelsFromMemoryBlockShape >> 0) & 0x7); + public uint SetPixelsFromMemoryCorralSize; + public int SetPixelsFromMemoryCorralSizeV => (int)((SetPixelsFromMemoryCorralSize >> 0) & 0x3FF); + public uint SetPixelsFromMemorySafeOverlap; + public bool SetPixelsFromMemorySafeOverlapV => (SetPixelsFromMemorySafeOverlap & 0x1) != 0; + public uint SetPixelsFromMemorySampleMode; + public SetPixelsFromMemorySampleModeOrigin SetPixelsFromMemorySampleModeOrigin => (SetPixelsFromMemorySampleModeOrigin)((SetPixelsFromMemorySampleMode >> 0) & 0x1); + public SetPixelsFromMemorySampleModeFilter SetPixelsFromMemorySampleModeFilter => (SetPixelsFromMemorySampleModeFilter)((SetPixelsFromMemorySampleMode >> 4) & 0x1); + public fixed uint Reserved890[8]; + public uint SetPixelsFromMemoryDstX0; + public uint SetPixelsFromMemoryDstY0; + public uint SetPixelsFromMemoryDstWidth; + public uint SetPixelsFromMemoryDstHeight; + public uint SetPixelsFromMemoryDuDxFrac; + public uint 
SetPixelsFromMemoryDuDxInt; + public uint SetPixelsFromMemoryDvDyFrac; + public uint SetPixelsFromMemoryDvDyInt; + public uint SetPixelsFromMemorySrcX0Frac; + public uint SetPixelsFromMemorySrcX0Int; + public uint SetPixelsFromMemorySrcY0Frac; + public uint PixelsFromMemorySrcY0Int; + public uint SetFalcon00; + public uint SetFalcon01; + public uint SetFalcon02; + public uint SetFalcon03; + public uint SetFalcon04; + public uint SetFalcon05; + public uint SetFalcon06; + public uint SetFalcon07; + public uint SetFalcon08; + public uint SetFalcon09; + public uint SetFalcon10; + public uint SetFalcon11; + public uint SetFalcon12; + public uint SetFalcon13; + public uint SetFalcon14; + public uint SetFalcon15; + public uint SetFalcon16; + public uint SetFalcon17; + public uint SetFalcon18; + public uint SetFalcon19; + public uint SetFalcon20; + public uint SetFalcon21; + public uint SetFalcon22; + public uint SetFalcon23; + public uint SetFalcon24; + public uint SetFalcon25; + public uint SetFalcon26; + public uint SetFalcon27; + public uint SetFalcon28; + public uint SetFalcon29; + public uint SetFalcon30; + public uint SetFalcon31; + public fixed uint Reserved960[291]; + public uint MmeDmaWriteMethodBarrier; + public bool MmeDmaWriteMethodBarrierV => (MmeDmaWriteMethodBarrier & 0x1) != 0; + public fixed uint ReservedDF0[2436]; + public MmeShadowScratch SetMmeShadowScratch; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs new file mode 100644 index 00000000..c28da094 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Twod/TwodTexture.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.Gpu.Engine.Types; + +namespace Ryujinx.Graphics.Gpu.Engine.Twod +{ + /// <summary> + /// Texture to texture (with optional resizing) copy parameters. 
+ /// </summary> + struct TwodTexture + { +#pragma warning disable CS0649 + public ColorFormat Format; + public Boolean32 LinearLayout; + public MemoryLayout MemoryLayout; + public int Depth; + public int Layer; + public int Stride; + public int Width; + public int Height; + public GpuVa Address; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs new file mode 100644 index 00000000..c982347a --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/Boolean32.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Gpu.Engine.Types +{ + /// <summary> + /// Boolean value, stored as a 32-bit integer in memory (only bit 0 is tested when converting to bool). + /// </summary> + struct Boolean32 + { +#pragma warning disable CS0649 + private uint _value; +#pragma warning restore CS0649 + + public static implicit operator bool(Boolean32 value) + { + return (value._value & 1) != 0; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs b/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs new file mode 100644 index 00000000..889b5c8b --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Types/ColorFormat.cs @@ -0,0 +1,165 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; + +namespace Ryujinx.Graphics.Gpu.Engine.Types +{ + /// <summary> + /// Color texture format.
+ /// </summary> + enum ColorFormat + { + R32G32B32A32Float = 0xc0, + R32G32B32A32Sint = 0xc1, + R32G32B32A32Uint = 0xc2, + R32G32B32X32Float = 0xc3, + R32G32B32X32Sint = 0xc4, + R32G32B32X32Uint = 0xc5, + R16G16B16X16Unorm = 0xc6, + R16G16B16X16Snorm = 0xc7, + R16G16B16X16Sint = 0xc8, + R16G16B16X16Uint = 0xc9, + R16G16B16A16Float = 0xca, + R32G32Float = 0xcb, + R32G32Sint = 0xcc, + R32G32Uint = 0xcd, + R16G16B16X16Float = 0xce, + B8G8R8A8Unorm = 0xcf, + B8G8R8A8Srgb = 0xd0, + R10G10B10A2Unorm = 0xd1, + R10G10B10A2Uint = 0xd2, + R8G8B8A8Unorm = 0xd5, + R8G8B8A8Srgb = 0xd6, + R8G8B8X8Snorm = 0xd7, + R8G8B8X8Sint = 0xd8, + R8G8B8X8Uint = 0xd9, + R16G16Unorm = 0xda, + R16G16Snorm = 0xdb, + R16G16Sint = 0xdc, + R16G16Uint = 0xdd, + R16G16Float = 0xde, + R11G11B10Float = 0xe0, + R32Sint = 0xe3, + R32Uint = 0xe4, + R32Float = 0xe5, + B8G8R8X8Unorm = 0xe6, + B8G8R8X8Srgb = 0xe7, + B5G6R5Unorm = 0xe8, + B5G5R5A1Unorm = 0xe9, + R8G8Unorm = 0xea, + R8G8Snorm = 0xeb, + R8G8Sint = 0xec, + R8G8Uint = 0xed, + R16Unorm = 0xee, + R16Snorm = 0xef, + R16Sint = 0xf0, + R16Uint = 0xf1, + R16Float = 0xf2, + R8Unorm = 0xf3, + R8Snorm = 0xf4, + R8Sint = 0xf5, + R8Uint = 0xf6, + B5G5R5X1Unorm = 0xf8, + R8G8B8X8Unorm = 0xf9, + R8G8B8X8Srgb = 0xfa + } + + static class ColorFormatConverter + { + /// <summary> + /// Converts the color texture format to a host compatible format (values not listed below map to FormatInfo.Default).
+ /// </summary> + /// <param name="format">Color format</param> + /// <returns>Host compatible format enum value</returns> + public static FormatInfo Convert(this ColorFormat format) + { + return format switch + { + ColorFormat.R32G32B32A32Float => new FormatInfo(Format.R32G32B32A32Float, 1, 1, 16, 4), + ColorFormat.R32G32B32A32Sint => new FormatInfo(Format.R32G32B32A32Sint, 1, 1, 16, 4), + ColorFormat.R32G32B32A32Uint => new FormatInfo(Format.R32G32B32A32Uint, 1, 1, 16, 4), + ColorFormat.R32G32B32X32Float => new FormatInfo(Format.R32G32B32A32Float, 1, 1, 16, 4), + ColorFormat.R32G32B32X32Sint => new FormatInfo(Format.R32G32B32A32Sint, 1, 1, 16, 4), + ColorFormat.R32G32B32X32Uint => new FormatInfo(Format.R32G32B32A32Uint, 1, 1, 16, 4), + ColorFormat.R16G16B16X16Unorm => new FormatInfo(Format.R16G16B16A16Unorm, 1, 1, 8, 4), + ColorFormat.R16G16B16X16Snorm => new FormatInfo(Format.R16G16B16A16Snorm, 1, 1, 8, 4), + ColorFormat.R16G16B16X16Sint => new FormatInfo(Format.R16G16B16A16Sint, 1, 1, 8, 4), + ColorFormat.R16G16B16X16Uint => new FormatInfo(Format.R16G16B16A16Uint, 1, 1, 8, 4), + ColorFormat.R16G16B16A16Float => new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4), + ColorFormat.R32G32Float => new FormatInfo(Format.R32G32Float, 1, 1, 8, 2), + ColorFormat.R32G32Sint => new FormatInfo(Format.R32G32Sint, 1, 1, 8, 2), + ColorFormat.R32G32Uint => new FormatInfo(Format.R32G32Uint, 1, 1, 8, 2), + ColorFormat.R16G16B16X16Float => new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4), + ColorFormat.B8G8R8A8Unorm => new FormatInfo(Format.B8G8R8A8Unorm, 1, 1, 4, 4), + ColorFormat.B8G8R8A8Srgb => new FormatInfo(Format.B8G8R8A8Srgb, 1, 1, 4, 4), + ColorFormat.R10G10B10A2Unorm => new FormatInfo(Format.R10G10B10A2Unorm, 1, 1, 4, 4), + ColorFormat.R10G10B10A2Uint => new FormatInfo(Format.R10G10B10A2Uint, 1, 1, 4, 4), + ColorFormat.R8G8B8A8Unorm => new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4), + ColorFormat.R8G8B8A8Srgb => new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4), +
ColorFormat.R8G8B8X8Snorm => new FormatInfo(Format.R8G8B8A8Snorm, 1, 1, 4, 4), + ColorFormat.R8G8B8X8Sint => new FormatInfo(Format.R8G8B8A8Sint, 1, 1, 4, 4), + ColorFormat.R8G8B8X8Uint => new FormatInfo(Format.R8G8B8A8Uint, 1, 1, 4, 4), + ColorFormat.R16G16Unorm => new FormatInfo(Format.R16G16Unorm, 1, 1, 4, 2), + ColorFormat.R16G16Snorm => new FormatInfo(Format.R16G16Snorm, 1, 1, 4, 2), + ColorFormat.R16G16Sint => new FormatInfo(Format.R16G16Sint, 1, 1, 4, 2), + ColorFormat.R16G16Uint => new FormatInfo(Format.R16G16Uint, 1, 1, 4, 2), + ColorFormat.R16G16Float => new FormatInfo(Format.R16G16Float, 1, 1, 4, 2), + ColorFormat.R11G11B10Float => new FormatInfo(Format.R11G11B10Float, 1, 1, 4, 3), + ColorFormat.R32Sint => new FormatInfo(Format.R32Sint, 1, 1, 4, 1), + ColorFormat.R32Uint => new FormatInfo(Format.R32Uint, 1, 1, 4, 1), + ColorFormat.R32Float => new FormatInfo(Format.R32Float, 1, 1, 4, 1), + ColorFormat.B8G8R8X8Unorm => new FormatInfo(Format.B8G8R8A8Unorm, 1, 1, 4, 4), + ColorFormat.B8G8R8X8Srgb => new FormatInfo(Format.B8G8R8A8Srgb, 1, 1, 4, 4), + ColorFormat.B5G6R5Unorm => new FormatInfo(Format.B5G6R5Unorm, 1, 1, 2, 3), + ColorFormat.B5G5R5A1Unorm => new FormatInfo(Format.B5G5R5A1Unorm, 1, 1, 2, 4), + ColorFormat.R8G8Unorm => new FormatInfo(Format.R8G8Unorm, 1, 1, 2, 2), + ColorFormat.R8G8Snorm => new FormatInfo(Format.R8G8Snorm, 1, 1, 2, 2), + ColorFormat.R8G8Sint => new FormatInfo(Format.R8G8Sint, 1, 1, 2, 2), + ColorFormat.R8G8Uint => new FormatInfo(Format.R8G8Uint, 1, 1, 2, 2), + ColorFormat.R16Unorm => new FormatInfo(Format.R16Unorm, 1, 1, 2, 1), + ColorFormat.R16Snorm => new FormatInfo(Format.R16Snorm, 1, 1, 2, 1), + ColorFormat.R16Sint => new FormatInfo(Format.R16Sint, 1, 1, 2, 1), + ColorFormat.R16Uint => new FormatInfo(Format.R16Uint, 1, 1, 2, 1), + ColorFormat.R16Float => new FormatInfo(Format.R16Float, 1, 1, 2, 1), + ColorFormat.R8Unorm => new FormatInfo(Format.R8Unorm, 1, 1, 1, 1), + ColorFormat.R8Snorm => new FormatInfo(Format.R8Snorm, 1, 1,
1, 1), + ColorFormat.R8Sint => new FormatInfo(Format.R8Sint, 1, 1, 1, 1), + ColorFormat.R8Uint => new FormatInfo(Format.R8Uint, 1, 1, 1, 1), + ColorFormat.B5G5R5X1Unorm => new FormatInfo(Format.B5G5R5A1Unorm, 1, 1, 2, 4), + ColorFormat.R8G8B8X8Unorm => new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4), + ColorFormat.R8G8B8X8Srgb => new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4), + _ => FormatInfo.Default + }; + } + + /// <summary> + /// Checks if a format lacks an alpha component (its name carries an X channel instead of an A channel). + /// </summary> + /// <param name="format">Format to be checked</param> + /// <returns>True if the format has no alpha component (RGBX), false if it does (RGBA)</returns> + public static bool NoAlpha(this ColorFormat format) + { + switch (format) + { + case ColorFormat.R32G32B32X32Float: + case ColorFormat.R32G32B32X32Sint: + case ColorFormat.R32G32B32X32Uint: + case ColorFormat.R16G16B16X16Unorm: + case ColorFormat.R16G16B16X16Snorm: + case ColorFormat.R16G16B16X16Sint: + case ColorFormat.R16G16B16X16Uint: + case ColorFormat.R16G16B16X16Float: + case ColorFormat.R8G8B8X8Snorm: + case ColorFormat.R8G8B8X8Sint: + case ColorFormat.R8G8B8X8Uint: + case ColorFormat.B8G8R8X8Unorm: + case ColorFormat.B8G8R8X8Srgb: + case ColorFormat.B5G5R5X1Unorm: + case ColorFormat.R8G8B8X8Unorm: + case ColorFormat.R8G8B8X8Srgb: + return true; + } + + return false; + } + } +}
// Required by PrimitiveType.cs (PrimitiveTopology). Hoisted to the top because
// using directives must come before any namespace declaration in a compilation unit.
using Ryujinx.Graphics.GAL;

// File: src/Ryujinx.Graphics.Gpu/Engine/Types/GpuVa.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// GPU virtual address kept as two separate 32-bit halves.
    /// </summary>
    struct GpuVa
    {
#pragma warning disable CS0649
        public uint High;
        public uint Low;
#pragma warning restore CS0649

        /// <summary>
        /// Joins the two halves into one 64-bit address.
        /// </summary>
        /// <returns><see cref="High"/> in the upper 32 bits, <see cref="Low"/> in the lower 32 bits</returns>
        public ulong Pack()
        {
            ulong upperHalf = (ulong)High << 32;

            return upperHalf | Low;
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Engine/Types/MemoryLayout.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// Memory layout parameters, for block linear textures.
    /// </summary>
    struct MemoryLayout
    {
#pragma warning disable CS0649
        public uint Packed;
#pragma warning restore CS0649

        /// <summary>
        /// Decodes the X field (bits 0-3 of <see cref="Packed"/>).
        /// </summary>
        /// <returns>1 shifted left by the field value</returns>
        public int UnpackGobBlocksInX() => PowerOfTwoFromNibble(0);

        /// <summary>
        /// Decodes the Y field (bits 4-7 of <see cref="Packed"/>).
        /// </summary>
        /// <returns>1 shifted left by the field value</returns>
        public int UnpackGobBlocksInY() => PowerOfTwoFromNibble(4);

        /// <summary>
        /// Decodes the Z field (bits 8-11 of <see cref="Packed"/>).
        /// </summary>
        /// <returns>1 shifted left by the field value</returns>
        public int UnpackGobBlocksInZ() => PowerOfTwoFromNibble(8);

        /// <summary>
        /// Reads the linear flag (bit 12 of <see cref="Packed"/>).
        /// </summary>
        /// <returns>True if bit 12 is set</returns>
        public bool UnpackIsLinear() => (Packed & (1u << 12)) != 0;

        /// <summary>
        /// Reads the 3D target flag (bit 16 of <see cref="Packed"/>).
        /// </summary>
        /// <returns>True if bit 16 is set</returns>
        public bool UnpackIsTarget3D() => (Packed & (1u << 16)) != 0;

        /// <summary>
        /// Extracts a 4-bit field from <see cref="Packed"/> and returns two raised to that value.
        /// </summary>
        /// <param name="bitOffset">Position of the field's lowest bit</param>
        /// <returns>1 shifted left by the 4-bit field value</returns>
        private int PowerOfTwoFromNibble(int bitOffset)
        {
            int exponent = (int)((Packed >> bitOffset) & 0xf);

            return 1 << exponent;
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Engine/Types/PrimitiveType.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// Draw primitive type.
    /// </summary>
    enum PrimitiveType
    {
        Points = 0,
        Lines = 1,
        LineLoop = 2,
        LineStrip = 3,
        Triangles = 4,
        TriangleStrip = 5,
        TriangleFan = 6,
        Quads = 7,
        QuadStrip = 8,
        Polygon = 9,
        LinesAdjacency = 10,
        LineStripAdjacency = 11,
        TrianglesAdjacency = 12,
        TriangleStripAdjacency = 13,
        Patches = 14
    }

    /// <summary>
    /// Alternative primitive type that might override <see cref="PrimitiveType"/>.
    /// </summary>
    enum PrimitiveTypeOverride
    {
        Points = 1,
        Lines = 2,
        LineStrip = 3,
        Triangles = 4,
        TriangleStrip = 5,
        TriangleFan = 0x1015,
        LinesAdjacency = 10,
        LineStripAdjacency = 11,
        TrianglesAdjacency = 12,
        TriangleStripAdjacency = 13,
        Patches = 14
    }

    /// <summary>
    /// Extension methods mapping the primitive type enums onto <see cref="PrimitiveTopology"/>.
    /// </summary>
    static class PrimitiveTypeConverter
    {
        /// <summary>
        /// Maps a <see cref="PrimitiveType"/> onto the host topology of the same name.
        /// </summary>
        /// <param name="type">The primitive type to convert</param>
        /// <returns>The matching topology, or <see cref="PrimitiveTopology.Triangles"/> for unlisted values</returns>
        public static PrimitiveTopology Convert(this PrimitiveType type)
        {
            switch (type)
            {
                case PrimitiveType.Points:
                    return PrimitiveTopology.Points;
                case PrimitiveType.Lines:
                    return PrimitiveTopology.Lines;
                case PrimitiveType.LineLoop:
                    return PrimitiveTopology.LineLoop;
                case PrimitiveType.LineStrip:
                    return PrimitiveTopology.LineStrip;
                case PrimitiveType.Triangles:
                    return PrimitiveTopology.Triangles;
                case PrimitiveType.TriangleStrip:
                    return PrimitiveTopology.TriangleStrip;
                case PrimitiveType.TriangleFan:
                    return PrimitiveTopology.TriangleFan;
                case PrimitiveType.Quads:
                    return PrimitiveTopology.Quads;
                case PrimitiveType.QuadStrip:
                    return PrimitiveTopology.QuadStrip;
                case PrimitiveType.Polygon:
                    return PrimitiveTopology.Polygon;
                case PrimitiveType.LinesAdjacency:
                    return PrimitiveTopology.LinesAdjacency;
                case PrimitiveType.LineStripAdjacency:
                    return PrimitiveTopology.LineStripAdjacency;
                case PrimitiveType.TrianglesAdjacency:
                    return PrimitiveTopology.TrianglesAdjacency;
                case PrimitiveType.TriangleStripAdjacency:
                    return PrimitiveTopology.TriangleStripAdjacency;
                case PrimitiveType.Patches:
                    return PrimitiveTopology.Patches;
                default:
                    // Unknown encodings fall back to triangles.
                    return PrimitiveTopology.Triangles;
            }
        }

        /// <summary>
        /// Maps a <see cref="PrimitiveTypeOverride"/> onto the host topology of the same name.
        /// </summary>
        /// <param name="type">The override primitive type to convert</param>
        /// <returns>The matching topology, or <see cref="PrimitiveTopology.Triangles"/> for unlisted values</returns>
        public static PrimitiveTopology Convert(this PrimitiveTypeOverride type)
        {
            switch (type)
            {
                case PrimitiveTypeOverride.Points:
                    return PrimitiveTopology.Points;
                case PrimitiveTypeOverride.Lines:
                    return PrimitiveTopology.Lines;
                case PrimitiveTypeOverride.LineStrip:
                    return PrimitiveTopology.LineStrip;
                case PrimitiveTypeOverride.Triangles:
                    return PrimitiveTopology.Triangles;
                case PrimitiveTypeOverride.TriangleStrip:
                    return PrimitiveTopology.TriangleStrip;
                case PrimitiveTypeOverride.TriangleFan:
                    return PrimitiveTopology.TriangleFan;
                case PrimitiveTypeOverride.LinesAdjacency:
                    return PrimitiveTopology.LinesAdjacency;
                case PrimitiveTypeOverride.LineStripAdjacency:
                    return PrimitiveTopology.LineStripAdjacency;
                case PrimitiveTypeOverride.TrianglesAdjacency:
                    return PrimitiveTopology.TrianglesAdjacency;
                case PrimitiveTypeOverride.TriangleStripAdjacency:
                    return PrimitiveTopology.TriangleStripAdjacency;
                case PrimitiveTypeOverride.Patches:
                    return PrimitiveTopology.Patches;
                default:
                    // Unknown encodings fall back to triangles.
                    return PrimitiveTopology.Triangles;
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Gpu/Engine/Types/SamplerIndex.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// Sampler pool indexing mode.
    /// </summary>
    enum SamplerIndex
    {
        /// <summary>Encoded as 0.</summary>
        Independently = 0,

        /// <summary>Encoded as 1.</summary>
        ViaHeaderIndex = 1
    }
}
// Required by ZetaFormat.cs (Format, FormatInfo). Hoisted to the top because
// using directives must come before any namespace declaration in a compilation unit.
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;

// File: src/Ryujinx.Graphics.Gpu/Engine/Types/SbDescriptor.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// Storage buffer address and size information.
    /// </summary>
    struct SbDescriptor
    {
#pragma warning disable CS0649
        public uint AddressLow;
        public uint AddressHigh;
        public int Size;
        public int Padding;
#pragma warning restore CS0649

        /// <summary>
        /// Joins the two address halves into one 64-bit value.
        /// </summary>
        /// <returns><see cref="AddressHigh"/> in the upper 32 bits, <see cref="AddressLow"/> in the lower 32 bits</returns>
        public ulong PackAddress()
        {
            ulong upperHalf = (ulong)AddressHigh << 32;

            return upperHalf | AddressLow;
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Engine/Types/ZetaFormat.cs
namespace Ryujinx.Graphics.Gpu.Engine.Types
{
    /// <summary>
    /// Depth-stencil texture format.
    /// </summary>
    enum ZetaFormat
    {
        D32Float = 0xa,
        D16Unorm = 0x13,
        D24UnormS8Uint = 0x14,
        D24Unorm = 0x15,
        S8UintD24Unorm = 0x16,
        S8Uint = 0x17,
        D32FloatS8Uint = 0x19
    }

    /// <summary>
    /// Extension methods for <see cref="ZetaFormat"/>.
    /// </summary>
    static class ZetaFormatConverter
    {
        /// <summary>
        /// Converts the depth-stencil texture format to a host compatible format.
        /// </summary>
        /// <param name="format">Depth-stencil format</param>
        /// <returns>Host format information, or <see cref="FormatInfo.Default"/> for unlisted values</returns>
        public static FormatInfo Convert(this ZetaFormat format)
        {
            switch (format)
            {
                case ZetaFormat.D32Float:
                    return new FormatInfo(Format.D32Float, 1, 1, 4, 1);
                case ZetaFormat.D16Unorm:
                    return new FormatInfo(Format.D16Unorm, 1, 1, 2, 1);
                case ZetaFormat.D24UnormS8Uint:
                    return new FormatInfo(Format.D24UnormS8Uint, 1, 1, 4, 2);
                case ZetaFormat.D24Unorm:
                    // Uses the D24UnormS8Uint host format, but with the last argument set to 1 instead of 2.
                    return new FormatInfo(Format.D24UnormS8Uint, 1, 1, 4, 1);
                case ZetaFormat.S8UintD24Unorm:
                    return new FormatInfo(Format.S8UintD24Unorm, 1, 1, 4, 2);
                case ZetaFormat.S8Uint:
                    return new FormatInfo(Format.S8Uint, 1, 1, 1, 1);
                case ZetaFormat.D32FloatS8Uint:
                    return new FormatInfo(Format.D32FloatS8Uint, 1, 1, 8, 2);
                default:
                    return FormatInfo.Default;
            }
        }
    }
}
