aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Audio/Renderer/Dsp/Command
diff options
context:
space:
mode:
author Mary <me@thog.eu> 2021-03-02 23:50:46 +0100
committer GitHub <noreply@github.com> 2021-03-02 23:50:46 +0100
commit 31fca432a7274907c46f6ec254d54e96cb6446c6 (patch)
tree 7008b6c1e161ce36dcd13ba3eef1990b42ca8f8e /Ryujinx.Audio/Renderer/Dsp/Command
parent 3d04d7ef0870fe74c529dd6c35e1c02d869bd3c0 (diff)
Amadeus: Add ARM SIMD fast path (#2069)
Add AArch64 (AdvSimd) fast paths to the audio renderer wherever an x86 SIMD fast path already exists.
Diffstat (limited to 'Ryujinx.Audio/Renderer/Dsp/Command')
-rw-r--r-- Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs 26
-rw-r--r-- Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs 25
2 files changed, 51 insertions, 0 deletions
diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs
index 566fea92..06968871 100644
--- a/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs
+++ b/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs
@@ -19,6 +19,7 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Audio.Renderer.Dsp.Command
@@ -90,6 +91,27 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void ProcessMixAdvSimd(Span<float> outputMix, ReadOnlySpan<float> inputMix) // AdvSimd (ARM) path: adds inputMix scaled by Volume into outputMix, four floats per vector step.
+ {
+ Vector128<float> volumeVec = Vector128.Create(Volume); // Broadcast Volume into every lane of the vector.
+
+ ReadOnlySpan<Vector128<float>> inputVec = MemoryMarshal.Cast<float, Vector128<float>>(inputMix); // Reinterpret the floats as 128-bit vectors; trailing floats that do not fill a whole vector are left out.
+ Span<Vector128<float>> outputVec = MemoryMarshal.Cast<float, Vector128<float>>(outputMix); // NOTE(review): assumes outputMix holds at least as many floats as inputMix - confirm against callers.
+
+ int sisdStart = inputVec.Length * 4; // Index of the first float not covered by the vector loop (4 floats per Vector128<float>).
+
+ for (int i = 0; i < inputVec.Length; i++)
+ {
+ outputVec[i] = AdvSimd.Add(outputVec[i], AdvSimd.Ceiling(AdvSimd.Multiply(inputVec[i], volumeVec))); // Per lane: output += ceil(input * volume).
+ }
+
+ for (int i = sisdStart; i < inputMix.Length; i++) // Scalar tail for the 0-3 samples left after the vector loop.
+ {
+ outputMix[i] += FloatingPointHelper.MultiplyRoundUp(inputMix[i], Volume); // NOTE(review): presumably rounds the product the same way as the Ceiling above - confirm in FloatingPointHelper.
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ProcessMixSlowPath(Span<float> outputMix, ReadOnlySpan<float> inputMix)
{
for (int i = 0; i < inputMix.Length; i++)
@@ -108,6 +130,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
{
ProcessMixSse41(outputMix, inputMix);
}
+ else if (AdvSimd.IsSupported)
+ {
+ ProcessMixAdvSimd(outputMix, inputMix);
+ }
else
{
ProcessMixSlowPath(outputMix, inputMix);
diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs
index b58ae1f8..217d51c9 100644
--- a/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs
+++ b/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs
@@ -19,6 +19,7 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Audio.Renderer.Dsp.Command
@@ -89,6 +90,26 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
}
}
+ private void ProcessVolumeAdvSimd(Span<float> outputBuffer, ReadOnlySpan<float> inputBuffer) // AdvSimd (ARM) path: writes inputBuffer scaled by Volume into outputBuffer, four floats per vector step.
+ {
+ Vector128<float> volumeVec = Vector128.Create(Volume); // Broadcast Volume into every lane of the vector.
+
+ ReadOnlySpan<Vector128<float>> inputVec = MemoryMarshal.Cast<float, Vector128<float>>(inputBuffer); // Reinterpret the floats as 128-bit vectors; trailing floats that do not fill a whole vector are left out.
+ Span<Vector128<float>> outputVec = MemoryMarshal.Cast<float, Vector128<float>>(outputBuffer); // NOTE(review): assumes outputBuffer holds at least as many floats as inputBuffer - confirm against callers.
+
+ int sisdStart = inputVec.Length * 4; // Index of the first float not covered by the vector loop (4 floats per Vector128<float>).
+
+ for (int i = 0; i < inputVec.Length; i++)
+ {
+ outputVec[i] = AdvSimd.Ceiling(AdvSimd.Multiply(inputVec[i], volumeVec)); // Per lane: output = ceil(input * volume); overwrites rather than accumulates.
+ }
+
+ for (int i = sisdStart; i < inputBuffer.Length; i++) // Scalar tail for the 0-3 samples left after the vector loop.
+ {
+ outputBuffer[i] = FloatingPointHelper.MultiplyRoundUp(inputBuffer[i], Volume); // NOTE(review): presumably rounds the product the same way as the Ceiling above - confirm in FloatingPointHelper.
+ }
+ }
+
private void ProcessVolume(Span<float> outputBuffer, ReadOnlySpan<float> inputBuffer)
{
if (Avx.IsSupported)
@@ -99,6 +120,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
{
ProcessVolumeSse41(outputBuffer, inputBuffer);
}
+ else if (AdvSimd.IsSupported)
+ {
+ ProcessVolumeAdvSimd(outputBuffer, inputBuffer);
+ }
else
{
ProcessVolumeSlowPath(outputBuffer, inputBuffer);