aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2018-10-28 23:27:50 +0100
committer gdkchan <gab.dark.100@gmail.com> 2018-10-28 19:27:50 -0300
commit b956bbc32c7f9fdffebfd9a9416e8e0a2a588abd (patch)
tree 7769acbc7d7c1f747642b2efb955e146ff8b923a
parent 111d14f74aca5e6467473ec73ab0825b9c0b4db1 (diff)
Add SHA1C, SHA1H, SHA1M, SHA1P, SHA1SU0, SHA1SU1 and Isb instructions; add 6 Tests (closed box). (#483)
* Update AOpCodeTable.cs
* Update AInstEmitSystem.cs
* Update AInstEmitSimdHash.cs
* Update ASoftFallback.cs
* Update CpuTestSimdReg.cs
* Update CpuTestSimd.cs
-rw-r--r-- ChocolArm64/AOpCodeTable.cs 7
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdHash.cs 83
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSystem.cs 7
-rw-r--r-- ChocolArm64/Instruction/ASoftFallback.cs 304
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimd.cs 44
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs 81
6 files changed, 397 insertions, 129 deletions
diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index 44493298..cbdff47f 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -88,6 +88,7 @@ namespace ChocolArm64
SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Extr, typeof(AOpCodeAluRs));
SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", AInstEmit.Extr, typeof(AOpCodeAluRs));
SetA64("11010101000000110010xxxxxxx11111", AInstEmit.Hint, typeof(AOpCodeSystem));
+ SetA64("11010101000000110011xxxx11011111", AInstEmit.Isb, typeof(AOpCodeSystem));
SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldar, typeof(AOpCodeMemEx));
SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldaxp, typeof(AOpCodeMemEx));
SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldaxr, typeof(AOpCodeMemEx));
@@ -386,6 +387,12 @@ namespace ChocolArm64
SetA64("x0011110xx100010000000xxxxxxxxxx", AInstEmit.Scvtf_Gp, typeof(AOpCodeSimdCvt));
SetA64("010111100x100001110110xxxxxxxxxx", AInstEmit.Scvtf_S, typeof(AOpCodeSimd));
SetA64("0x0011100x100001110110xxxxxxxxxx", AInstEmit.Scvtf_V, typeof(AOpCodeSimd));
+ SetA64("01011110000xxxxx000000xxxxxxxxxx", AInstEmit.Sha1c_V, typeof(AOpCodeSimdReg));
+ SetA64("0101111000101000000010xxxxxxxxxx", AInstEmit.Sha1h_V, typeof(AOpCodeSimd));
+ SetA64("01011110000xxxxx001000xxxxxxxxxx", AInstEmit.Sha1m_V, typeof(AOpCodeSimdReg));
+ SetA64("01011110000xxxxx000100xxxxxxxxxx", AInstEmit.Sha1p_V, typeof(AOpCodeSimdReg));
+ SetA64("01011110000xxxxx001100xxxxxxxxxx", AInstEmit.Sha1su0_V, typeof(AOpCodeSimdReg));
+ SetA64("0101111000101000000110xxxxxxxxxx", AInstEmit.Sha1su1_V, typeof(AOpCodeSimd));
SetA64("01011110000xxxxx010000xxxxxxxxxx", AInstEmit.Sha256h_V, typeof(AOpCodeSimdReg));
SetA64("01011110000xxxxx010100xxxxxxxxxx", AInstEmit.Sha256h2_V, typeof(AOpCodeSimdReg));
SetA64("0101111000101000001010xxxxxxxxxx", AInstEmit.Sha256su0_V, typeof(AOpCodeSimd));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHash.cs b/ChocolArm64/Instruction/AInstEmitSimdHash.cs
index 6b642acb..5a59e779 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHash.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHash.cs
@@ -1,10 +1,89 @@
using ChocolArm64.Decoder;
using ChocolArm64.Translation;
+using static ChocolArm64.Instruction.AInstEmitSimdHelper;
+
namespace ChocolArm64.Instruction
{
static partial class AInstEmit
{
+#region "Sha1"
+ public static void Sha1c_V(AILEmitterCtx Context) // Emitter for SHA1C: calls ASoftFallback.HashChoose(Rd, element 0 of Rn, Rm) and stores the returned vector in Rd.
+ {
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ Context.EmitLdvec(Op.Rd); // 1st argument (hash_abcd): the whole Rd vector
+ EmitVectorExtractZx(Context, Op.Rn, 0, 2); // 2nd argument (hash_e): element 0 of Rn, size selector 2 (presumably 32-bit - confirm in AInstEmitSimdHelper)
+ Context.EmitLdvec(Op.Rm); // 3rd argument (wk): the whole Rm vector
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashChoose));
+
+ Context.EmitStvec(Op.Rd); // the call result replaces Rd
+ }
+
+ public static void Sha1h_V(AILEmitterCtx Context) // Emitter for SHA1H: passes element 0 of Rn to ASoftFallback.FixedRotate and writes the returned value to Rd as a scalar.
+ {
+ AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+ EmitVectorExtractZx(Context, Op.Rn, 0, 2); // hash_e argument: element 0 of Rn, size selector 2 (presumably 32-bit - confirm in AInstEmitSimdHelper)
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.FixedRotate));
+
+ EmitScalarSet(Context, Op.Rd, 2); // NOTE(review): presumably also clears the remaining elements of Rd - confirm in AInstEmitSimdHelper
+ }
+
+ public static void Sha1m_V(AILEmitterCtx Context) // Emitter for SHA1M: calls ASoftFallback.HashMajority(Rd, element 0 of Rn, Rm) and stores the returned vector in Rd.
+ {
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ Context.EmitLdvec(Op.Rd); // 1st argument (hash_abcd): the whole Rd vector
+ EmitVectorExtractZx(Context, Op.Rn, 0, 2); // 2nd argument (hash_e): element 0 of Rn, size selector 2 (presumably 32-bit - confirm in AInstEmitSimdHelper)
+ Context.EmitLdvec(Op.Rm); // 3rd argument (wk): the whole Rm vector
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashMajority));
+
+ Context.EmitStvec(Op.Rd); // the call result replaces Rd
+ }
+
+ public static void Sha1p_V(AILEmitterCtx Context) // Emitter for SHA1P: calls ASoftFallback.HashParity(Rd, element 0 of Rn, Rm) and stores the returned vector in Rd.
+ {
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ Context.EmitLdvec(Op.Rd); // 1st argument (hash_abcd): the whole Rd vector
+ EmitVectorExtractZx(Context, Op.Rn, 0, 2); // 2nd argument (hash_e): element 0 of Rn, size selector 2 (presumably 32-bit - confirm in AInstEmitSimdHelper)
+ Context.EmitLdvec(Op.Rm); // 3rd argument (wk): the whole Rm vector
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashParity));
+
+ Context.EmitStvec(Op.Rd); // the call result replaces Rd
+ }
+
+ public static void Sha1su0_V(AILEmitterCtx Context) // Emitter for SHA1SU0: calls ASoftFallback.Sha1SchedulePart1(Rd, Rn, Rm) and stores the returned vector in Rd.
+ {
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ Context.EmitLdvec(Op.Rd); // 1st argument (w0_3)
+ Context.EmitLdvec(Op.Rn); // 2nd argument (w4_7)
+ Context.EmitLdvec(Op.Rm); // 3rd argument (w8_11)
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha1SchedulePart1));
+
+ Context.EmitStvec(Op.Rd); // the call result replaces Rd
+ }
+
+ public static void Sha1su1_V(AILEmitterCtx Context) // Emitter for SHA1SU1: calls ASoftFallback.Sha1SchedulePart2(Rd, Rn) and stores the returned vector in Rd.
+ {
+ AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+ Context.EmitLdvec(Op.Rd); // 1st argument (tw0_3)
+ Context.EmitLdvec(Op.Rn); // 2nd argument (w12_15)
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha1SchedulePart2));
+
+ Context.EmitStvec(Op.Rd); // the call result replaces Rd
+ }
+#endregion
+
#region "Sha256"
public static void Sha256h_V(AILEmitterCtx Context)
{
@@ -39,7 +118,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdvec(Op.Rd);
Context.EmitLdvec(Op.Rn);
- ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SchedulePart1));
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha256SchedulePart1));
Context.EmitStvec(Op.Rd);
}
@@ -52,7 +131,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdvec(Op.Rn);
Context.EmitLdvec(Op.Rm);
- ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SchedulePart2));
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha256SchedulePart2));
Context.EmitStvec(Op.Rd);
}
diff --git a/ChocolArm64/Instruction/AInstEmitSystem.cs b/ChocolArm64/Instruction/AInstEmitSystem.cs
index 1c5d0263..a365398f 100644
--- a/ChocolArm64/Instruction/AInstEmitSystem.cs
+++ b/ChocolArm64/Instruction/AInstEmitSystem.cs
@@ -14,6 +14,11 @@ namespace ChocolArm64.Instruction
//Execute as no-op.
}
+ public static void Isb(AILEmitterCtx Context) // Emitter registered for the ISB encoding in AOpCodeTable; the body is empty, so no code is emitted for it.
+ {
+ //Execute as no-op.
+ }
+
public static void Mrs(AILEmitterCtx Context)
{
AOpCodeSystem Op = (AOpCodeSystem)Context.CurrOp;
@@ -130,4 +135,4 @@ namespace ChocolArm64.Instruction
return Id;
}
}
-} \ No newline at end of file
+}
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index b69e2c75..d643fb6f 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -103,20 +103,20 @@ namespace ChocolArm64.Instruction
#endregion
#region "Saturating"
- public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State)
+ public static long SignedSrcSignedDstSatQ(long Op, int Size, AThreadState State)
{
int ESize = 8 << Size;
long TMaxValue = (1L << (ESize - 1)) - 1L;
long TMinValue = -(1L << (ESize - 1));
- if (op > TMaxValue)
+ if (Op > TMaxValue)
{
State.SetFpsrFlag(FPSR.QC);
return TMaxValue;
}
- else if (op < TMinValue)
+ else if (Op < TMinValue)
{
State.SetFpsrFlag(FPSR.QC);
@@ -124,24 +124,24 @@ namespace ChocolArm64.Instruction
}
else
{
- return op;
+ return Op;
}
}
- public static ulong SignedSrcUnsignedDstSatQ(long op, int Size, AThreadState State)
+ public static ulong SignedSrcUnsignedDstSatQ(long Op, int Size, AThreadState State)
{
int ESize = 8 << Size;
ulong TMaxValue = (1UL << ESize) - 1UL;
ulong TMinValue = 0UL;
- if (op > (long)TMaxValue)
+ if (Op > (long)TMaxValue)
{
State.SetFpsrFlag(FPSR.QC);
return TMaxValue;
}
- else if (op < (long)TMinValue)
+ else if (Op < (long)TMinValue)
{
State.SetFpsrFlag(FPSR.QC);
@@ -149,17 +149,17 @@ namespace ChocolArm64.Instruction
}
else
{
- return (ulong)op;
+ return (ulong)Op;
}
}
- public static long UnsignedSrcSignedDstSatQ(ulong op, int Size, AThreadState State)
+ public static long UnsignedSrcSignedDstSatQ(ulong Op, int Size, AThreadState State)
{
int ESize = 8 << Size;
long TMaxValue = (1L << (ESize - 1)) - 1L;
- if (op > (ulong)TMaxValue)
+ if (Op > (ulong)TMaxValue)
{
State.SetFpsrFlag(FPSR.QC);
@@ -167,17 +167,17 @@ namespace ChocolArm64.Instruction
}
else
{
- return (long)op;
+ return (long)Op;
}
}
- public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int Size, AThreadState State)
+ public static ulong UnsignedSrcUnsignedDstSatQ(ulong Op, int Size, AThreadState State)
{
int ESize = 8 << Size;
ulong TMaxValue = (1UL << ESize) - 1UL;
- if (op > TMaxValue)
+ if (Op > TMaxValue)
{
State.SetFpsrFlag(FPSR.QC);
@@ -185,13 +185,13 @@ namespace ChocolArm64.Instruction
}
else
{
- return op;
+ return Op;
}
}
- public static long UnarySignedSatQAbsOrNeg(long op, AThreadState State)
+ public static long UnarySignedSatQAbsOrNeg(long Op, AThreadState State)
{
- if (op == long.MinValue)
+ if (Op == long.MinValue)
{
State.SetFpsrFlag(FPSR.QC);
@@ -199,19 +199,19 @@ namespace ChocolArm64.Instruction
}
else
{
- return op;
+ return Op;
}
}
- public static long BinarySignedSatQAdd(long op1, long op2, AThreadState State)
+ public static long BinarySignedSatQAdd(long Op1, long Op2, AThreadState State)
{
- long Add = op1 + op2;
+ long Add = Op1 + Op2;
- if ((~(op1 ^ op2) & (op1 ^ Add)) < 0L)
+ if ((~(Op1 ^ Op2) & (Op1 ^ Add)) < 0L)
{
State.SetFpsrFlag(FPSR.QC);
- if (op1 < 0L)
+ if (Op1 < 0L)
{
return long.MinValue;
}
@@ -226,11 +226,11 @@ namespace ChocolArm64.Instruction
}
}
- public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, AThreadState State)
+ public static ulong BinaryUnsignedSatQAdd(ulong Op1, ulong Op2, AThreadState State)
{
- ulong Add = op1 + op2;
+ ulong Add = Op1 + Op2;
- if ((Add < op1) && (Add < op2))
+ if ((Add < Op1) && (Add < Op2))
{
State.SetFpsrFlag(FPSR.QC);
@@ -242,15 +242,15 @@ namespace ChocolArm64.Instruction
}
}
- public static long BinarySignedSatQSub(long op1, long op2, AThreadState State)
+ public static long BinarySignedSatQSub(long Op1, long Op2, AThreadState State)
{
- long Sub = op1 - op2;
+ long Sub = Op1 - Op2;
- if (((op1 ^ op2) & (op1 ^ Sub)) < 0L)
+ if (((Op1 ^ Op2) & (Op1 ^ Sub)) < 0L)
{
State.SetFpsrFlag(FPSR.QC);
- if (op1 < 0L)
+ if (Op1 < 0L)
{
return long.MinValue;
}
@@ -265,11 +265,11 @@ namespace ChocolArm64.Instruction
}
}
- public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, AThreadState State)
+ public static ulong BinaryUnsignedSatQSub(ulong Op1, ulong Op2, AThreadState State)
{
- ulong Sub = op1 - op2;
+ ulong Sub = Op1 - Op2;
- if (op1 < op2)
+ if (Op1 < Op2)
{
State.SetFpsrFlag(FPSR.QC);
@@ -281,16 +281,16 @@ namespace ChocolArm64.Instruction
}
}
- public static long BinarySignedSatQAcc(ulong op1, long op2, AThreadState State)
+ public static long BinarySignedSatQAcc(ulong Op1, long Op2, AThreadState State)
{
- if (op1 <= (ulong)long.MaxValue)
+ if (Op1 <= (ulong)long.MaxValue)
{
- // op1 from ulong.MinValue to (ulong)long.MaxValue
- // op2 from long.MinValue to long.MaxValue
+ // Op1 from ulong.MinValue to (ulong)long.MaxValue
+ // Op2 from long.MinValue to long.MaxValue
- long Add = (long)op1 + op2;
+ long Add = (long)Op1 + Op2;
- if ((~op2 & Add) < 0L)
+ if ((~Op2 & Add) < 0L)
{
State.SetFpsrFlag(FPSR.QC);
@@ -301,10 +301,10 @@ namespace ChocolArm64.Instruction
return Add;
}
}
- else if (op2 >= 0L)
+ else if (Op2 >= 0L)
{
- // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
- // op2 from (long)ulong.MinValue to long.MaxValue
+ // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+ // Op2 from (long)ulong.MinValue to long.MaxValue
State.SetFpsrFlag(FPSR.QC);
@@ -312,10 +312,10 @@ namespace ChocolArm64.Instruction
}
else
{
- // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
- // op2 from long.MinValue to (long)ulong.MinValue - 1L
+ // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+ // Op2 from long.MinValue to (long)ulong.MinValue - 1L
- ulong Add = op1 + (ulong)op2;
+ ulong Add = Op1 + (ulong)Op2;
if (Add > (ulong)long.MaxValue)
{
@@ -330,16 +330,16 @@ namespace ChocolArm64.Instruction
}
}
- public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, AThreadState State)
+ public static ulong BinaryUnsignedSatQAcc(long Op1, ulong Op2, AThreadState State)
{
- if (op1 >= 0L)
+ if (Op1 >= 0L)
{
- // op1 from (long)ulong.MinValue to long.MaxValue
- // op2 from ulong.MinValue to ulong.MaxValue
+ // Op1 from (long)ulong.MinValue to long.MaxValue
+ // Op2 from ulong.MinValue to ulong.MaxValue
- ulong Add = (ulong)op1 + op2;
+ ulong Add = (ulong)Op1 + Op2;
- if ((Add < (ulong)op1) && (Add < op2))
+ if ((Add < (ulong)Op1) && (Add < Op2))
{
State.SetFpsrFlag(FPSR.QC);
@@ -350,19 +350,19 @@ namespace ChocolArm64.Instruction
return Add;
}
}
- else if (op2 > (ulong)long.MaxValue)
+ else if (Op2 > (ulong)long.MaxValue)
{
- // op1 from long.MinValue to (long)ulong.MinValue - 1L
- // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+ // Op1 from long.MinValue to (long)ulong.MinValue - 1L
+ // Op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
- return (ulong)op1 + op2;
+ return (ulong)Op1 + Op2;
}
else
{
- // op1 from long.MinValue to (long)ulong.MinValue - 1L
- // op2 from ulong.MinValue to (ulong)long.MaxValue
+ // Op1 from long.MinValue to (long)ulong.MinValue - 1L
+ // Op2 from ulong.MinValue to (ulong)long.MaxValue
- long Add = op1 + (long)op2;
+ long Add = Op1 + (long)Op2;
if (Add < (long)ulong.MinValue)
{
@@ -530,6 +530,150 @@ namespace ChocolArm64.Instruction
}
#endregion
+#region "Sha1"
+ public static Vector128<float> HashChoose(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk) // Soft fallback called by Sha1c_V: four rounds that mix hash_abcd and hash_e with the elements of wk using SHAchoose; returns the updated hash_abcd.
+ {
+ for (int e = 0; e <= 3; e++) // one round per element of wk
+ {
+ uint t = SHAchoose((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), // t = SHAchoose(elements 1, 2 and 3 of hash_abcd)
+ (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2),
+ (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2));
+
+ hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; // add element 0 rotated left by 5, plus t
+ hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); // add element e of wk
+
+ t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); // element 1 rotated left by 30 ...
+ hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); // ... is written back into element 1
+
+ Rol32_160(ref hash_e, ref hash_abcd); // hash_e moves into element 0, the other elements move up one, old element 3 becomes hash_e
+ }
+
+ return hash_abcd;
+ }
+
+ public static uint FixedRotate(uint hash_e) // Soft fallback called by Sha1h_V: returns hash_e rotated left by 30 bits.
+ {
+ return hash_e.Rol(30);
+ }
+
+ public static Vector128<float> HashMajority(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk) // Soft fallback called by Sha1m_V: four rounds that mix hash_abcd and hash_e with the elements of wk using SHAmajority; returns the updated hash_abcd.
+ {
+ for (int e = 0; e <= 3; e++) // one round per element of wk
+ {
+ uint t = SHAmajority((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), // t = SHAmajority(elements 1, 2 and 3 of hash_abcd)
+ (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2),
+ (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2));
+
+ hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; // add element 0 rotated left by 5, plus t
+ hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); // add element e of wk
+
+ t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); // element 1 rotated left by 30 ...
+ hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); // ... is written back into element 1
+
+ Rol32_160(ref hash_e, ref hash_abcd); // hash_e moves into element 0, the other elements move up one, old element 3 becomes hash_e
+ }
+
+ return hash_abcd;
+ }
+
+ public static Vector128<float> HashParity(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk) // Soft fallback called by Sha1p_V: four rounds that mix hash_abcd and hash_e with the elements of wk using SHAparity; returns the updated hash_abcd.
+ {
+ for (int e = 0; e <= 3; e++) // one round per element of wk
+ {
+ uint t = SHAparity((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), // t = SHAparity(elements 1, 2 and 3 of hash_abcd)
+ (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2),
+ (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2));
+
+ hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; // add element 0 rotated left by 5, plus t
+ hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); // add element e of wk
+
+ t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); // element 1 rotated left by 30 ...
+ hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); // ... is written back into element 1
+
+ Rol32_160(ref hash_e, ref hash_abcd); // hash_e moves into element 0, the other elements move up one, old element 3 becomes hash_e
+ }
+
+ return hash_abcd;
+ }
+
+ public static Vector128<float> Sha1SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7, Vector128<float> w8_11) // Soft fallback called by Sha1su0_V: returns (upper half of w0_3 : lower half of w4_7) XOR w0_3 XOR w8_11.
+ {
+ if (!Sse.IsSupported) // Sse.Xor is used below
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ Vector128<float> result = new Vector128<float>();
+
+ ulong t2 = VectorExtractIntZx(w4_7, (byte)0, 3); // element 0 of w4_7 at size selector 3 (presumably the low 64 bits)
+ ulong t1 = VectorExtractIntZx(w0_3, (byte)1, 3); // element 1 of w0_3 at size selector 3 (presumably the high 64 bits)
+
+ result = VectorInsertInt((ulong)t1, result, (byte)0, 3); // t1 becomes element 0 of result
+ result = VectorInsertInt((ulong)t2, result, (byte)1, 3); // t2 becomes element 1 of result
+
+ return Sse.Xor(result, Sse.Xor(w0_3, w8_11));
+ }
+
+ public static Vector128<float> Sha1SchedulePart2(Vector128<float> tw0_3, Vector128<float> w12_15) // Soft fallback called by Sha1su1_V: XORs tw0_3 with w12_15 shifted right by 4 bytes, then rotates each element left by 1; element 3 is additionally XORed with element 0 rotated left by 2.
+ {
+ if (!Sse2.IsSupported) // Sse2.ShiftRightLogical128BitLane is used below
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ Vector128<float> result = new Vector128<float>();
+
+ Vector128<float> T = Sse.Xor(tw0_3, Sse.StaticCast<uint, float>( // T = tw0_3 XOR (w12_15 shifted right by 4 bytes across the whole 128-bit lane)
+ Sse2.ShiftRightLogical128BitLane(Sse.StaticCast<float, uint>(w12_15), (byte)4)));
+
+ uint tE0 = (uint)VectorExtractIntZx(T, (byte)0, 2); // the four elements of T
+ uint tE1 = (uint)VectorExtractIntZx(T, (byte)1, 2);
+ uint tE2 = (uint)VectorExtractIntZx(T, (byte)2, 2);
+ uint tE3 = (uint)VectorExtractIntZx(T, (byte)3, 2);
+
+ result = VectorInsertInt((ulong)tE0.Rol(1), result, (byte)0, 2); // elements 0..2 of result: the matching element of T rotated left by 1
+ result = VectorInsertInt((ulong)tE1.Rol(1), result, (byte)1, 2);
+ result = VectorInsertInt((ulong)tE2.Rol(1), result, (byte)2, 2);
+
+ return VectorInsertInt((ulong)(tE3.Rol(1) ^ tE0.Rol(2)), result, (byte)3, 2); // element 3 also mixes in element 0 of T rotated left by 2
+ }
+
+ private static void Rol32_160(ref uint y, ref Vector128<float> X) // Rotates the pair (y, X) by one element: y becomes element 0 of X, the other elements of X move up one, and the old element 3 of X is returned in y.
+ {
+ if (!Sse2.IsSupported) // Sse2.ShiftLeftLogical128BitLane is used below
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ uint xE3 = (uint)VectorExtractIntZx(X, (byte)3, 2); // save element 3 before the shift discards it
+
+ X = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(X), (byte)4)); // shift the whole 128-bit lane left by 4 bytes
+ X = VectorInsertInt((ulong)y, X, (byte)0, 2); // the old y fills element 0
+
+ y = xE3;
+ }
+
+ private static uint SHAchoose(uint x, uint y, uint z) // Bitwise select: each result bit comes from y where the bit of x is 1 and from z where it is 0.
+ {
+ return ((y ^ z) & x) ^ z;
+ }
+
+ private static uint SHAmajority(uint x, uint y, uint z) // Bitwise majority: each result bit is 1 when that bit is set in at least two of x, y and z.
+ {
+ return (x & y) | ((x | y) & z);
+ }
+
+ private static uint SHAparity(uint x, uint y, uint z) // Bitwise XOR of the three inputs.
+ {
+ return x ^ y ^ z;
+ }
+
+ private static uint Rol(this uint value, int count) // Extension method: rotates the 32-bit value left by count bits.
+ {
+ return (value << count) | (value >> (32 - count)); // C# masks uint shift counts to 5 bits, so count == 0 shifts by 0 on both sides and returns value unchanged
+ }
+#endregion
+
#region "Sha256"
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> HashLower(Vector128<float> hash_abcd, Vector128<float> hash_efgh, Vector128<float> wk)
@@ -543,7 +687,7 @@ namespace ChocolArm64.Instruction
return SHA256hash(hash_abcd, hash_efgh, wk, false);
}
- public static Vector128<float> SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7)
+ public static Vector128<float> Sha256SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7)
{
Vector128<float> result = new Vector128<float>();
@@ -561,7 +705,7 @@ namespace ChocolArm64.Instruction
return result;
}
- public static Vector128<float> SchedulePart2(Vector128<float> w0_3, Vector128<float> w8_11, Vector128<float> w12_15)
+ public static Vector128<float> Sha256SchedulePart2(Vector128<float> w0_3, Vector128<float> w8_11, Vector128<float> w12_15)
{
Vector128<float> result = new Vector128<float>();
@@ -650,16 +794,6 @@ namespace ChocolArm64.Instruction
return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
}
- private static uint SHAmajority(uint x, uint y, uint z)
- {
- return (x & y) | ((x | y) & z);
- }
-
- private static uint SHAchoose(uint x, uint y, uint z)
- {
- return ((y ^ z) & x) ^ z;
- }
-
private static uint Ror(this uint value, int count)
{
return (value >> count) | (value << (32 - count));
@@ -750,27 +884,35 @@ namespace ChocolArm64.Instruction
#endregion
#region "MultiplyHigh"
- public static long SMulHi128(long LHS, long RHS)
+ public static long SMulHi128(long Left, long Right) // Takes the UMulHi128 result of the operands reinterpreted as unsigned, then subtracts Right when Left is negative and Left when Right is negative.
{
- long Result = (long)UMulHi128((ulong)LHS, (ulong)RHS);
- if (LHS < 0) Result -= RHS;
- if (RHS < 0) Result -= LHS;
+ long Result = (long)UMulHi128((ulong)Left, (ulong)Right); // unsigned result on the raw bit patterns
+
+ if (Left < 0) // correction for a negative Left
+ {
+ Result -= Right;
+ }
+
+ if (Right < 0) // correction for a negative Right
+ {
+ Result -= Left;
+ }
return Result;
}
- public static ulong UMulHi128(ulong LHS, ulong RHS)
+ public static ulong UMulHi128(ulong Left, ulong Right)
{
- //long multiplication
- //multiply 32 bits at a time in 64 bit, the result is what's carried over 64 bits.
- ulong LHigh = LHS >> 32;
- ulong LLow = LHS & 0xFFFFFFFF;
- ulong RHigh = RHS >> 32;
- ulong RLow = RHS & 0xFFFFFFFF;
- ulong Z2 = LLow * RLow;
- ulong T = LHigh * RLow + (Z2 >> 32);
+ ulong LHigh = Left >> 32;
+ ulong LLow = Left & 0xFFFFFFFF;
+ ulong RHigh = Right >> 32;
+ ulong RLow = Right & 0xFFFFFFFF;
+
+ ulong Z2 = LLow * RLow;
+ ulong T = LHigh * RLow + (Z2 >> 32);
ulong Z1 = T & 0xFFFFFFFF;
ulong Z0 = T >> 32;
+
Z1 += LLow * RHigh;
return LHigh * RHigh + Z0 + (Z1 >> 32);
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 795d649a..3bb24f3a 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -371,6 +371,23 @@ namespace Ryujinx.Tests.Cpu
0x6EE1F800u // FSQRT V0.2D, V0.2D
};
}
+
+ private static uint[] _Sha1h_Sha1su1_V_() // Value source for the Sha1h_Sha1su1_V test: base opcodes into which the test ORs the Rn and Rd fields.
+ {
+ return new uint[]
+ {
+ 0x5E280800u, // SHA1H S0, S0
+ 0x5E281800u // SHA1SU1 V0.4S, V0.4S
+ };
+ }
+
+ private static uint[] _Sha256su0_V_() // Value source for the Sha256su0_V test: base opcode into which the test ORs the Rn and Rd fields.
+ {
+ return new uint[]
+ {
+ 0x5E282800u // SHA256SU0 V0.4S, V0.4S
+ };
+ }
#endregion
private const int RndCnt = 2;
@@ -1435,19 +1452,36 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("SHA256SU0 <Vd>.4S, <Vn>.4S")]
- public void Sha256su0_V([Values(0u)] uint Rd,
+ [Test, Pairwise] // Runs each base opcode from _Sha1h_Sha1su1_V_ with random V0/V1 contents, then calls CompareAgainstUnicorn().
+ public void Sha1h_Sha1su1_V([ValueSource("_Sha1h_Sha1su1_V_")] uint Opcodes,
+ [Values(0u)] uint Rd,
+ [Values(1u, 0u)] uint Rn, // covers Rn != Rd and Rn == Rd (Rd is always 0)
+ [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
+ [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1)
+ {
+ Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 5..9, Rd into bits 0..4
+
+ Vector128<float> V0 = MakeVectorE0E1(Z0, Z1); // initial V0 built from Z0/Z1
+ Vector128<float> V1 = MakeVectorE0E1(A0, A1); // initial V1 built from A0/A1
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1);
+
+ CompareAgainstUnicorn();
+ }
+
+ [Test, Pairwise] // Runs the base opcode from _Sha256su0_V_ with random V0/V1 contents, then calls CompareAgainstUnicorn().
+ public void Sha256su0_V([ValueSource("_Sha256su0_V_")] uint Opcodes,
+ [Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
[Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
[Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1)
{
- uint Opcode = 0x5E282800; // SHA256SU0 V0.4S, V0.4S
- Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 5..9, Rd into bits 0..4
Vector128<float> V0 = MakeVectorE0E1(Z0, Z1);
Vector128<float> V1 = MakeVectorE0E1(A0, A1);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); // the opcode now comes from the value source instead of a local constant
CompareAgainstUnicorn();
}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index 1ea017c8..e986d7f6 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -353,6 +353,27 @@ namespace Ryujinx.Tests.Cpu
0x4EE0FC00u // FRSQRTS V0.2D, V0.2D, V0.2D
};
}
+
+ private static uint[] _Sha1c_Sha1m_Sha1p_Sha1su0_V_() // Value source for the Sha1c_Sha1m_Sha1p_Sha1su0_V test: base opcodes into which the test ORs the Rm, Rn and Rd fields.
+ {
+ return new uint[]
+ {
+ 0x5E000000u, // SHA1C Q0, S0, V0.4S
+ 0x5E002000u, // SHA1M Q0, S0, V0.4S
+ 0x5E001000u, // SHA1P Q0, S0, V0.4S
+ 0x5E003000u // SHA1SU0 V0.4S, V0.4S, V0.4S
+ };
+ }
+
+ private static uint[] _Sha256h_Sha256h2_Sha256su1_V_() // Value source for the Sha256h_Sha256h2_Sha256su1_V test: base opcodes into which the test ORs the Rm, Rn and Rd fields.
+ {
+ return new uint[]
+ {
+ 0x5E004000u, // SHA256H Q0, Q0, V0.4S
+ 0x5E005000u, // SHA256H2 Q0, Q0, V0.4S
+ 0x5E006000u // SHA256SU1 V0.4S, V0.4S, V0.4S
+ };
+ }
#endregion
private const int RndCnt = 2;
@@ -1847,62 +1868,42 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("SHA256H <Qd>, <Qn>, <Vm>.4S")]
- public void Sha256h_V([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
- [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1,
- [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1)
- {
- uint Opcode = 0x5E004000; // SHA256H Q0, Q0, V0.4S
- Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
-
- Vector128<float> V0 = MakeVectorE0E1(Z0, Z1);
- Vector128<float> V1 = MakeVectorE0E1(A0, A1);
- Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
- CompareAgainstUnicorn();
- }
-
- [Test, Pairwise, Description("SHA256H2 <Qd>, <Qn>, <Vm>.4S")]
- public void Sha256h2_V([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
- [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1,
- [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1)
+ [Test, Pairwise] // Runs each base opcode from _Sha1c_Sha1m_Sha1p_Sha1su0_V_ with random V0/V1/V2 contents, then calls CompareAgainstUnicorn().
+ public void Sha1c_Sha1m_Sha1p_Sha1su0_V([ValueSource("_Sha1c_Sha1m_Sha1p_Sha1su0_V_")] uint Opcodes,
+ [Values(0u)] uint Rd,
+ [Values(1u, 0u)] uint Rn, // covers Rn != Rd and Rn == Rd (Rd is always 0)
+ [Values(2u, 0u)] uint Rm, // covers Rm != Rd and Rm == Rd
+ [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
+ [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1,
+ [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1)
{
- uint Opcode = 0x5E005000; // SHA256H2 Q0, Q0, V0.4S
- Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes into bits 16..20, Rn into bits 5..9, Rd into bits 0..4
Vector128<float> V0 = MakeVectorE0E1(Z0, Z1);
Vector128<float> V1 = MakeVectorE0E1(A0, A1);
Vector128<float> V2 = MakeVectorE0E1(B0, B1);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); // the opcode now comes from the value source instead of a local constant
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("SHA256SU1 <Vd>.4S, <Vn>.4S, <Vm>.4S")]
- public void Sha256su1_V([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
- [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1,
- [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1)
+ [Test, Pairwise] // Runs each base opcode from _Sha256h_Sha256h2_Sha256su1_V_ with random V0/V1/V2 contents, then calls CompareAgainstUnicorn().
+ public void Sha256h_Sha256h2_Sha256su1_V([ValueSource("_Sha256h_Sha256h2_Sha256su1_V_")] uint Opcodes,
+ [Values(0u)] uint Rd,
+ [Values(1u, 0u)] uint Rn, // covers Rn != Rd and Rn == Rd (Rd is always 0)
+ [Values(2u, 0u)] uint Rm, // covers Rm != Rd and Rm == Rd
+ [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1,
+ [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1,
+ [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1)
{
- uint Opcode = 0x5E006000; // SHA256SU1 V0.4S, V0.4S, V0.4S
- Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes into bits 16..20, Rn into bits 5..9, Rd into bits 0..4
Vector128<float> V0 = MakeVectorE0E1(Z0, Z1);
Vector128<float> V1 = MakeVectorE0E1(A0, A1);
Vector128<float> V2 = MakeVectorE0E1(B0, B1);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); // the opcode now comes from the value source instead of a local constant
CompareAgainstUnicorn();
}