diff options
| author | Alex Barney <thealexbarney@gmail.com> | 2018-10-30 19:43:02 -0600 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2018-10-30 22:43:02 -0300 |
| commit | 9cb57fb4bb3bbae0ae052a5af4a96a49fc5d864d (patch) | |
| tree | 0c97425aeb311c142bc92a6fcc503cb2c07d4376 /ChocolArm64/Instructions/InstEmitSimdMove.cs | |
| parent | 5a87e58183578f5b84ca8d01cbb76aed11820f78 (diff) | |
Adjust naming conventions for Ryujinx and ChocolArm64 projects (#484)
* Change naming convention for Ryujinx project
* Change naming convention for ChocolArm64 project
* Fix NaN
* Remove unneeded this. from Ryujinx project
* Adjust naming from new PRs
* Name changes based on feedback
* How did this get removed?
* Rebasing fix
* Change FP enum case
* Remove prefix from ChocolArm64 classes - Part 1
* Remove prefix from ChocolArm64 classes - Part 2
* Fix alignment from last commit's renaming
* Rename namespaces
* Rename stragglers
* Fix alignment
* Rename OpCode class
* Missed a few
* Adjust alignment
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdMove.cs')
| -rw-r--r-- | ChocolArm64/Instructions/InstEmitSimdMove.cs | 562 |
1 files changed, 562 insertions, 0 deletions
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;

using static ChocolArm64.Instructions.InstEmitSimdHelper;

namespace ChocolArm64.Instructions
{
    /// <summary>
    /// IL emitters for the SIMD data movement instructions (duplicate, extract, insert,
    /// move, table lookup, transpose, zip/unzip and narrowing moves).
    /// Every public method reads the instruction being translated from
    /// <c>context.CurrOp</c> and emits the IL that implements it.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>
        /// Replicates the integer register Rn into every element of the vector register Rd.
        /// </summary>
        public static void Dup_Gp(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                context.EmitLdintzr(op.Rn);

                //Truncate the 64-bit register value to the element width.
                //Size 3 (64-bit elements) needs no conversion.
                switch (op.Size)
                {
                    case 0: context.Emit(OpCodes.Conv_U1); break;
                    case 1: context.Emit(OpCodes.Conv_U2); break;
                    case 2: context.Emit(OpCodes.Conv_U4); break;
                }

                Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] };

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types));

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
            }
            else
            {
                int bytes = op.GetBitsCount() >> 3;
                int elems = bytes >> op.Size;

                for (int index = 0; index < elems; index++)
                {
                    context.EmitLdintzr(op.Rn);

                    EmitVectorInsert(context, op.Rd, index, op.Size);
                }
            }

            //Both paths end the same way for 64-bit vector operations.
            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Extracts (zero extended) the element of Rn selected by <c>op.DstIndex</c>
        /// and stores it as the scalar value of Rd.
        /// </summary>
        public static void Dup_S(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

            EmitScalarSet(context, op.Rd, op.Size);
        }

        /// <summary>
        /// Replicates the element of Rn selected by <c>op.DstIndex</c> into every element of Rd.
        /// </summary>
        public static void Dup_V(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            int bytes = op.GetBitsCount() >> 3;
            int elems = bytes >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

                EmitVectorInsert(context, op.Rd, index, op.Size);
            }

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Builds Rd byte by byte in the temporary vector: bytes are read from Rn
        /// starting at byte <c>op.Imm4</c>, and once Rn is exhausted reading continues
        /// from byte 0 of Rm.
        /// </summary>
        public static void Ext_V(ILEmitterCtx context)
        {
            OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;

            //The temporary vector is seeded with the current value of Rd.
            context.EmitLdvec(op.Rd);
            context.EmitStvectmp();

            int bytes = op.GetBitsCount() >> 3;

            int position = op.Imm4;

            for (int index = 0; index < bytes; index++)
            {
                int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;

                //Wrap around to the first byte when switching from Rn to Rm.
                if (position == bytes)
                {
                    position = 0;
                }

                EmitVectorExtractZx(context, reg, position++, 0);
                EmitVectorInsertTmp(context, index, 0);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Stores into scalar Rd element 0 of Rn when the condition <c>op.Cond</c>
        /// holds, and element 0 of Rm otherwise.
        /// </summary>
        public static void Fcsel_S(ILEmitterCtx context)
        {
            OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp;

            ILLabel lblTrue = new ILLabel();
            ILLabel lblEnd  = new ILLabel();

            context.EmitCondBranch(lblTrue, op.Cond);

            EmitVectorExtractF(context, op.Rm, 0, op.Size);

            context.Emit(OpCodes.Br_S, lblEnd);

            context.MarkLabel(lblTrue);

            EmitVectorExtractF(context, op.Rn, 0, op.Size);

            context.MarkLabel(lblEnd);

            EmitScalarSetF(context, op.Rd, op.Size);
        }

        /// <summary>
        /// Moves 64-bit element 0 of vector Rn into the integer register Rd,
        /// keeping only the low 32 bits for 32-bit operations.
        /// </summary>
        public static void Fmov_Ftoi(ILEmitterCtx context)
        {
            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

            EmitVectorExtractZx(context, op.Rn, 0, 3);

            EmitIntZeroUpperIfNeeded(context);

            context.EmitStintzr(op.Rd);
        }

        /// <summary>
        /// Moves 64-bit element 1 of vector Rn into the integer register Rd,
        /// keeping only the low 32 bits for 32-bit operations.
        /// </summary>
        public static void Fmov_Ftoi1(ILEmitterCtx context)
        {
            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

            EmitVectorExtractZx(context, op.Rn, 1, 3);

            EmitIntZeroUpperIfNeeded(context);

            context.EmitStintzr(op.Rd);
        }

        /// <summary>
        /// Moves the integer register Rn into Rd as a 64-bit scalar,
        /// keeping only the low 32 bits for 32-bit operations.
        /// </summary>
        public static void Fmov_Itof(ILEmitterCtx context)
        {
            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            EmitIntZeroUpperIfNeeded(context);

            EmitScalarSet(context, op.Rd, 3);
        }

        /// <summary>
        /// Inserts the integer register Rn into 64-bit element 1 of vector Rd,
        /// keeping only the low 32 bits for 32-bit operations.
        /// </summary>
        public static void Fmov_Itof1(ILEmitterCtx context)
        {
            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            EmitIntZeroUpperIfNeeded(context);

            EmitVectorInsert(context, op.Rd, 1, 3);
        }

        /// <summary>
        /// Copies floating point element 0 of Rn into scalar Rd.
        /// </summary>
        public static void Fmov_S(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            EmitVectorExtractF(context, op.Rn, 0, op.Size);

            EmitScalarSetF(context, op.Rd, op.Size);
        }

        /// <summary>
        /// Stores the immediate <c>op.Imm</c> as the scalar value of Rd.
        /// </summary>
        public static void Fmov_Si(ILEmitterCtx context)
        {
            OpCodeSimdFmov64 op = (OpCodeSimdFmov64)context.CurrOp;

            context.EmitLdc_I8(op.Imm);

            //The element size passed on is op.Size + 2 (log2 of the size in bytes).
            EmitScalarSet(context, op.Rd, op.Size + 2);
        }

        /// <summary>
        /// Stores the immediate <c>op.Imm</c> into every element of vector Rd.
        /// </summary>
        public static void Fmov_V(ILEmitterCtx context)
        {
            OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;

            int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2;

            for (int index = 0; index < (elems >> op.Size); index++)
            {
                context.EmitLdc_I8(op.Imm);

                EmitVectorInsert(context, op.Rd, index, op.Size + 2);
            }

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Inserts the integer register Rn into the element of Rd selected by <c>op.DstIndex</c>.
        /// </summary>
        public static void Ins_Gp(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            EmitVectorInsert(context, op.Rd, op.DstIndex, op.Size);
        }

        /// <summary>
        /// Copies the element of Rn selected by <c>op.SrcIndex</c> into the element
        /// of Rd selected by <c>op.DstIndex</c>.
        /// </summary>
        public static void Ins_V(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);

            EmitVectorInsert(context, op.Rd, op.DstIndex, op.Size);
        }

        /// <summary>
        /// Vector move immediate: delegates to <c>EmitVectorImmUnaryOp</c> with no extra operation.
        /// </summary>
        public static void Movi_V(ILEmitterCtx context)
        {
            EmitVectorImmUnaryOp(context, () => { });
        }

        /// <summary>
        /// Vector move inverted immediate: delegates to <c>EmitVectorImmUnaryOp</c>
        /// with a bitwise NOT as the per element operation.
        /// </summary>
        public static void Mvni_V(ILEmitterCtx context)
        {
            EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
        }

        /// <summary>
        /// Moves the sign extended element of Rn selected by <c>op.DstIndex</c> into
        /// the integer register Rd, keeping only the low 32 bits for 32-bit operations.
        /// </summary>
        public static void Smov_S(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);

            EmitIntZeroUpperIfNeeded(context);

            context.EmitStintzr(op.Rd);
        }

        /// <summary>
        /// Table lookup: pushes Rm followed by <c>op.Size</c> consecutive vector
        /// registers starting at Rn, calls the matching <c>VectorHelper.Tbl*</c>
        /// method and stores the result in Rd.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// <c>op.Size</c> is not in the range 1 to 4.
        /// </exception>
        public static void Tbl_V(ILEmitterCtx context)
        {
            OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;

            context.EmitLdvec(op.Rm);

            for (int index = 0; index < op.Size; index++)
            {
                //Register numbers wrap around after register 31.
                context.EmitLdvec((op.Rn + index) & 0x1f);
            }

            switch (op.Size)
            {
                case 1: VectorHelper.EmitCall(context,
                    nameof(VectorHelper.Tbl1_V64),
                    nameof(VectorHelper.Tbl1_V128)); break;

                case 2: VectorHelper.EmitCall(context,
                    nameof(VectorHelper.Tbl2_V64),
                    nameof(VectorHelper.Tbl2_V128)); break;

                case 3: VectorHelper.EmitCall(context,
                    nameof(VectorHelper.Tbl3_V64),
                    nameof(VectorHelper.Tbl3_V128)); break;

                case 4: VectorHelper.EmitCall(context,
                    nameof(VectorHelper.Tbl4_V64),
                    nameof(VectorHelper.Tbl4_V128)); break;

                default: throw new InvalidOperationException();
            }

            context.EmitStvec(op.Rd);
        }

        /// <summary>
        /// Transposes the even numbered elements of Rn and Rm into Rd.
        /// </summary>
        public static void Trn1_V(ILEmitterCtx context)
        {
            EmitVectorTranspose(context, part: 0);
        }

        /// <summary>
        /// Transposes the odd numbered elements of Rn and Rm into Rd.
        /// </summary>
        public static void Trn2_V(ILEmitterCtx context)
        {
            EmitVectorTranspose(context, part: 1);
        }

        /// <summary>
        /// Moves the zero extended element of Rn selected by <c>op.DstIndex</c>
        /// into the integer register Rd.
        /// </summary>
        public static void Umov_S(ILEmitterCtx context)
        {
            OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

            EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

            context.EmitStintzr(op.Rd);
        }

        /// <summary>
        /// Gathers the even numbered elements of Rn (low half of Rd) and Rm (high half of Rd).
        /// </summary>
        public static void Uzp1_V(ILEmitterCtx context)
        {
            EmitVectorUnzip(context, part: 0);
        }

        /// <summary>
        /// Gathers the odd numbered elements of Rn (low half of Rd) and Rm (high half of Rd).
        /// </summary>
        public static void Uzp2_V(ILEmitterCtx context)
        {
            EmitVectorUnzip(context, part: 1);
        }

        /// <summary>
        /// Narrows each wide element of Rn to half its width, keeping the low half of
        /// its bits. For 64-bit operations (XTN) the result fills the lower half of Rd
        /// and the upper half is cleared; for 128-bit operations (XTN2) the result
        /// fills the upper half of Rd and the lower half is preserved.
        /// </summary>
        public static void Xtn_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int elems = 8 >> op.Size;

            //Index of the first destination element: 0 for XTN, elems for XTN2.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                void EmitZeroVector()
                {
                    switch (op.Size)
                    {
                        case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero)); break;
                        case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero)); break;
                    }
                }

                //For XTN, first operand is source, second operand is 0.
                //For XTN2, first operand is 0, second operand is source.
                if (part != 0)
                {
                    EmitZeroVector();
                }

                EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);

                //Set mask to discard the upper half of the wide elements.
                switch (op.Size)
                {
                    case 0: context.EmitLdc_I4(0x00ff);     break;
                    case 1: context.EmitLdc_I4(0x0000ffff); break;
                }

                Type wideType = IntTypesPerSizeLog2[op.Size + 1];

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideType }));

                wideType = VectorIntTypesPerSizeLog2[op.Size + 1];

                Type[] wideTypes = new Type[] { wideType, wideType };

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideTypes));

                if (part == 0)
                {
                    EmitZeroVector();
                }

                //Pack values with unsigned saturation (signed wide input, unsigned
                //narrow output). The saturation shouldn't change anything since the
                //upper bits were masked off, so every value already fits.
                //The 16 -> 8 bits pack is SSE2, the 32 -> 16 bits pack needs SSE4.1.
                Type sseType = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

                context.EmitCall(sseType.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideTypes));

                if (part != 0)
                {
                    //For XTN2, we additionally need to discard the upper bits
                    //of the target register and OR the result with it.
                    EmitVectorZeroUpper(context, op.Rd);

                    EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);

                    Type narrowType = VectorUIntTypesPerSizeLog2[op.Size];

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowType, narrowType }));
                }

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
            }
            else
            {
                if (part != 0)
                {
                    //XTN2 keeps the lower half of Rd, so start from its current value.
                    context.EmitLdvec(op.Rd);
                    context.EmitStvectmp();
                }

                for (int index = 0; index < elems; index++)
                {
                    EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

                    EmitVectorInsertTmp(context, part + index, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (part == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
        }

        /// <summary>
        /// Interleaves the elements of the lower halves of Rn and Rm into Rd.
        /// </summary>
        public static void Zip1_V(ILEmitterCtx context)
        {
            EmitVectorZip(context, part: 0);
        }

        /// <summary>
        /// Interleaves the elements of the upper halves of Rn and Rm into Rd.
        /// </summary>
        public static void Zip2_V(ILEmitterCtx context)
        {
            EmitVectorZip(context, part: 1);
        }

        /// <summary>
        /// For operations whose register size is <c>Int32</c> or <c>Simd64</c>, emits
        /// a conversion to 32 bits followed by a zero extension back to 64 bits for
        /// the value on top of the evaluation stack. Emits nothing otherwise.
        /// </summary>
        private static void EmitIntZeroUpperIfNeeded(ILEmitterCtx context)
        {
            if (context.CurrOp.RegisterSize == RegisterSize.Int32 ||
                context.CurrOp.RegisterSize == RegisterSize.Simd64)
            {
                context.Emit(OpCodes.Conv_U4);
                context.Emit(OpCodes.Conv_U8);
            }
        }

        /// <summary>
        /// Shared implementation of the transpose instructions. For each pair of
        /// elements, element <c>2 * i + part</c> of Rn goes to element <c>2 * i</c>
        /// of Rd and the same element of Rm goes to element <c>2 * i + 1</c>.
        /// </summary>
        /// <param name="context">Emitter context of the instruction being translated.</param>
        /// <param name="part">0 to take the even numbered elements, 1 to take the odd ones.</param>
        private static void EmitVectorTranspose(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int words = op.GetBitsCount() >> 4;
            int pairs = words >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int idx = index << 1;

                EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
                EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);

                //The Rm value is on top of the stack, so it is inserted first.
                EmitVectorInsertTmp(context, idx + 1, op.Size);
                EmitVectorInsertTmp(context, idx,     op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Shared implementation of the unzip instructions. Element <c>2 * i + part</c>
        /// of Rn goes to element <c>i</c> of Rd and the same element of Rm goes to
        /// element <c>pairs + i</c>.
        /// </summary>
        /// <param name="context">Emitter context of the instruction being translated.</param>
        /// <param name="part">0 to take the even numbered elements, 1 to take the odd ones.</param>
        private static void EmitVectorUnzip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int words = op.GetBitsCount() >> 4;
            int pairs = words >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int idx = index << 1;

                EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
                EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);

                //The Rm value is on top of the stack, so it is inserted first.
                EmitVectorInsertTmp(context, pairs + index, op.Size);
                EmitVectorInsertTmp(context,         index, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }

        /// <summary>
        /// Shared implementation of the zip instructions. Interleaves the elements of
        /// one half of Rn with the elements of the same half of Rm and stores the
        /// result in Rd.
        /// </summary>
        /// <param name="context">Emitter context of the instruction being translated.</param>
        /// <param name="part">0 to interleave the lower halves, non-zero for the upper halves.</param>
        private static void EmitVectorZip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
                EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

                Type[] types = new Type[]
                {
                    VectorUIntTypesPerSizeLog2[op.Size],
                    VectorUIntTypesPerSizeLog2[op.Size]
                };

                //A 64-bit operation only has data in the low 8 bytes of each source,
                //so UnpackLow is used for both of its parts.
                string name = part == 0 || op.RegisterSize == RegisterSize.Simd64
                    ? nameof(Sse2.UnpackLow)
                    : nameof(Sse2.UnpackHigh);

                context.EmitCall(typeof(Sse2).GetMethod(name, types));

                if (op.RegisterSize == RegisterSize.Simd64 && part != 0)
                {
                    //The elements wanted are in the upper 8 bytes of the unpacked
                    //vector. The byte shift moves them down and fills the upper
                    //half with zeros, so no separate zeroing is needed here.
                    context.EmitLdc_I4(8);

                    Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes));
                }

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64 && part == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                int words = op.GetBitsCount() >> 4;
                int pairs = words >> op.Size;

                //First source element to read: 0 for the lower half, pairs for the upper half.
                int baseIndex = part != 0 ? pairs : 0;

                for (int index = 0; index < pairs; index++)
                {
                    int idx = index << 1;

                    EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
                    EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

                    //The Rm value is on top of the stack, so it is inserted first.
                    EmitVectorInsertTmp(context, idx + 1, op.Size);
                    EmitVectorInsertTmp(context, idx,     op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
        }
    }
}
