diff options
Diffstat (limited to 'ARMeilleure/Instructions')
27 files changed, 5254 insertions, 390 deletions
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs index 424203ff..b65149cb 100644 --- a/ARMeilleure/Instructions/DelegateTypes.cs +++ b/ARMeilleure/Instructions/DelegateTypes.cs @@ -4,13 +4,19 @@ using System; namespace ARMeilleure.Instructions { delegate double _F64_F64(double a1); + delegate double _F64_F64_Bool(double a1, bool a2); delegate double _F64_F64_F64(double a1, double a2); + delegate double _F64_F64_F64_Bool(double a1, double a2, bool a3); delegate double _F64_F64_F64_F64(double a1, double a2, double a3); + delegate double _F64_F64_F64_F64_Bool(double a1, double a2, double a3, bool a4); delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2); delegate float _F32_F32(float a1); + delegate float _F32_F32_Bool(float a1, bool a2); delegate float _F32_F32_F32(float a1, float a2); + delegate float _F32_F32_F32_Bool(float a1, float a2, bool a3); delegate float _F32_F32_F32_F32(float a1, float a2, float a3); + delegate float _F32_F32_F32_F32_Bool(float a1, float a2, float a3, bool a4); delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2); delegate float _F32_U16(ushort a1); @@ -37,6 +43,7 @@ namespace ARMeilleure.Instructions delegate ushort _U16_F32(float a1); delegate ushort _U16_U64(ulong a1); + delegate uint _U32(); delegate uint _U32_F32(float a1); delegate uint _U32_F64(double a1); delegate uint _U32_U32(uint a1); @@ -74,6 +81,7 @@ namespace ARMeilleure.Instructions delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3); delegate void _Void(); + delegate void _Void_U32(uint a1); delegate void _Void_U64(ulong a1); delegate void _Void_U64_S32(ulong a1, int a2); delegate void _Void_U64_U16(ulong a1, ushort a2); diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs index ed1faae4..6e2875e6 100644 --- a/ARMeilleure/Instructions/InstEmitAlu.cs +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -276,23 +276,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); - - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); - - return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16))); - } - private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op) { Debug.Assert(op.Type == OperandType.I64); @@ -331,23 +314,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); - - return context.ConvertI64ToI32(val); - } - - private static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I64); - - return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); - } - public static void Rev32(ArmEmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs index 79b0abbc..4d03f5c2 100644 --- a/ARMeilleure/Instructions/InstEmitAlu32.cs +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -3,8 +3,8 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; -using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -31,6 +31,101 @@ namespace ARMeilleure.Instructions EmitAluStore(context, res); } + public static void Adc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + res = context.Add(res, carry); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitAdcsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void And(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Bfc(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + + SetIntA32(context, op.Rd, res); + } + + public static void Bfi(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand d = GetIntA32(context, op.Rd); + Operand part = context.BitwiseAnd(n, Const(op.SourceMask)); + + if (op.Lsb != 0) + { + part = context.ShiftLeft(part, Const(op.Lsb)); + } + + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask))); + + SetIntA32(context, op.Rd, res); + } + + public static void Bic(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.CountLeadingZeros(m); + EmitAluStore(context, res); + } + public static void Cmp(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -46,6 +141,36 @@ namespace ARMeilleure.Instructions EmitSubsVCheck(context, n, m, res); } + public static void Cmn(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + public static void Eor(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + public static void Mov(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -60,7 +185,170 @@ namespace ARMeilleure.Instructions EmitAluStore(context, m); } - public static void Sub(ArmEmitterContext context) + public static void Movt(ArmEmitterContext context) + { + OpCode32AluImm16 op = (OpCode32AluImm16)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand imm = Const(op.Immediate << 16); // Immeditate value as top halfword. + Operand res = context.BitwiseAnd(d, Const(0x0000ffff)); + res = context.BitwiseOr(res, imm); + + EmitAluStore(context, res); + } + + public static void Mul(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.Multiply(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mvn(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + Operand m = GetAluM(context); + + Operand res = context.BitwiseNot(m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Orr(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Pkh(ArmEmitterContext context) + { + OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res; + + bool tbform = op.ShiftType == ShiftType.Asr; + if (tbform) + { + res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF))); + } + else + { + res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF))); + } + + EmitAluStore(context, res); + } + + public static void Rbit(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBits32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Rev(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = context.ByteSwap(m); + + EmitAluStore(context, res); + } + + public static void Rev16(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Revsh(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, context.SignExtend16(OperandType.I32, res)); + } + + public static void Rsc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, m, n); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Rsb(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, m, res); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Sbc(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -69,61 +357,268 @@ namespace ARMeilleure.Instructions Operand res = context.Subtract(n, m); + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + if (op.SetFlags) { EmitNZFlagsCheck(context, res); - EmitSubsCCheck(context, n, res); + EmitSbcsCCheck(context, n, m); EmitSubsVCheck(context, n, m, res); } EmitAluStore(context, res); } - private static void EmitAluStore(ArmEmitterContext context, Operand value) + public static void Sbfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. + + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Sdiv(ArmEmitterContext context) + { + EmitDiv(context, false); + } + + public static void Sub(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; - if (op.Rd == RegisterAlias.Aarch32Pc) + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + if (op.SetFlags) { - if (op.SetFlags) - { - // TODO: Load SPSR etc. - Operand isThumb = GetFlag(PState.TFlag); + EmitNZFlagsCheck(context, res); - Operand lblThumb = Label(); + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } - context.BranchIfTrue(lblThumb, isThumb); + EmitAluStore(context, res); + } - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3)))); + public static void Sxtb(ArmEmitterContext context) + { + EmitSignExtend(context, true, 8); + } - context.MarkLabel(lblThumb); + public static void Sxtb16(ArmEmitterContext context) + { + EmitExtend16(context, true); + } - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); - } - else - { - EmitAluWritePc(context, value); - } + public static void Sxth(ArmEmitterContext context) + { + EmitSignExtend(context, true, 16); + } + + public static void Teq(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + EmitNZFlagsCheck(context, res); + } + + public static void Tst(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + EmitNZFlagsCheck(context, res); + } + + public static void Ubfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. + + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Udiv(ArmEmitterContext context) + { + EmitDiv(context, true); + } + + public static void Uxtb(ArmEmitterContext context) + { + EmitSignExtend(context, false, 8); + } + + public static void Uxtb16(ArmEmitterContext context) + { + EmitExtend16(context, false); + } + + public static void Uxth(ArmEmitterContext context) + { + EmitSignExtend(context, false, 16); + } + + private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) + { + res = m; } else { - SetIntA32(context, op.Rd, value); + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + switch (bits) + { + case 8: + res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res); + break; + case 16: + res = (signed) ? context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res); + break; + } + + if (op.Add) + { + res = context.Add(res, GetAluN(context)); } + + EmitAluStore(context, res); } - private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + private static void EmitExtend16(ArmEmitterContext context, bool signed) { - context.StoreToContext(); + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; - if (IsThumb(context.CurrOp)) + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) { - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + res = m; } else { - EmitBxWritePc(context, value); + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + Operand low16, high16; + if (signed) + { + low16 = context.SignExtend8(OperandType.I32, res); + high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); } + else + { + low16 = context.ZeroExtend8(OperandType.I32, res); + high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + + if (op.Add) + { + Operand n = GetAluN(context); + Operand lowAdd, highAdd; + if (signed) + { + lowAdd = context.SignExtend16(OperandType.I32, n); + highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + else + { + lowAdd = context.ZeroExtend16(OperandType.I32, n); + highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + + low16 = context.Add(low16, lowAdd); + high16 = context.Add(high16, highAdd); + } + + res = context.BitwiseOr( + context.ZeroExtend16(OperandType.I32, low16), + context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16))); + + EmitAluStore(context, res); + } + + public static void EmitDiv(ArmEmitterContext context, bool unsigned) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand zero = Const(m.Type, 0); + + Operand divisorIsZero = context.ICompareEqual(m, zero); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) + { + // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + // TODO: tests to ensure A32 works the same + + Operand intMin = Const(int.MinValue); + Operand minus1 = Const(-1); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + EmitAluStore(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); + } + + Operand res = unsigned + ? context.DivideUI(n, m) + : context.Divide(n, m); + + EmitAluStore(context, res); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + EmitAluStore(context, zero); + + context.MarkLabel(lblEnd); + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + EmitGenericAluStoreA32(context, op.Rd, op.SetFlags, value); } } }
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index d032b32e..3bb87f27 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -3,6 +3,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; using System; +using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -77,6 +78,89 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.VFlag, vOut); } + public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); + + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); + + return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16))); + } + + public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); + } + + public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); + + return context.ConvertI64ToI32(val); + } + + private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + context.StoreToContext(); + + if (IsThumb(context.CurrOp)) + { + // Make this count as a call, the translator will ignore the low bit for the address. + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitBxWritePc(context, value); + } + } + + public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + if (rd == RegisterAlias.Aarch32Pc && setFlags) + { + if (setFlags) + { + // TODO: Load SPSR etc. + Operand isThumb = GetFlag(PState.TFlag); + + Operand lblThumb = Label(); + + context.BranchIfTrue(lblThumb, isThumb); + + // Make this count as a call, the translator will ignore the low bit for the address. + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(value, Const(~3)), Const(1)))); + + context.MarkLabel(lblThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitAluWritePc(context, value); + } + } + else + { + SetIntA32(context, rd, value); + } + } public static Operand GetAluN(ArmEmitterContext context) { @@ -116,10 +200,15 @@ namespace ARMeilleure.Instructions return Const(op.Immediate); } + case OpCode32AluImm16 op: return Const(op.Immediate); + case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + case OpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry); case OpCodeT16AluImm8 op: return Const(op.Immediate); + case IOpCode32AluReg op: return GetIntA32(context, op.Rm); + // ARM64. case IOpCodeAluImm op: { @@ -167,11 +256,11 @@ namespace ARMeilleure.Instructions } // ARM32 helpers. - private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) { Operand m = GetIntA32(context, op.Rm); - int shift = op.Imm; + int shift = op.Immediate; if (shift == 0) { @@ -193,7 +282,7 @@ namespace ARMeilleure.Instructions case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; case ShiftType.Ror: - if (op.Imm != 0) + if (op.Immediate != 0) { m = GetRorC(context, m, setCarry, shift); } @@ -208,8 +297,74 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand GetMShiftedByReg(ArmEmitterContext context, OpCode32AluRsReg op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs)); + Operand shiftIsZero = context.ICompareEqual(s, Const(0)); + + Operand zeroResult = m; + Operand shiftResult = m; + + setCarry &= op.SetFlags; + + switch (op.ShiftType) + { + case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break; + } + + return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult); + } + + public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true) + { + Debug.Assert(boolValue.Type == OperandType.I32); + + Operand endLabel = Label(); + + if (expected) + { + context.BranchIfFalse(endLabel, boolValue); + } + else + { + context.BranchIfTrue(endLabel, boolValue); + } + + action(); + + context.MarkLabel(endLabel); + } + + public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftLeft(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -238,8 +393,32 @@ namespace ARMeilleure.Instructions } } - private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftRightUI(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -274,8 +453,45 @@ namespace ARMeilleure.Instructions return Const(0); } - private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand l32Result; + Operand ge32Result; + + Operand less32 = context.ICompareLess(shift, Const(32)); + + ge32Result = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () => + { + SetCarryMLsb(context, ge32Result); + }, false); + } + + l32Result = context.ShiftRightSI(m, shift); + if (setCarry) + { + EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + }); + } + + return context.ConditionalSelect(less32, l32Result, ge32Result); + } + + public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift >= 32) { m = context.ShiftRightSI(m, Const(31)); @@ -298,8 +514,28 @@ namespace ARMeilleure.Instructions } } - private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + shift = context.BitwiseAnd(shift, Const(0x1f)); + m = context.RotateRight(m, shift); + + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + SetCarryMMsb(context, m); + }, false); + } + + return m; + } + + public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + shift &= 0x1f; m = context.RotateRight(m, Const(shift)); @@ -312,8 +548,10 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) { + Debug.Assert(m.Type == OperandType.I32); + // Rotate right by 1 with carry. Operand cIn = context.Copy(GetFlag(PState.CFlag)); @@ -331,16 +569,22 @@ namespace ARMeilleure.Instructions private static void SetCarryMLsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); } private static void SetCarryMMsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); } private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) { + Debug.Assert(m.Type == OperandType.I32); + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); cOut = context.BitwiseAnd(cOut, Const(1)); diff --git a/ARMeilleure/Instructions/InstEmitException32.cs b/ARMeilleure/Instructions/InstEmitException32.cs new file mode 100644 index 00000000..a73f0dec --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitException32.cs @@ -0,0 +1,36 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Svc(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.SupervisorCall); + } + + public static void Trap(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.Break); + } + + private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func) + { + OpCode32Exception op = (OpCode32Exception)context.CurrOp; + + context.StoreToContext(); + + context.Call(func, Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs index 27addc78..cbb9ad5b 100644 --- a/ARMeilleure/Instructions/InstEmitFlow32.cs +++ b/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -1,7 +1,9 @@ using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; +using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -20,7 +22,6 @@ namespace ARMeilleure.Instructions else { context.StoreToContext(); - context.Return(Const(op.Immediate)); } } @@ -35,15 +36,6 @@ namespace ARMeilleure.Instructions Blx(context, x: true); } - public static void Bx(ArmEmitterContext context) - { - IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; - - context.StoreToContext(); - - EmitBxWritePc(context, GetIntA32(context, op.Rm)); - } - private static void Blx(ArmEmitterContext context, bool x) { IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; @@ -53,10 +45,10 @@ namespace ARMeilleure.Instructions bool isThumb = IsThumb(context.CurrOp); uint currentPc = isThumb - ? op.GetPc() | 1 - : op.GetPc() - 4; + ? pc | 1 + : pc - 4; - SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); // If x is true, then this is a branch with link and exchange. // In this case we need to swap the mode between Arm <-> Thumb. @@ -67,5 +59,37 @@ namespace ARMeilleure.Instructions InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate); } + + public static void Blxr(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + uint pc = op.GetPc(); + + Operand addr = GetIntA32(context, op.Rm); + Operand bitOne = context.BitwiseAnd(addr, Const(1)); + addr = context.BitwiseOr(addr, Const((int)CallFlag)); // Set call flag. + + bool isThumb = IsThumb(context.CurrOp); + + uint currentPc = isThumb + ? pc | 1 + : pc - 4; + + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + SetFlag(context, PState.TFlag, bitOne); + + context.Return(addr); // Call. + } + + public static void Bx(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + context.StoreToContext(); + + EmitBxWritePc(context, GetIntA32(context, op.Rm)); + } } }
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs index 02e104a4..f5495c66 100644 --- a/ARMeilleure/Instructions/InstEmitHelper.cs +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -43,10 +43,15 @@ namespace ARMeilleure.Instructions } else { - return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex)); + return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); } } + public static Operand GetVecA32(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) { if (regIndex == RegisterAlias.Aarch32Pc) @@ -57,7 +62,13 @@ namespace ARMeilleure.Instructions } else { - SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value); + if (value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); + + context.Copy(reg, value); } } @@ -143,11 +154,12 @@ namespace ARMeilleure.Instructions context.BranchIfTrue(lblArmMode, mode); - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1)))); + // Make this count as a call, the translator will ignore the low bit for the address. + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)))); context.MarkLabel(lblArmMode); - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3)))); + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(pc, Const(~3)), Const((int)InstEmitFlowHelper.CallFlag)))); } public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) @@ -208,11 +220,21 @@ namespace ARMeilleure.Instructions return Register((int)stateFlag, RegisterType.Flag, OperandType.I32); } + public static Operand GetFpFlag(FPState stateFlag) + { + return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32); + } + public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value) { context.Copy(GetFlag(stateFlag), value); context.MarkFlagSet(stateFlag); } + + public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value) + { + context.Copy(GetFpFlag(stateFlag), value); + } } } diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs index 002d2c5c..ffd816b2 100644 --- a/ARMeilleure/Instructions/InstEmitMemory32.cs +++ b/ARMeilleure/Instructions/InstEmitMemory32.cs @@ -20,9 +20,11 @@ namespace ARMeilleure.Instructions [Flags] enum AccessType { - Store = 0, - Signed = 1, - Load = 2, + Store = 0, + Signed = 1, + Load = 2, + Ordered = 4, + Exclusive = 8, LoadZx = Load, LoadSx = Load | Signed, @@ -95,7 +97,7 @@ namespace ARMeilleure.Instructions { OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; - Operand n = GetIntA32(context, op.Rn); + Operand n = context.Copy(GetIntA32(context, op.Rn)); Operand baseAddress = context.Add(n, Const(op.Offset)); @@ -152,14 +154,15 @@ namespace ARMeilleure.Instructions OpCode32Mem op = (OpCode32Mem)context.CurrOp; Operand n = context.Copy(GetIntA32(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); Operand temp = null; if (op.Index || op.WBack) { temp = op.Add - ? context.Add (n, Const(op.Immediate)) - : context.Subtract(n, Const(op.Immediate)); + ? context.Add (n, m) + : context.Subtract(n, m); } if (op.WBack) diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs index bcca7619..93c20cb5 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryEx.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -5,6 +5,7 @@ using System; using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -66,7 +67,7 @@ namespace ARMeilleure.Instructions // method to read 128-bits atomically. if (op.Size == 2) { - Operand value = EmitLoad(context, address, exclusive, 3); + Operand value = EmitLoadExclusive(context, address, exclusive, 3); Operand valueLow = context.ConvertI64ToI32(value); @@ -79,7 +80,7 @@ namespace ARMeilleure.Instructions } else if (op.Size == 3) { - Operand value = EmitLoad(context, address, exclusive, 4); + Operand value = EmitLoadExclusive(context, address, exclusive, 4); Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); @@ -95,46 +96,11 @@ namespace ARMeilleure.Instructions else { // 8, 16, 32 or 64-bits (non-pairwise) load. - Operand value = EmitLoad(context, address, exclusive, op.Size); + Operand value = EmitLoadExclusive(context, address, exclusive, op.Size); SetIntOrZR(context, op.Rt, value); } } - - private static Operand EmitLoad( - ArmEmitterContext context, - Operand address, - bool exclusive, - int size) - { - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; - } - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; - } - } - - return context.Call(fallbackMethodDlg, address); - } - public static void Pfrm(ArmEmitterContext context) { // Memory Prefetch, execute as no-op. @@ -192,11 +158,11 @@ namespace ARMeilleure.Instructions value = context.VectorInsert(value, t2, 1); } - s = EmitStore(context, address, value, exclusive, op.Size + 1); + s = EmitStoreExclusive(context, address, value, exclusive, op.Size + 1); } else { - s = EmitStore(context, address, t, exclusive, op.Size); + s = EmitStoreExclusive(context, address, t, exclusive, op.Size); } if (s != null) @@ -207,50 +173,6 @@ namespace ARMeilleure.Instructions } } - private static Operand EmitStore( - ArmEmitterContext context, - Operand address, - Operand value, - bool exclusive, - int size) - { - if (size < 3) - { - value = context.ConvertI64ToI32(value); - } - - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break; - case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; - } - - return context.Call(fallbackMethodDlg, address, value); - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; - case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; - case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; - case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; - case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; - } - - context.Call(fallbackMethodDlg, address, value); - - return null; - } - } - private static void EmitBarrier(ArmEmitterContext context) { // Note: This barrier is most likely not necessary, and probably diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs new file mode 100644 index 00000000..0ab990f8 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs @@ -0,0 +1,240 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Clrex(ArmEmitterContext context) + { + context.Call(new _Void(NativeInterface.ClearExclusive)); + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldrex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Lda(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldab(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldaex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldah(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + // Stores. + + public static void Strex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Stl(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp; + + Operand address = context.Copy(GetIntA32(context, op.Rn)); + + var exclusive = (accType & AccessType.Exclusive) != 0; + var ordered = (accType & AccessType.Ordered) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + if ((accType & AccessType.Load) != 0) + { + if (size == DWordSizeLog2) + { + // Keep loads atomic - make the call to get the whole region and then decompose it into parts + // for the registers. + + Operand value = EmitLoadExclusive(context, address, exclusive, size); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + SetIntA32(context, op.Rt, valueLow); + SetIntA32(context, op.Rt | 1, valueHigh); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + SetIntA32(context, op.Rt | 1, valueLow); + SetIntA32(context, op.Rt, valueHigh); + + context.MarkLabel(lblEnd); + } + else + { + SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size)); + } + } + else + { + if (size == DWordSizeLog2) + { + // Split the result into 2 words (based on endianness) + + Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)); + Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt | 1)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32))); + Operand leS = EmitStoreExclusive(context, address, leResult, exclusive, size); + if (exclusive) + { + SetIntA32(context, op.Rd, leS); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32))); + Operand beS = EmitStoreExclusive(context, address, beResult, exclusive, size); + if (exclusive) + { + SetIntA32(context, op.Rd, beS); + } + + context.MarkLabel(lblEnd); + } + else + { + Operand s = EmitStoreExclusive(context, address, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)), exclusive, size); + // This is only needed for exclusive stores. The function returns 0 + // when the store is successful, and 1 otherwise. + if (exclusive) + { + SetIntA32(context, op.Rd, s); + } + } + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + // Note: This barrier is most likely not necessary, and probably + // doesn't make any difference since we need to do a ton of stuff + // (software MMU emulation) to read or write anything anyway. + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs new file mode 100644 index 00000000..00a5385b --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs @@ -0,0 +1,87 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryExHelper + { + public static Operand EmitLoadExclusive( + ArmEmitterContext context, + Operand address, + bool exclusive, + int size) + { + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByteExclusive); break; + case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; + } + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; + } + } + + return context.Call(fallbackMethodDlg, address); + } + + public static Operand EmitStoreExclusive( + ArmEmitterContext context, + Operand address, + Operand value, + bool exclusive, + int size) + { + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _S32_U64_U8(NativeInterface.WriteByteExclusive); break; + case 1: fallbackMethodDlg = new _S32_U64_U16(NativeInterface.WriteUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _S32_U64_U32(NativeInterface.WriteUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _S32_U64_U64(NativeInterface.WriteUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; + } + + return context.Call(fallbackMethodDlg, address, value); + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8(NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break; + case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; + } + + context.Call(fallbackMethodDlg, address, value); + + return null; + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs index e0b44353..70861d16 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -53,7 +53,7 @@ namespace ARMeilleure.Instructions if (!isSimd) { - Operand value = GetIntOrZR(context, rt); + Operand value = GetInt(context, rt); if (ext == Extension.Sx32 || ext == Extension.Sx64) { @@ -67,7 +67,7 @@ namespace ARMeilleure.Instructions } } - SetIntOrZR(context, rt, value); + SetInt(context, rt, value); } } @@ -505,5 +505,68 @@ namespace ARMeilleure.Instructions SetIntOrZR(context, rt, value); } } + + // ARM32 helpers. + public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true) + { + switch (context.CurrOp) + { + case OpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + + case OpCode32MemReg op: return GetIntA32(context, op.Rm); + + case OpCode32Mem op: return Const(op.Immediate); + + case OpCode32SimdMemImm op: return Const(op.Immediate); + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\"."); + } + + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32MemRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Immediate; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= false; + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Immediate != 0) + { + m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift); + } + else + { + m = InstEmitAluHelper.GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } } }
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMul32.cs b/ARMeilleure/Instructions/InstEmitMul32.cs new file mode 100644 index 00000000..e64f3568 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMul32.cs @@ -0,0 +1,290 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + [Flags] + private enum MullFlags + { + Subtract = 1, + Add = 1 << 1, + Signed = 1 << 2, + + SignedAdd = Signed | Add, + SignedSubtract = Signed | Subtract + } + + public static void Mla(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Add(a, context.Multiply(n, m)); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mls(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Subtract(a, context.Multiply(n, m)); + + EmitAluStore(context, res); + } + + public static void Smull(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); + } + + public static void Smmla(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedAdd); + } + + public static void Smmls(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedSubtract); + } + + public static void Smmul(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.Signed); + } + + private static void EmitSmmul(ArmEmitterContext context, MullFlags flags) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf) + { + res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + else if (flags.HasFlag(MullFlags.Subtract)) + { + res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + + if (op.R) + { + res = context.Add(res, Const(0x80000000L)); + } + + Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32))); + + EmitGenericAluStoreA32(context, op.Rd, false, hi); + } + + public static void Smlab(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I32, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I32, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I32, m); + } + + Operand res = context.Multiply(n, m); + + Operand a = GetIntA32(context, op.Ra); + res = context.Add(res, a); + + // TODO: set Q flag when last addition overflows (saturation)? + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Smlal(ArmEmitterContext context) + { + EmitMlal(context, true); + } + + public static void Smlalh(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I64, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.RdHi, false, hi); + EmitGenericAluStoreA32(context, op.RdLo, false, lo); + } + + public static void Smulh(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.ShiftRightSI(n, Const(16)); + } + else + { + n = context.SignExtend16(OperandType.I32, n); + } + + if (op.MHigh) + { + m = context.ShiftRightSI(m, Const(16)); + } + else + { + m = context.SignExtend16(OperandType.I32, m); + } + + Operand res = context.Multiply(n, m); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Umlal(ArmEmitterContext context) + { + EmitMlal(context, false); + } + + public static void Umull(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); + } + + public static void EmitMlal(ArmEmitterContext context, bool signed) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (signed) + { + n = context.SignExtend32(OperandType.I64, n); + m = context.SignExtend32(OperandType.I64, m); + } + else + { + n = context.ZeroExtend32(OperandType.I64, n); + m = context.ZeroExtend32(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs new file mode 100644 index 00000000..4ee279ee --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -0,0 +1,634 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vabs_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + + public static void Vabs_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + public static void Vadd_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2)); + } + } + + public static void Vadd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPAddFpscr, SoftFloat64.FPAddFpscr, op1, op2)); + } + } + + public static void Vadd_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Vdup(ArmEmitterContext context) + { + OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp; + + Operand insert = GetIntA32(context, op.Rt); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. + switch (op.Size) + { + case 2: + insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); + break; + case 1: + insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); + break; + case 0: + insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); + break; + default: + throw new InvalidOperationException("Unknown Vdup Size."); + } + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd + 1, insert); + } + } + + public static void Vdup_1(ArmEmitterContext context) + { + OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp; + + Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. + switch (op.Size) + { + case 2: + insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); + break; + case 1: + insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); + break; + case 0: + insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); + break; + default: + throw new InvalidOperationException("Unknown Vdup Size."); + } + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd | 1, insert); + } + } + + public static void Vext(ArmEmitterContext context) + { + OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp; + + int elems = op.GetBytesCount(); + int byteOff = op.Immediate; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand extract; + + if (byteOff >= elems) + { + extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size); + } + else + { + extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size); + } + byteOff++; + + res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vmov_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => op1); + } + + public static void Vmovn(ArmEmitterContext context) + { + EmitVectorUnaryNarrowOp32(context, (op1) => op1); + } + + public static void Vneg_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1)); + } + + public static void Vnmul_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + + public static void Vnmla_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Negate(context.Add(op1, context.Multiply(op2, op3))); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPNegMulAdd, SoftFloat64.FPNegMulAdd, op1, op2, op3); + }); + } + } + + public static void Vnmls_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(context.Negate(op1), context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPNegMulSub, SoftFloat64.FPNegMulSub, op1, op2, op3); + }); + } + } + + public static void Vneg_V(ArmEmitterContext context) + { + if ((context.CurrOp as OpCode32Simd).F) + { + EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1)); + } + } + + public static void Vdiv_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2); + }); + } + } + + public static void Vmaxnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2)); + } + + public static void Vmaxnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2)); + } + + public static void Vminnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2)); + } + + public static void Vminnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2)); + } + + public static void Vmax_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxFpscr, SoftFloat64.FPMaxFpscr, op1, op2); + }); + } + + public static void Vmax_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2)); + } + } + + public static void Vmin_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinFpscr, SoftFloat64.FPMinFpscr, op1, op2); + }); + } + + public static void Vmin_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2)); + } + } + + public static void Vmul_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Vmul_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2); + }); + } + } + + public static void Vmul_I(ArmEmitterContext context) + { + if ((context.CurrOp as OpCode32SimdReg).U) throw new NotImplementedException("Polynomial mode not implemented"); + EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Vmul_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2)); + } + } + else + { + EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + } + } + + public static void Vmla_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + }); + } + } + + public static void Vmla_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3); + }); + } + } + + public static void Vmla_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + + public static void Vmla_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vmls_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + }); + } + } + + public static void Vmls_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3); + }); + } + } + + public static void Vmls_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + + public static void Vmls_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vpadd_V(ArmEmitterContext context) + { + EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Vpadd_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + + public static void Vrev(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => + { + switch (op.Opc) + { + case 0: + switch (op.Size) // Swap bytes. + { + default: + return op1; + case 1: + return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1); + case 2: + case 3: + return context.ByteSwap(op1); + } + case 1: + switch (op.Size) + { + default: + return op1; + case 2: + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16))); + case 3: + return context.BitwiseOr( + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))), + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16)))); + } + case 2: + // Swap upper and lower halves. + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32))); + } + + return op1; + }); + } + + public static void Vrecpe(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPRecipEstimateFpscr, SoftFloat64.FPRecipEstimateFpscr, op1); + }); + } + else + { + throw new NotImplementedException("Integer Vrecpe not currently implemented."); + } + } + + public static void Vrecps(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStep, SoftFloat64.FPRecipStep, op1, op2); + }); + } + + public static void Vrsqrte(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPRSqrtEstimateFpscr, SoftFloat64.FPRSqrtEstimateFpscr, op1); + }); + } + else + { + throw new NotImplementedException("Integer Vrsqrte not currently implemented."); + } + } + + public static void Vrsqrts(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStep, SoftFloat64.FPRSqrtStep, op1, op2); + }); + } + + public static void Vsel(ArmEmitterContext context) + { + OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp; + + Operand condition = null; + switch (op.Cc) + { + case OpCode32SimdSelMode.Eq: + condition = GetCondTrue(context, Condition.Eq); + break; + case OpCode32SimdSelMode.Ge: + condition = GetCondTrue(context, Condition.Ge); + break; + case OpCode32SimdSelMode.Gt: + condition = GetCondTrue(context, Condition.Gt); + break; + case OpCode32SimdSelMode.Vs: + condition = GetCondTrue(context, Condition.Vs); + break; + } + + EmitScalarBinaryOpI32(context, (op1, op2) => + { + return context.ConditionalSelect(condition, op1, op2); + }); + } + + public static void Vsqrt_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + + public static void Vsub_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Vsub_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Vsub_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs new file mode 100644 index 00000000..3b2483ce --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -0,0 +1,273 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit32 + { + public static void Vceq_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false); + } + + public static void Vceq_I(ArmEmitterContext context) + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false); + } + + public static void Vceq_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false); + } + } + + public static void Vcge_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false); + } + + public static void Vcge_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U); + } + + public static void Vcge_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true); + } + } + + public static void Vcgt_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false); + } + + public static void Vcgt_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U); + } + + public static void Vcgt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true); + } + } + + public static void Vcle_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true); + } + } + + public static void Vclt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true); + } + } + + private static void EmitCmpOpF32( + ArmEmitterContext context, + _F32_F32_F32_Bool f32, + _F64_F64_F64_Bool f64, + bool zero) + { + Operand one = Const(1); + if (zero) + { + EmitVectorUnaryOpF32(context, (m) => + { + OperandType type = m.Type; + + if (type == OperandType.FP64) + { + return context.Call(f64, m, ConstF(0.0), one); + } + else + { + return context.Call(f32, m, ConstF(0.0f), one); + } + }); + } + else + { + EmitVectorBinaryOpF32(context, (n, m) => + { + OperandType type = n.Type; + + if (type == OperandType.FP64) + { + return context.Call(f64, n, m, one); + } + else + { + return context.Call(f32, n, m, one); + } + }); + } + } + + private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType) + { + var ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1); + + return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L)); + } + + private static void EmitCmpOpI32( + ArmEmitterContext context, + Func2I signedOp, + Func2I unsignedOp, + bool zero, + bool signed) + { + if (zero) + { + if (signed) + { + EmitVectorUnaryOpSx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0); + + return ZerosOrOnes(context, signedOp(m, zeroV), type); + }); + } + else + { + EmitVectorUnaryOpZx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0); + + return ZerosOrOnes(context, unsignedOp(m, zeroV), type); + }); + } + } + else + { + if (signed) + { + EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type)); + } + else + { + EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type)); + } + } + } + + public static void Vcmp(ArmEmitterContext context) + { + EmitVcmpOrVcmpe(context, false); + } + + public static void Vcmpe(ArmEmitterContext context) + { + EmitVcmpOrVcmpe(context, true); + } + + private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + { + int fSize = op.Size & 1; + OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = ExtractScalar(context, type, op.Vd); + Operand me; + + if (cmpWithZero) + { + me = fSize == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = ExtractScalar(context, type, op.Vm); + } + + Delegate dlg = fSize != 0 + ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare) + : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare); + + Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs)); + + EmitSetFPSCRFlags(context, nzcv); + } + } + + private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0)); + SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1)); + SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2)); + SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs new file mode 100644 index 00000000..6ab089cb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -0,0 +1,274 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private static int FlipVdBits(int vd, bool lowBit) + { + if (lowBit) + { + // Move the low bit to the top. + return ((vd & 0x1) << 4) | (vd >> 1); + } + else + { + // Move the high bit to the bottom. + return ((vd & 0xf) << 1) | (vd >> 4); + } + } + + private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned) + { + if (op1.Type == OperandType.FP64) + { + if (unsigned) + { + return context.Call(new _U32_F64(SoftFallback.SatF64ToU32), op1); + } + else + { + return context.Call(new _S32_F64(SoftFallback.SatF64ToS32), op1); + } + + } + else + { + if (unsigned) + { + return context.Call(new _U32_F32(SoftFallback.SatF32ToU32), op1); + } + else + { + return context.Call(new _S32_F32(SoftFallback.SatF32ToS32), op1); + } + } + } + + public static void Vcvt_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + bool unsigned = (op.Opc & 1) != 0; + bool toInteger = (op.Opc & 2) != 0; + OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64; + + if (toInteger) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSaturateFloatToInt(context, op1, unsigned); + }); + } + else + { + if (unsigned) + { + EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + } + } + + } + + public static void Vcvt_FD(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + int vm = op.Vm; + int vd; + if (op.Size == 3) + { + vd = FlipVdBits(op.Vd, false); + // Double to single. + Operand fp = ExtractScalar(context, OperandType.FP64, vm); + + Operand res = context.ConvertToFP(OperandType.FP32, fp); + + InsertScalar(context, vd, res); + } + else + { + vd = FlipVdBits(op.Vd, true); + // Single to double. + Operand fp = ExtractScalar(context, OperandType.FP32, vm); + + Operand res = context.ConvertToFP(OperandType.FP64, fp); + + InsertScalar(context, vd, res); + } + } + + public static void Vcvt_FI(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + bool toInteger = (op.Opc2 & 0b100) != 0; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + if (toInteger) + { + bool unsigned = (op.Opc2 & 1) == 0; + bool roundWithFpscr = op.Opc != 1; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + Operand asInteger; + + // TODO: Fast Path. + if (roundWithFpscr) + { + // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point. + if (floatSize == OperandType.FP64) + { + if (unsigned) + { + asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert); + } + } + else + { + if (unsigned) + { + asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert); + } + } + } + else + { + // Round towards zero. + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + } + + InsertScalar(context, op.Vd, asInteger); + } + else + { + bool unsigned = op.Opc == 0; + + Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm); + + Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned); + + InsertScalar(context, op.Vd, asFloat); + } + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; + + Delegate dlg; + + if ((op.Size & 1) == 0) + { + dlg = new _F32_F32_MidpointRounding(MathF.Round); + } + else /* if ((op.Size & 1) == 1) */ + { + dlg = new _F64_F64_MidpointRounding(Math.Round); + } + + return context.Call(dlg, n, Const((int)roundMode)); + } + + public static void Vcvt_R(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + bool unsigned = (op.Opc & 1) == 0; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } + + Operand asInteger; + + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + + InsertScalar(context, op.Vd, asInteger); + } + + public static void Vrint_RM(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } + + InsertScalar(context, op.Vd, toConvert); + } + + public static void Vrint_Z(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1)); + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index fce1bed5..a87dac01 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -1528,7 +1528,7 @@ namespace ARMeilleure.Instructions { ThrowIfInvalid(index, size); - if (size < 3) + if (size < 3 && value.Type == OperandType.I64) { value = context.ConvertI64ToI32(value); } @@ -1544,7 +1544,7 @@ namespace ARMeilleure.Instructions return vector; } - private static void ThrowIfInvalid(int index, int size) + public static void ThrowIfInvalid(int index, int size) { if ((uint)size > 3u) { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs new file mode 100644 index 00000000..b13b1d87 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -0,0 +1,581 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper32 + { + public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size) + { + switch (size) + { + case RegisterSize.Simd128: + return (index >> 1, 0); + case RegisterSize.Simd64: + case RegisterSize.Int64: + return (index >> 1, index & 1); + case RegisterSize.Int32: + return (index >> 2, index & 3); + } + + throw new ArgumentException("Unrecognized Vector Register Size."); + } + + public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg) + { + Debug.Assert(type != OperandType.V128); + + if (type == OperandType.FP64 || type == OperandType.I64) + { + // From dreg. + return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1); + } + else + { + // From sreg. + return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3); + } + } + + public static void InsertScalar(ArmEmitterContext context, int reg, Operand value) + { + Debug.Assert(value.Type != OperandType.V128); + + Operand vec, insert; + if (value.Type == OperandType.FP64 || value.Type == OperandType.I64) + { + // From dreg. + vec = GetVecA32(reg >> 1); + insert = context.VectorInsert(vec, value, reg & 1); + + } + else + { + // From sreg. + vec = GetVecA32(reg >> 2); + insert = context.VectorInsert(vec, value, reg & 3); + } + + context.Copy(vec, insert); + } + + public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit) + { + IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + int elems = op.Elems; + (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + + Operand vec = GetVecA32(index); + Operand res = vec; + + for (int item = 0; item < elems; item++) + { + res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size); + } + + context.Copy(vec, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(m)); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.I64 : OperandType.I32; + + if (op.Size < 2) + { + throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits."); + } + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand a = ExtractScalar(context, type, op.Vd); + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(a, n, m)); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Integer + + public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, true); + } + + public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, true); + } + + public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, true); + } + + public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, false); + } + + public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, false); + } + + public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, false); + } + + // Vector by scalar + + public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, m), op.In + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + Operand mvec = GetVecA32(op.Qm); + Operand nvec = GetVecA32(op.Qn); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex); + Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1); + + res = context.VectorInsert(res, emit(n1, n2), op.Fd + index); + + Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex); + Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1); + + res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed); + Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed); + + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size); + res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Narrow + + public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = 8 >> op.Size; // Size contains the target element size. (for when it becomes a doubleword) + + Operand res = GetVecA32(op.Qd); + int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base. + + for (int index = 0; index < elems; index++) + { + Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false); + + res = EmitVectorInsert(context, res, emit(m), id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Generic Functions + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_Bool f32, + _F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_Bool f32, + _F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_F32_Bool f32, + _F64_F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = null; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8(OperandType.I32, res); break; + case 1: res = context.SignExtend16(OperandType.I32, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8(OperandType.I32, res); break; + case 1: res = context.ZeroExtend16(OperandType.I32, res); break; + } + } + + return res; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs new file mode 100644 index 00000000..e2e9e18e --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs @@ -0,0 +1,56 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitSimdHelper32; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vand_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + + public static void Vbif(ArmEmitterContext context) + { + EmitBifBit(context, true); + } + + public static void Vbit(ArmEmitterContext context) + { + EmitBifBit(context, false); + } + + public static void Vbsl(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + + public static void Vorr_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorTernaryOpZx32(context, (d, n, m) => + { + if (notRm) + { + m = context.BitwiseNot(m); + } + return context.BitwiseExclusiveOr( + context.BitwiseAnd(m, + context.BitwiseExclusiveOr(d, n)), d); + }); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs new file mode 100644 index 00000000..fb9931d8 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs @@ -0,0 +1,352 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vld1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, true); + } + + public static void Vld2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, true); + } + + public static void Vld3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, true); + } + + public static void Vld4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, true); + } + + public static void Vst1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, false); + } + + public static void Vst2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, false); + } + + public static void Vst3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, false); + } + + public static void Vst4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, false); + } + + public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load) + { + if (context.CurrOp is OpCode32SimdMemSingle) + { + OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + // TODO: Check alignment. + int offset = 0; + int d = op.Vd; + + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register. + Operand address = context.Add(n, Const(offset)); + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, d); + } + else + { + EmitDVectorStore(context, address, d); + } + } + else + { + int index = ((d & 1) << (3 - op.Size)) + op.Index; + if (load) + { + if (op.Replicate) + { + var regs = (count > 1) ? 1 : op.Increment; + for (int reg = 0; reg < regs; reg++) + { + int dreg = reg + d; + int rIndex = ((dreg & 1) << (3 - op.Size)); + int limit = rIndex + (1 << (3 - op.Size)); + + while (rIndex < limit) + { + EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size); + } + } + } + else + { + EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + } + } + else + { + EmitStoreSimd(context, address, d >> 1, index, op.Size); + } + } + offset += eBytes; + d += op.Increment; + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes))); + } + } + } + else + { + OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + int offset = 0; + int d = op.Vd; + + for (int reg = 0; reg < op.Regs; reg++) + { + for (int elem = 0; elem < op.Elems; elem++) + { + int elemD = d + reg; + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register + // add ebytes for each element. + Operand address = context.Add(n, Const(offset)); + int index = ((elemD & 1) << (3 - op.Size)) + elem; + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, elemD); + } + else + { + EmitDVectorStore(context, address, elemD); + } + } + else + { + + if (load) + { + EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size); + } + else + { + EmitStoreSimd(context, address, elemD >> 1, index, op.Size); + } + } + + offset += eBytes; + elemD += op.Increment; + } + } + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs))); + } + } + } + } + + public static void Vldm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int range = op.RegisterRange; + + int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd; + int offset = 0; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + Operand vec = GetVecA32(sReg >> 2); + + EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2); + offset += byteSize; + } + } + + public static void Vstm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int offset = 0; + + int range = op.RegisterRange; + int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2); + + offset += byteSize; + } + } + + public static void Vldr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Load); + } + + public static void Vstr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Store); + } + + private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand vec = GetVecA32(vecQ); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType) + { + OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); + + Operand address = op.Add + ? context.Add(n, m) + : context.Subtract(n, m); + + int size = op.Size; + + if ((accType & AccessType.Load) != 0) + { + if (size == DWordSizeLog2) + { + EmitDVectorLoad(context, address, op.Vd); + } + else + { + Operand vec = GetVecA32(op.Vd >> 2); + EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + else + { + if (size == DWordSizeLog2) + { + EmitDVectorStore(context, address, op.Vd); + } + else + { + EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs new file mode 100644 index 00000000..3fd42cbf --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -0,0 +1,336 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vmov_I(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => op1); + } + + public static void Vmvn_I(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseExclusiveOr(op1, op1)); + } + + public static void Vmov_GS(ArmEmitterContext context) + { + OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp; + + Operand vec = GetVecA32(op.Vn >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3)); + } + } + + public static void Vmov_G1(ArmEmitterContext context) + { + OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp; + + int index = op.Index + ((op.Vd & 1) << (3 - op.Size)); + if (op.Op == 1) + { + // To general purpose. + Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand vec = GetVecA32(op.Vd >> 1); + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size)); + } + } + + public static void Vmov_G2(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 2); + int vm1 = op.Vm + 1; + bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2); + Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3); + SetIntA32(context, op.Rt, lowValue); + + Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3); + SetIntA32(context, op.Rt2, highValue); + } + else + { + // From general purpose. + Operand lowValue = GetIntA32(context, op.Rt); + Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3); + + Operand highValue = GetIntA32(context, op.Rt2); + + if (sameOwnerVec) + { + context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3)); + } + else + { + context.Copy(vec, resultVec); + context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3)); + } + } + } + + public static void Vmov_GD(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 1); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1); + SetIntA32(context, op.Rt, context.ConvertI64ToI32(value)); + SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32)))); + } + else + { + // From general purpose. + Operand lowValue = GetIntA32(context, op.Rt); + Operand highValue = GetIntA32(context, op.Rt2); + + Operand value = context.BitwiseOr( + context.ZeroExtend32(OperandType.I64, lowValue), + context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32))); + + context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1)); + } + } + + public static void Vtbl(ArmEmitterContext context) + { + OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; + + bool extension = op.Opc == 1; + + int elems = op.GetBytesCount() >> op.Size; + + int length = op.Length + 1; + + (int Qx, int Ix)[] tableTuples = new (int, int)[length]; + for (int i = 0; i < length; i++) + { + (int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize); + tableTuples[i] = (vn, en); + } + + int byteLength = length * 8; + + Operand res = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im)); + + Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength)); + Operand elemRes = null; // Note: This is I64 for ease of calculation. + + // TODO: Branching rather than conditional select. + + // Get indexed byte. + // To simplify (ha) the il, we get bytes from every vector and use a nested conditional select to choose the right result. + // This does have to extract `length` times for every element but certainly not as bad as it could be. + + // Which vector number is the index on. + Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3)); + // What should we shift by to extract it. + Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3)); + + for (int i = 0; i < length; i++) + { + (int qx, int ix) = tableTuples[i]; + // Get the whole vector, we'll get a byte out of it. + Operand lookupResult; + if (qx == op.Qd) + { + // Result contains the current state of the vector. + lookupResult = context.VectorExtract(OperandType.I64, res, ix); + } + else + { + lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64 + } + + lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector. + + if (i == 0) + { + elemRes = lookupResult; // First result is always default. + } + else + { + Operand isThisElem = context.ICompareEqual(vecIndex, Const(i)); + elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes); + } + } + + Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L); + + res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vtrn(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false); + Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false); + + resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + + public static void Vzip(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false); + Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false); + + Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false); + Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false); + + resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size); + resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size); + resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + + public static void Vuzp(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand dIns, mIns; + if (index >= pairs) + { + int pind = index - pairs; + dIns = EmitVectorExtract32(context, op.Qm, (pind << 1) + op.Im, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qm, ((pind << 1) | 1) + op.Im, op.Size, false); + } + else + { + dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false); + } + + resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs new file mode 100644 index 00000000..89385476 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -0,0 +1,100 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vshl(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift))); + } + + public static void Vshl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false)); + } + } + + public static void Vshr(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + int maxShift = (8 << op.Size) - 1; + + if (op.U) + { + EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift))); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift)))); + } + } + + public static void Vshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + + EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); + } + + private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned) + { + if (shiftLsB.Type == OperandType.I64) + { + shiftLsB = context.ConvertI64ToI32(shiftLsB); + } + + shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB); + Debug.Assert((uint)size < 4u); + + Operand negShiftLsB = context.Negate(shiftLsB); + + Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0)); + + Operand shl = context.ShiftLeft(op, shiftLsB); + Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB); + + Operand res = context.ConditionalSelect(isPositive, shl, shr); + + if (unsigned) + { + Operand isOutOfRange = context.BitwiseOr( + context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)), + context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size))); + + return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res); + } + else + { + Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)); + Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)); + + // Also zero if shift is too negative, but value was positive. + isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0)))); + + Operand min = (op.Type == OperandType.I64) ? Const(-1L) : Const(-1); + + return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSystem32.cs b/ARMeilleure/Instructions/InstEmitSystem32.cs new file mode 100644 index 00000000..808b4fdd --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSystem32.cs @@ -0,0 +1,233 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Mcr(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _Void_U32(NativeInterface.SetTpidrEl032); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + + case 7: + switch (op.CRm) // Cache and Memory barrier. + { + case 10: + switch (op.Opc2) + { + case 5: // Data Memory Barrier Register. + return; // No-op. + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + default: + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + public static void Mrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _U32(NativeInterface.GetTpidrEl032); break; + case 3: + dlg = new _U32(NativeInterface.GetTpidr32); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + if (op.Rt == RegisterAlias.Aarch32Pc) + { + // Special behavior: copy NZCV flags into APSR. + EmitSetNzcv(context, context.Call(dlg)); + + return; + } + else + { + SetIntA32(context, op.Rt, context.Call(dlg)); + } + } + + public static void Mrrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + var opc = op.MrrcOp; + + Delegate dlg; + switch (op.CRm) + { + case 14: // Timer. + switch (opc) + { + case 0: + dlg = new _U64(NativeInterface.GetCntpctEl0); break; + default: + throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + Operand result = context.Call(dlg); + + SetIntA32(context, op.Rt, context.ConvertI64ToI32(result)); + SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32)))); + } + + public static void Nop(ArmEmitterContext context) { } + + public static void Vmrs(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001) + { + // Special behavior: copy NZCV flags into APSR. + SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag)); + SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag)); + SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag)); + SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag)); + return; + } + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = new _U32(NativeInterface.GetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + SetIntA32(context, op.Rt, context.Call(dlg)); + } + + public static void Vmsr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = new _Void_U32(NativeInterface.SetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand t) + { + Operand v = context.ShiftRightUI(t, Const((int)PState.VFlag)); + v = context.BitwiseAnd(v, Const(1)); + + Operand c = context.ShiftRightUI(t, Const((int)PState.CFlag)); + c = context.BitwiseAnd(c, Const(1)); + + Operand z = context.ShiftRightUI(t, Const((int)PState.ZFlag)); + z = context.BitwiseAnd(z, Const(1)); + + Operand n = context.ShiftRightUI(t, Const((int)PState.NFlag)); + n = context.BitwiseAnd(n, Const(1)); + + SetFlag(context, PState.VFlag, v); + SetFlag(context, PState.CFlag, c); + SetFlag(context, PState.ZFlag, z); + SetFlag(context, PState.NFlag, n); + } + } +} diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index c81484a6..0c2dd18d 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -82,6 +82,7 @@ namespace ARMeilleure.Instructions Smaddl, Smsubl, Smulh, + Smull, Stlr, Stlxp, Stlxr, @@ -92,6 +93,8 @@ namespace ARMeilleure.Instructions Sub, Subs, Svc, + Sxtb, + Sxth, Sys, Tbnz, Tbz, @@ -445,19 +448,140 @@ namespace ARMeilleure.Instructions Zip2_V, // Base (AArch32) + Bfc, + Bfi, Blx, Bx, Cmp, + Cmn, + Movt, + Mul, + Lda, + Ldab, + Ldaex, + Ldaexb, + Ldaexd, + Ldaexh, + Ldah, Ldm, Ldrb, Ldrd, + Ldrex, + Ldrexb, + Ldrexd, + Ldrexh, Ldrh, Ldrsb, Ldrsh, + Mcr, + Mla, + Mls, Mov, + Mrc, + Mrrc, + Mvn, + Pkh, + Pld, + Rev, + Revsh, + Rsb, + Rsc, + Sbfx, + Smlab, + Smlal, + Smlalh, + Smmla, + Smmls, + Smmul, + Stl, + Stlb, + Stlex, + Stlexb, + Stlexd, + Stlexh, + Stlh, Stm, Strb, Strd, - Strh + Strex, + Strexb, + Strexd, + Strexh, + Strh, + Sxtb16, + Teq, + Trap, + Tst, + Ubfx, + Umlal, + Umull, + Uxtb, + Uxtb16, + Uxth, + + // FP & SIMD (AArch32) + Vabs, + Vadd, + Vand, + Vbif, + Vbit, + Vbsl, + Vceq, + Vcge, + Vcgt, + Vcle, + Vclt, + Vcmp, + Vcmpe, + Vcvt, + Vdiv, + Vdup, + Vext, + Vld1, + Vld2, + Vld3, + Vld4, + Vldm, + Vldr, + Vmax, + Vmaxnm, + Vmin, + Vminnm, + Vmla, + Vmls, + Vmov, + Vmovn, + Vmrs, + Vmsr, + Vmul, + Vmvn, + Vneg, + Vnmul, + Vnmla, + Vnmls, + Vorr, + Vpadd, + Vrev, + Vrint, + Vsel, + Vshl, + Vshr, + Vshrn, + Vst1, + Vst2, + Vst3, + Vst4, + Vstm, + Vstr, + Vsqrt, + Vrecpe, + Vrecps, + Vrsqrte, + Vrsqrts, + Vsub, + Vtbl, + Vtrn, + Vuzp, + Vzip, } } diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index 3a1e91c8..988e86bd 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -87,16 +87,39 @@ namespace ARMeilleure.Instructions return (ulong)GetContext().Fpsr; } + public static uint GetFpscr() + { + ExecutionContext context = GetContext(); + uint result = (uint)(context.Fpsr & FPSR.A32Mask) | (uint)(context.Fpcr & FPCR.A32Mask); + + result |= context.GetFPstateFlag(FPState.NFlag) ? (1u << 31) : 0; + result |= context.GetFPstateFlag(FPState.ZFlag) ? (1u << 30) : 0; + result |= context.GetFPstateFlag(FPState.CFlag) ? (1u << 29) : 0; + result |= context.GetFPstateFlag(FPState.VFlag) ? (1u << 28) : 0; + + return result; + } + public static ulong GetTpidrEl0() { return (ulong)GetContext().TpidrEl0; } + public static uint GetTpidrEl032() + { + return (uint)GetContext().TpidrEl0; + } + public static ulong GetTpidr() { return (ulong)GetContext().Tpidr; } + public static uint GetTpidr32() + { + return (uint)GetContext().Tpidr; + } + public static ulong GetCntfrqEl0() { return GetContext().CntfrqEl0; @@ -117,13 +140,31 @@ namespace ARMeilleure.Instructions GetContext().Fpsr = (FPSR)value; } + public static void SetFpscr(uint value) + { + ExecutionContext context = GetContext(); + + context.SetFPstateFlag(FPState.NFlag, (value & (1u << 31)) != 0); + context.SetFPstateFlag(FPState.ZFlag, (value & (1u << 30)) != 0); + context.SetFPstateFlag(FPState.CFlag, (value & (1u << 29)) != 0); + context.SetFPstateFlag(FPState.VFlag, (value & (1u << 28)) != 0); + + context.Fpsr = FPSR.A32Mask & (FPSR)value; + context.Fpcr = FPCR.A32Mask & (FPCR)value; + } + public static void SetTpidrEl0(ulong value) { GetContext().TpidrEl0 = (long)value; } -#endregion -#region "Read" + public static void SetTpidrEl032(uint value) + { + GetContext().TpidrEl0 = (long)value; + } + #endregion + + #region "Read" public static byte ReadByte(ulong address) { return GetMemoryManager().ReadByte((long)address); diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs index 10bb47df..611e8d6a 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -420,6 +420,26 @@ namespace ARMeilleure.Instructions return MathF.Truncate(value); } } + + public static int FloatToInt32(float value) + { + return SatF32ToS32(RoundF(value)); + } + + public static int DoubleToInt32(double value) + { + return SatF64ToS32(Round(value)); + } + + public static uint FloatToUInt32(float value) + { + return SatF32ToU32(RoundF(value)); + } + + public static uint DoubleToUInt32(double value) + { + return SatF64ToU32(Round(value)); + } #endregion #region "Saturation" diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index 256bc5b9..d3e15a2c 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -121,7 +121,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -623,12 +623,18 @@ namespace ARMeilleure.Instructions { public static float FPAdd(float value1, float value2) { + return FPAddFpscr(value1, value2, false); + } + + public static float FPAddFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -639,7 +645,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -657,7 +663,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -672,9 +678,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(float value1, float value2, bool signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -684,7 +691,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -708,10 +715,16 @@ namespace ARMeilleure.Instructions public static float FPCompareEQ(float value1, float value2) { + return FPCompareEQFpscr(value1, value2, false); + } + + public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -721,7 +734,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -734,10 +747,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGE(float value1, float value2) { + return FPCompareGEFpscr(value1, value2, false); + } + + public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -745,7 +764,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -757,10 +776,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGT(float value1, float value2) { + return FPCompareGTFpscr(value1, value2, false); + } + + public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -768,7 +793,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -788,14 +813,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static float FPDiv(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -806,7 +842,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -814,7 +850,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -825,7 +861,7 @@ namespace ARMeilleure.Instructions { result = value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -839,12 +875,18 @@ namespace ARMeilleure.Instructions public static float FPMax(float value1, float value2) { + return FPMaxFpscr(value1, value2, false); + } + + public static float FPMaxFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -877,7 +919,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -892,10 +934,16 @@ namespace ARMeilleure.Instructions public static float FPMaxNum(float value1, float value2) { + return FPMaxNumFpscr(value1, value2, false); + } + + public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -906,17 +954,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static float FPMin(float value1, float value2) { + return FPMinFpscr(value1, value2, false); + } + + public static float FPMinFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -949,7 +1003,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -964,10 +1018,16 @@ namespace ARMeilleure.Instructions public static float FPMinNum(float value1, float value2) { + return FPMinNumFpscr(value1, value2, false); + } + + public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -978,17 +1038,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static float FPMul(float value1, float value2) { + return FPMulFpscr(value1, value2, false); + } + + public static float FPMulFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -999,7 +1065,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -1013,7 +1079,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1027,22 +1093,28 @@ namespace ARMeilleure.Instructions public static float FPMulAdd(float valueA, float value1, float value2) { + return FPMulAddFpscr(valueA, value1, value2, false); + } + + public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -1057,7 +1129,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -1075,7 +1147,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1094,14 +1166,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static float FPMulSubFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static float FPMulX(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1124,7 +1204,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1153,15 +1233,21 @@ namespace ARMeilleure.Instructions public static float FPRecipEstimate(float value) { + return FPRecipEstimateFpscr(value, false); + } + + public static float FPRecipEstimateFpscr(float value, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -1171,13 +1257,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (MathF.Abs(value) < MathF.Pow(2f, -128)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -1188,10 +1274,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) { result = FPZero(sign); @@ -1240,16 +1326,49 @@ namespace ARMeilleure.Instructions return result; } + public static float FPRecipStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static float FPRecipStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1268,7 +1387,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 2f); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1283,14 +1402,15 @@ namespace ARMeilleure.Instructions public static float FPRecpX(float value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -1306,27 +1426,33 @@ namespace ARMeilleure.Instructions public static float FPRSqrtEstimate(float value) { + return FPRSqrtEstimateFpscr(value, false); + } + + public static float FPRSqrtEstimateFpscr(float value, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -1369,16 +1495,95 @@ namespace ARMeilleure.Instructions return result; } + public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0f; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRSqrtStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static float FPRSqrtStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1397,7 +1602,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1412,14 +1617,15 @@ namespace ARMeilleure.Instructions public static float FPSqrt(float value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -1433,13 +1639,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = MathF.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1452,12 +1658,18 @@ namespace ARMeilleure.Instructions public static float FPSub(float value1, float value2) { + return FPSubFpscr(value1, value2, false); + } + + public static float FPSubFpscr(float value1, float value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1468,7 +1680,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -1486,7 +1698,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1500,7 +1712,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -1523,6 +1735,11 @@ namespace ARMeilleure.Instructions return sign ? -2f : +2f; } + private static float FPThree(bool sign) + { + return sign ? -3f : +3f; + } + private static float FPOnePointFive(bool sign) { return sign ? -1.5f : +1.5f; @@ -1543,7 +1760,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out uint valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (uint)BitConverter.SingleToInt32Bits(value); @@ -1551,14 +1769,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7F800000u) == 0u) { - if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x007FFFFFu) != 0u) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -1592,25 +1810,26 @@ namespace ARMeilleure.Instructions uint op1, uint op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -1626,33 +1845,34 @@ namespace ARMeilleure.Instructions uint op2, uint op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -1660,16 +1880,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1u << 22; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -1677,11 +1897,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int32BitsToSingle((int)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } @@ -1696,12 +1916,18 @@ namespace ARMeilleure.Instructions { public static double FPAdd(double value1, double value2) { + return FPAddFpscr(value1, value2, false); + } + + public static double FPAddFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1712,7 +1938,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -1730,7 +1956,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1745,9 +1971,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(double value1, double value2, bool signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -1757,7 +1984,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1781,10 +2008,16 @@ namespace ARMeilleure.Instructions public static double FPCompareEQ(double value1, double value2) { + return FPCompareEQFpscr(value1, value2, false); + } + + public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1794,7 +2027,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1807,10 +2040,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGE(double value1, double value2) { + return FPCompareGEFpscr(value1, value2, false); + } + + public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1818,7 +2057,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1830,10 +2069,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGT(double value1, double value2) { + return FPCompareGTFpscr(value1, value2, false); + } + + public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1841,7 +2086,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1861,14 +2106,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static double FPDiv(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1879,7 +2135,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -1887,7 +2143,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -1898,7 +2154,7 @@ namespace ARMeilleure.Instructions { result = value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1912,12 +2168,18 @@ namespace ARMeilleure.Instructions public static double FPMax(double value1, double value2) { + return FPMaxFpscr(value1, value2, false); + } + + public static double FPMaxFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1950,7 +2212,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1965,10 +2227,16 @@ namespace ARMeilleure.Instructions public static double FPMaxNum(double value1, double value2) { + return FPMaxNumFpscr(value1, value2, false); + } + + public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -1979,17 +2247,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static double FPMin(double value1, double value2) { + return FPMinFpscr(value1, value2, false); + } + + public static double FPMinFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2022,7 +2296,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2037,10 +2311,16 @@ namespace ARMeilleure.Instructions public static double FPMinNum(double value1, double value2) { + return FPMinNumFpscr(value1, value2, false); + } + + public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -2051,17 +2331,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static double FPMul(double value1, double value2) { + return FPMulFpscr(value1, value2, false); + } + + public static double FPMulFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2072,7 +2358,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -2086,7 +2372,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2100,22 +2386,28 @@ namespace ARMeilleure.Instructions public static double FPMulAdd(double valueA, double value1, double value2) { + return FPMulAddFpscr(valueA, value1, value2, false); + } + + public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -2130,7 +2422,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -2148,7 +2440,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2167,14 +2459,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static double FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static double FPMulX(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2197,7 +2497,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2226,15 +2526,21 @@ namespace ARMeilleure.Instructions public static double FPRecipEstimate(double value) { + return FPRecipEstimateFpscr(value, false); + } + + public static double FPRecipEstimateFpscr(double value, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -2244,13 +2550,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (Math.Abs(value) < Math.Pow(2d, -1024)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -2261,10 +2567,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) { result = FPZero(sign); @@ -2313,16 +2619,49 @@ namespace ARMeilleure.Instructions return result; } + public static double FPRecipStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static double FPRecipStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2341,7 +2680,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 2d); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2356,14 +2695,15 @@ namespace ARMeilleure.Instructions public static double FPRecpX(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -2379,27 +2719,33 @@ namespace ARMeilleure.Instructions public static double FPRSqrtEstimate(double value) { + return FPRSqrtEstimateFpscr(value, false); + } + + public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -2442,16 +2788,95 @@ namespace ARMeilleure.Instructions return result; } + public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRSqrtStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static double FPRSqrtStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2470,7 +2895,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2485,14 +2910,15 @@ namespace ARMeilleure.Instructions public static double FPSqrt(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -2506,13 +2932,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = Math.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2525,12 +2951,18 @@ namespace ARMeilleure.Instructions public static double FPSub(double value1, double value2) { + return FPSubFpscr(value1, value2, false); + } + + public static double FPSubFpscr(double value1, double value2, bool standardFpscr) + { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2541,7 +2973,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -2559,7 +2991,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2573,7 +3005,7 @@ namespace ARMeilleure.Instructions private static double FPDefaultNaN() { - return -double.NaN; + return BitConverter.Int64BitsToDouble(0x7ff8000000000000); } private static double FPInfinity(bool sign) @@ -2596,6 +3028,11 @@ namespace ARMeilleure.Instructions return sign ? -2d : +2d; } + private static double FPThree(bool sign) + { + return sign ? -3d : +3d; + } + private static double FPOnePointFive(bool sign) { return sign ? -1.5d : +1.5d; @@ -2616,7 +3053,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out ulong valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); @@ -2624,14 +3062,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7FF0000000000000ul) == 0ul) { - if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -2665,25 +3103,26 @@ namespace ARMeilleure.Instructions ulong op1, ulong op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -2699,33 +3138,34 @@ namespace ARMeilleure.Instructions ulong op2, ulong op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -2733,16 +3173,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1ul << 51; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -2750,11 +3190,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int64BitsToDouble((long)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } |
