Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. (#1471)

* Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests.

* Address PR feedback & Nit.
author: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2020-08-13 07:34:02 +0200
committer: GitHub <noreply@github.com> 2020-08-13 02:34:02 -0300
commit: 6938988427e7f96adcd8fe76fe5d0a19b014b2b2 (patch)
tree: 39b049344e5d00b5f152b5354e7c8090ebf46c41 /ARMeilleure/Instructions
parent: 1ad9045c6b00a5c729c8c7d697f3da54ed177883 (diff)
5 files changed, 50 insertions, 10 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index f7f3d47e..57176794 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -231,6 +231,38 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vfma_S(ArmEmitterContext context) // Fused multiply-add through the scalar ternary F32 emitter; the soft-float path below is the single-call FPMulAdd form.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                // TODO: Use FMA instruction set. Until then the fast path hands the emitter separate multiply (Mulss/Mulsd) and add (Addss/Addsd) intrinsics, not one fused instruction.
+                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+            }
+            else
+            {
+                EmitScalarTernaryOpF32(context, (op1, op2, op3) => // Fallback when FastFP or SSE2 is unavailable: one SoftFloat32.FPMulAdd call taking all three operands.
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+                });
+            }
+        }
+
+        public static void Vfms_S(ArmEmitterContext context) // Fused multiply-subtract through the scalar ternary F32 emitter; the soft-float path below is the single-call FPMulSub form.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                // TODO: Use FMA instruction set. Until then the fast path hands the emitter separate multiply (Mulss/Mulsd) and subtract (Subss/Subsd) intrinsics, not one fused instruction.
+                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+            }
+            else
+            {
+                EmitScalarTernaryOpF32(context, (op1, op2, op3) => // Fallback when FastFP or SSE2 is unavailable: one SoftFloat32.FPMulSub call taking all three operands.
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+                });
+            }
+        }
+
         public static void Vmov_S(ArmEmitterContext context)
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
@@ -586,7 +618,8 @@ namespace ARMeilleure.Instructions
             {
                 EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                 {
-                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
                 });
             }
         }
@@ -657,7 +690,8 @@ namespace ARMeilleure.Instructions
             {
                 EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                 {
-                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
                 });
             }
         }
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 00b8ffd6..e4efea70 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -139,6 +139,7 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
         public static void Vcvt_FI(ArmEmitterContext context)
         {
             OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
@@ -236,13 +237,14 @@ namespace ARMeilleure.Instructions
             return roundMode;
         }
 
-        public static void Vcvt_R(ArmEmitterContext context)
+        // VCVTA/M/N/P (floating-point).
+        public static void Vcvt_RM(ArmEmitterContext context)
         {
-            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
 
             OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
 
-            bool unsigned = (op.Opc & 1) == 0;
+            bool unsigned = op.Opc == 0;
             int rm = op.Opc2 & 3;
 
             if (Optimizations.UseSse41 && rm != 0b00)
@@ -277,9 +279,10 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // VRINTA/M/N/P (floating-point).
         public static void Vrint_RM(ArmEmitterContext context)
         {
-            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
             OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
 
@@ -320,9 +323,10 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // VRINTZ (floating-point).
         public static void Vrint_Z(ArmEmitterContext context)
         {
-            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
             if (Optimizations.UseSse2)
             {
@@ -355,7 +359,7 @@ namespace ARMeilleure.Instructions
         private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
         {
             // A port of the similar round function in InstEmitSimdCvt.
-            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
 
             bool doubleSize = (op.Size & 1) != 0;
             int shift = doubleSize ? 1 : 2;
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index e045c601..a962c0fc 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -906,7 +906,7 @@ namespace ARMeilleure.Instructions
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
             bool doubleSize = (op.Size & 1) != 0;
-            int shift = doubleSize ? 1 : 2;
+
             Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
             Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index b484381f..52292242 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -559,7 +559,7 @@ namespace ARMeilleure.Instructions
             }
         }
 
-        public static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+        private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
         {
             OpCode32Simd op = (OpCode32Simd)context.CurrOp;
 
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index d7283029..9e820f6b 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -563,6 +563,8 @@ namespace ARMeilleure.Instructions
         Vdup,
         Veor,
         Vext,
+        Vfma,
+        Vfms,
         Vld1,
         Vld2,
         Vld3,
author	LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>	2020-08-13 07:34:02 +0200
committer	GitHub <noreply@github.com>	2020-08-13 02:34:02 -0300
commit	6938988427e7f96adcd8fe76fe5d0a19b014b2b2 (patch)
tree	39b049344e5d00b5f152b5354e7c8090ebf46c41 /ARMeilleure/Instructions
parent	1ad9045c6b00a5c729c8c7d697f3da54ed177883 (diff)