diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2021-01-04 23:45:54 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-04 23:45:54 +0100 |
| commit | 430ba6da65a781196db7d723cc88710bb7f5caf8 (patch) | |
| tree | a7ed55f638dde795f4270a324fa5338ffb80ee12 /ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | |
| parent | a03ab0c4a0bef3c168874dc2105c43c9051e0807 (diff) | |
CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)
* Add Pmull_V Sse fast path only, both "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test.
* Add Clmul fast path for the 128-bit variant.
* Small optimisation (saves 60 instructions) in the Sse fast path for the 128-bit variant.
* Add slow path, both variants. Fix V128 Shl/Shr when shift = 0.
* A32: Add Vmull_I P64 variant (slow path); not tested.
* A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs')
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 36 |
1 file changed, 13 insertions, 23 deletions
```diff
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index d35af209..0fc8c391 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -920,7 +920,19 @@ namespace ARMeilleure.Instructions
 
             if (op.Polynomial)
             {
-                EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+                if (op.Size == 0) // P8
+                {
+                    EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+                }
+                else /* if (op.Size == 2) // P64 */
+                {
+                    Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
+                    Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);
+
+                    Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+
+                    context.Copy(GetVecA32(op.Qd), res);
+                }
             }
             else
             {
@@ -1366,27 +1378,5 @@ namespace ARMeilleure.Instructions
                 EmitVectorBinaryOpSimd32(context, genericEmit);
             }
         }
-
-        private static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
-        {
-            Debug.Assert(eSize <= 32);
-
-            Operand result = eSize == 32 ? Const(0L) : Const(0);
-
-            if (eSize == 32)
-            {
-                op1 = context.ZeroExtend32(OperandType.I64, op1);
-                op2 = context.ZeroExtend32(OperandType.I64, op2);
-            }
-
-            for (int i = 0; i < eSize; i++)
-            {
-                Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
-
-                result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
-            }
-
-            return result;
-        }
     }
 }
```
