diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2021-01-04 23:45:54 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-04 23:45:54 +0100 |
| commit | 430ba6da65a781196db7d723cc88710bb7f5caf8 (patch) | |
| tree | a7ed55f638dde795f4270a324fa5338ffb80ee12 /ARMeilleure/Decoders/OpCodeTable.cs | |
| parent | a03ab0c4a0bef3c168874dc2105c43c9051e0807 (diff) | |
CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)
* Add Pmull_V Sse fast path only, both "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test.
* Add Clmul fast path for the 128 bits variant.
* Small optimisation (save 60 instructions) for the Sse fast path about the 128 bits variant.
* Add slow path, both variants. Fix V128 Shl/Shr when shift = 0.
* A32: Add Vmull_I P64 variant (slow path); not tested.
* A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
Diffstat (limited to 'ARMeilleure/Decoders/OpCodeTable.cs')
| -rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 665e7129..b1912485 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -413,6 +413,8 @@ namespace ARMeilleure.Decoders
             SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, OpCodeSimdReg.Create);
             SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
             SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
+            SetA64("0x001110001xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
+            SetA64("0x001110111xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
             SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, OpCodeSimdReg.Create);
             SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V, InstEmit.Rbit_V, OpCodeSimd.Create);
             SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, OpCodeSimd.Create);
@@ -886,7 +888,7 @@ namespace ARMeilleure.Decoders
             SetA32("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, OpCode32SimdReg.Create);
             SetA32("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, OpCode32SimdRegElemLong.Create);
             SetA32("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create);
-            SetA32("111100101x00xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create); // Polynomial
+            SetA32("111100101xx0xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create); // P8/P64
             SetA32("111100111x110000xxxx01011xx0xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, OpCode32SimdBinary.Create);
             SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create); // D/Q vector I32.
             SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create);
```
