author     TSR Berry <20988865+TSRBerry@users.noreply.github.com>  2023-04-08 01:22:00 +0200
committer  Mary <thog@protonmail.com>  2023-04-27 23:51:14 +0200
commit     cee712105850ac3385cd0091a923438167433f9f (patch)
tree       4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/ARMeilleure/Instructions
parent     cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/ARMeilleure/Instructions')
-rw-r--r--  src/ARMeilleure/Instructions/CryptoHelper.cs  280
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAlu.cs  400
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAlu32.cs  931
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAluHelper.cs  613
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitBfm.cs  196
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitCcmp.cs  61
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitCsel.cs  53
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitDiv.cs  67
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitException.cs  55
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitException32.cs  39
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlow.cs  107
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlow32.cs  136
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlowHelper.cs  240
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHash.cs  69
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHash32.cs  53
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHashHelper.cs  118
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHelper.cs  264
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemory.cs  184
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemory32.cs  265
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryEx.cs  178
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs  237
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs  174
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs  648
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMove.cs  41
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMul.cs  100
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMul32.cs  379
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs  5224
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs  1703
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCmp.cs  799
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs  437
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs  99
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs  99
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCvt.cs  1891
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs  800
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHash.cs  147
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHash32.cs  64
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs  56
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper.cs  2088
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs  1286
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs  366
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs  720
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdLogical.cs  612
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs  266
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMemory.cs  160
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs  352
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMove.cs  850
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMove32.cs  656
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdShift.cs  1827
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdShift32.cs  389
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSystem.cs  248
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSystem32.cs  351
-rw-r--r--  src/ARMeilleure/Instructions/InstName.cs  685
-rw-r--r--  src/ARMeilleure/Instructions/NativeInterface.cs  195
-rw-r--r--  src/ARMeilleure/Instructions/SoftFallback.cs  624
-rw-r--r--  src/ARMeilleure/Instructions/SoftFloat.cs  3480
55 files changed, 32362 insertions, 0 deletions
diff --git a/src/ARMeilleure/Instructions/CryptoHelper.cs b/src/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 00000000..e517c75d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,280 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class CryptoHelper
+ {
+#region "LookUp Tables"
+ private static ReadOnlySpan<byte> _sBox => new byte[]
+ {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+ };
+
+ private static ReadOnlySpan<byte> _invSBox => new byte[]
+ {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ };
+
+ private static ReadOnlySpan<byte> _gfMul02 => new byte[]
+ {
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+ 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+ 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+ 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+ 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+ 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+ 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+ 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+ 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+ 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+ 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+ 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
+ };
+
+ private static ReadOnlySpan<byte> _gfMul03 => new byte[]
+ {
+ 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+ 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+ 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+ 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+ 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+ 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+ 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+ 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+ 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+ 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+ 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+ 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+ 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+ 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
+ };
+
+ private static ReadOnlySpan<byte> _gfMul09 => new byte[]
+ {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+ 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+ 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+ 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+ 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+ 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+ 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+ 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+ 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+ 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+ 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+ 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+ 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+ 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0B => new byte[]
+ {
+ 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+ 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+ 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+ 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+ 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+ 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+ 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+ 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+ 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+ 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+ 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+ 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+ 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+ 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+ 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+ 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0D => new byte[]
+ {
+ 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+ 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+ 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+ 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+ 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+ 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+ 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+ 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+ 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+ 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+ 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+ 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+ 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0E => new byte[]
+ {
+ 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+ 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+ 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+ 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+ 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+ 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+ 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+ 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+ 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+ 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+ 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+ 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
+ };
+
+ private static ReadOnlySpan<byte> _srPerm => new byte[]
+ {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+ };
+
+ private static ReadOnlySpan<byte> _isrPerm => new byte[]
+ {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+ };
+#endregion
+
+ public static V128 AesInvMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
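+ // Each output byte is the GF(2^8) dot product of the state column with a row
+ // of the InvMixColumns matrix {0e,0b,0d,09}, rotated once per output row.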
+ outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]);
+ outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]);
+ outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_isrPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _invSBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
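+ // Each output byte is the GF(2^8) dot product of the state column with a row
+ // of the MixColumns matrix {02,03,01,01}, rotated once per output row.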
+ outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3);
+ outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3);
+ outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_srPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _sBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+ }
+}
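The six _gfMul tables above are precomputed GF(2^8) products modulo the AES reduction polynomial 0x11b, indexed by the input byte. A minimal C# sketch of how such entries can be derived, handy for cross-checking the constants (GfMul02, GfMul03, and GfMul09 are hypothetical helpers, not part of this file):

    // Doubling in GF(2^8): shift left, then reduce by 0x1b if the high bit fell out.
    static byte GfMul02(byte x) => (byte)((x << 1) ^ (((x & 0x80) != 0) ? 0x1b : 0));

    // GF addition is XOR, so 3x = 2x + x; larger factors decompose the same way.
    static byte GfMul03(byte x) => (byte)(GfMul02(x) ^ x);

    // 9x = 2(2(2x)) + x, matching the _gfMul09 table entries.
    static byte GfMul09(byte x) => (byte)(GfMul02(GfMul02(GfMul02(x))) ^ x);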
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu.cs b/src/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 00000000..e0d10e77
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,400 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false);
+ public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);
+
+ private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ carry = context.ZeroExtend32(OperandType.I64, carry);
+ }
+
+ d = context.Add(d, carry);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitAdcsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Add(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Adds(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Ands(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, m);
+
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Asrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false);
+ public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);
+
+ private static void EmitBic(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+ }
+
+ SetAluD(context, d, setFlags);
+ }
+
+ public static void Cls(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand nHigh = context.ShiftRightUI(n, Const(1));
+
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue);
+
+ Operand nLow = context.BitwiseAnd(n, mask);
+
+ Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow));
+
+ res = context.Subtract(res, Const(res.Type, 1));
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand d = context.CountLeadingZeros(n);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Eon(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Extr(ArmEmitterContext context)
+ {
+ OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rm);
+
+ if (op.Shift != 0)
+ {
+ if (op.Rn == op.Rm)
+ {
+ res = context.RotateRight(res, Const(op.Shift));
+ }
+ else
+ {
+ res = context.ShiftRightUI(res, Const(op.Shift));
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ int invShift = op.GetBitsCount() - op.Shift;
+
+ res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift)));
+ }
+ }
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Lslv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Lsrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false);
+ public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);
+
+ private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Subtract(n, m);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ borrow = context.ZeroExtend32(OperandType.I64, borrow);
+ }
+
+ d = context.Subtract(d, borrow);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Subs(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBits32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBits64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaaaaaaaaaaul)), Const(1)),
+ context.ShiftLeft (context.BitwiseAnd(op, Const(0x5555555555555555ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccccccccccul)), Const(2)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x3333333333333333ul)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0f0f0f0f0ul)), Const(4)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0f0f0f0f0ful)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ff00ff00fful)), Const(8)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(32)), context.ShiftLeft(val, Const(32)));
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBytes16_32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBytes16_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Rev32(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = context.ByteSwap(n);
+ }
+ else
+ {
+ d = EmitReverseBytes32_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBytes32_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand val = EmitReverseBytes16_64Op(context, op);
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+ }
+
+ public static void Rev64(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn)));
+ }
+
+ public static void Rorv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+ }
+
+ private static Operand GetAluMShift(ArmEmitterContext context)
+ {
+ IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ m = context.ConvertI64ToI32(m);
+ }
+
+ return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1));
+ }
+
+ private static void EmitCVFlagsClear(ArmEmitterContext context)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ SetFlag(context, PState.VFlag, Const(0));
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: false);
+ }
+
+ public static void SetAluDOrZR(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: true);
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+ {
+ IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+ if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, op.Rd, d);
+ }
+ }
+}
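The Cls emitter above leans on a bit-twiddling identity rather than a native count-leading-sign primitive: XOR the value shifted right by one with the value stripped of its sign bit, and the first bit that disagrees with the sign becomes the highest set bit, so count-leading-zeros minus one gives the answer. A standalone sketch of the same identity for the 32-bit case (Cls32 is a hypothetical helper, shown only to illustrate the transformation):

    using System.Numerics;

    static int Cls32(uint n)
    {
        // Bit i of x is set exactly where bits i+1 and i of n disagree.
        uint x = (n >> 1) ^ (n & 0x7FFFFFFF);

        // Leading zeros of x count the sign bit plus every bit matching it;
        // subtracting one excludes the sign bit itself, as AArch64 CLS requires.
        return BitOperations.LeadingZeroCount(x) - 1;
    }

For example, Cls32(0xFFFF0000) returns 15 and Cls32(0) returns 31.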
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu32.cs b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 00000000..584ada7e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,931 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Add(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Adc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ res = context.Add(res, carry);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAdcsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Bfc(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bfi(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand d = GetIntA32(context, op.Rd);
+ Operand part = context.BitwiseAnd(n, Const(op.SourceMask));
+
+ if (op.Lsb != 0)
+ {
+ part = context.ShiftLeft(part, Const(op.Lsb));
+ }
+
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+ res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bic(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.CountLeadingZeros(m);
+ EmitAluStore(context, res);
+ }
+
+ public static void Cmp(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ public static void Cmn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mov(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand m = GetAluM(context);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, m);
+ }
+
+ EmitAluStore(context, m);
+ }
+
+ public static void Movt(ArmEmitterContext context)
+ {
+ IOpCode32AluImm16 op = (IOpCode32AluImm16)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+ Operand imm = Const(op.Immediate << 16); // Immediate value as top halfword.
+ Operand res = context.BitwiseAnd(d, Const(0x0000ffff));
+ res = context.BitwiseOr(res, imm);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mul(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.Multiply(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mvn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseNot(m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Pkh(ArmEmitterContext context)
+ {
+ OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res;
+
+ bool tbform = op.ShiftType == ShiftType.Asr;
+ if (tbform)
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF)));
+ }
+ else
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF)));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBits32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = context.ByteSwap(m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Revsh(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, context.SignExtend16(OperandType.I32, res));
+ }
+
+ public static void Rsc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, m, n);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rsb(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, m, res);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: false);
+ }
+
+ public static void Sbc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sbfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Sdiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: false);
+ }
+
+ public static void Sel(ArmEmitterContext context)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
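+ // Negating a 0/1 flag gives 0x0 or 0xFFFFFFFF; ZeroExtend8 keeps one byte of
+ // it, so the four GE flags assemble into a per-byte select mask below. ge3
+ // needs no masking because the shift by 24 discards its upper bits anyway.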
+ Operand ge0 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE0Flag)));
+ Operand ge1 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE1Flag)));
+ Operand ge2 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE2Flag)));
+ Operand ge3 = context.Negate(GetFlag(PState.GE3Flag));
+
+ Operand mask = context.BitwiseOr(ge0, context.ShiftLeft(ge1, Const(8)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge2, Const(16)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge3, Const(24)));
+
+ Operand res = context.BitwiseOr(context.BitwiseAnd(n, mask), context.BitwiseAnd(m, context.BitwiseNot(mask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Shadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: false);
+ }
+
+ public static void Shsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: false);
+ }
+
+ public static void Ssat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: false);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 8);
+ }
+
+ public static void Sxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, true);
+ }
+
+ public static void Sxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 16);
+ }
+
+ public static void Teq(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Tst(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Uadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: true);
+ }
+
+ public static void Ubfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Udiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: true);
+ }
+
+ public static void Uhadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: true);
+ }
+
+ public static void Uhsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: true);
+ }
+
+ public static void Usat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, 0, op.SatImm == 32 ? (int)(~0) : (1 << op.SatImm) - 1);
+ }
+
+ public static void Usat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, 0, (1 << op.SatImm) - 1);
+ }
+
+ public static void Usub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: true);
+ }
+
+ public static void Uxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 8);
+ }
+
+ public static void Uxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, false);
+ }
+
+ public static void Uxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 16);
+ }
+
+ private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ switch (bits)
+ {
+ case 8:
+ res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res);
+ break;
+ case 16:
+ res = (signed) ? context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res);
+ break;
+ }
+
+ if (op.Add)
+ {
+ res = context.Add(res, GetAluN(context));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitExtend16(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ Operand low16, high16;
+ if (signed)
+ {
+ low16 = context.SignExtend8(OperandType.I32, res);
+ high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+ else
+ {
+ low16 = context.ZeroExtend8(OperandType.I32, res);
+ high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+
+ if (op.Add)
+ {
+ Operand n = GetAluN(context);
+ Operand lowAdd, highAdd;
+ if (signed)
+ {
+ lowAdd = context.SignExtend16(OperandType.I32, n);
+ highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ lowAdd = context.ZeroExtend16(OperandType.I32, n);
+ highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+
+ low16 = context.Add(low16, lowAdd);
+ high16 = context.Add(high16, highAdd);
+ }
+
+ res = context.BitwiseOr(
+ context.ZeroExtend16(OperandType.I32, low16),
+ context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16)));
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand zero = Const(m.Type, 0);
+
+ Operand divisorIsZero = context.ICompareEqual(m, zero);
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ // TODO: tests to ensure A32 works the same
+
+ Operand intMin = Const(int.MinValue);
+ Operand minus1 = Const(-1);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ EmitAluStore(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand res = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide(n, m);
+
+ EmitAluStore(context, res);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ EmitAluStore(context, zero);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitAddSub8(ArmEmitterContext context, bool add, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand res = Const(0);
+
+ for (int byteSel = 0; byteSel < 4; byteSel++)
+ {
+ Operand shift = Const(byteSel * 8);
+
+ Operand nByte = context.ShiftRightUI(n, shift);
+ Operand mByte = context.ShiftRightUI(m, shift);
+
+ nByte = unsigned ? context.ZeroExtend8(OperandType.I32, nByte) : context.SignExtend8(OperandType.I32, nByte);
+ mByte = unsigned ? context.ZeroExtend8(OperandType.I32, mByte) : context.SignExtend8(OperandType.I32, mByte);
+
+ Operand resByte = add ? context.Add(nByte, mByte) : context.Subtract(nByte, mByte);
+
+ res = context.BitwiseOr(res, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, resByte), shift));
+
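+ // GE flag: for unsigned adds it is the carry out of bit 7 (bit 8 of the
+ // zero-extended sum); otherwise it is the inverted sign of the sign-extended
+ // result, i.e. it is set when the signed result is non-negative.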
+ SetFlag(context, PState.GE0Flag + byteSel, unsigned && add
+ ? context.ShiftRightUI(resByte, Const(8))
+ : context.ShiftRightUI(context.BitwiseNot(resByte), Const(31)));
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand xor, res, carry;
+
+ // This relies on the equality x+y == ((x&y) << 1) + (x^y).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
+ // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
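+ // Worked byte example: x = 0xFF, y = 0x01: (x & y) = 0x01,
+ // ((x ^ y) >> 1) & 0x7F = 0x7F, and 0x01 + 0x7F = 0x80 = (0xFF + 0x01) / 2.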
+
+ res = context.BitwiseAnd(m, n);
+ carry = context.BitwiseExclusiveOr(m, n);
+ xor = context.ShiftRightUI(carry, Const(1));
+ xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
+ res = context.Add(res, xor);
+
+ if (!unsigned)
+ {
+ // Propagates the sign bit from (x^y)>>1 upwards by one.
+ carry = context.BitwiseAnd(carry, Const(0x80808080u));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+ Operand left, right, carry, res;
+
+ // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
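+ // Worked byte example: x = 0x04, y = 0x01: x ^ y = 0x05,
+ // (x ^ y) >> 1 = 0x02, (x ^ y) & y = 0x01, and 0x02 - 0x01 = 0x01 = (4 - 1) / 2.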
+
+ carry = context.BitwiseExclusiveOr(m, n);
+ left = context.ShiftRightUI(carry, Const(1));
+ right = context.BitwiseAnd(carry, m);
+
+ // We must now perform a partitioned subtraction.
+ // We can do this because the minuend now consists of 7-bit fields.
+ // We use the extra bit in each field of the minuend as a bit to borrow from, setting it beforehand.
+ // We invert this bit at the end, as it tells us whether that bit was borrowed from.
+
+ res = context.BitwiseOr(left, Const(0x80808080));
+ res = context.Subtract(res, right);
+ res = context.BitwiseExclusiveOr(res, Const(0x80808080));
+
+ if (!unsigned)
+ {
+ // We then sign extend the result into this bit.
+ carry = context.BitwiseAnd(carry, Const(0x80808080));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ int shift = DecodeImmShift(op.ShiftType, op.Imm5);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ if (shift == 32)
+ {
+ n = Const(0);
+ }
+ else
+ {
+ n = context.ShiftLeft(n, Const(shift));
+ }
+ break;
+ case ShiftType.Asr:
+ if (shift == 32)
+ {
+ n = context.ShiftRightSI(n, Const(31));
+ }
+ else
+ {
+ n = context.ShiftRightSI(n, Const(shift));
+ }
+ break;
+ }
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(n, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(n, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetIntA32(context, op.Rd, n);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitSat16(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ void SetD(int part, Operand value)
+ {
+ if (part == 0)
+ {
+ SetIntA32(context, op.Rd, context.ZeroExtend16(OperandType.I32, value));
+ }
+ else
+ {
+ SetIntA32(context, op.Rd, context.BitwiseOr(GetIntA32(context, op.Rd), context.ShiftLeft(value, Const(16))));
+ }
+ }
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand nLow = context.SignExtend16(OperandType.I32, n);
+ Operand nHigh = context.ShiftRightSI(n, Const(16));
+
+ for (int part = 0; part < 2; part++)
+ {
+ Operand nPart = part == 0 ? nLow : nHigh;
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(nPart, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(nPart, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetD(part, nPart);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static void EmitAluStore(ArmEmitterContext context, Operand value)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ EmitGenericAluStoreA32(context, op.Rd, ShouldSetFlags(context), value);
+ }
+ }
+}
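EmitDiv above spells out the Arm integer-division edge cases as IR branches: dividing by zero produces zero rather than a trap, and for signed division INT_MIN / -1 yields INT_MIN instead of raising an overflow. A plain C# sketch of the semantics being emitted (Sdiv32 is a hypothetical reference helper, not part of this file):

    static int Sdiv32(int n, int m)
    {
        if (m == 0)
        {
            return 0; // Arm defines integer division by zero as zero.
        }

        if (n == int.MinValue && m == -1)
        {
            return int.MinValue; // The only overflowing case; the quotient wraps.
        }

        return n / m;
    }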
diff --git a/src/ARMeilleure/Instructions/InstEmitAluHelper.cs b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 00000000..994878ad
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,613 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitAluHelper
+ {
+ public static bool ShouldSetFlags(ArmEmitterContext context)
+ {
+ IOpCode32HasSetFlags op = (IOpCode32HasSetFlags)context.CurrOp;
+
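+ // A null SetFlags means "depends on context": in Thumb, an instruction inside
+ // an IT block does not set flags, while the same encoding outside one does.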
+ if (op.SetFlags == null)
+ {
+ return !context.IsInIfThenBlock;
+ }
+
+ return op.SetFlags.Value;
+ }
+
+ public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+ {
+ SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0)));
+ SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+ }
+
+ public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = (Rd == Rn && CIn) || Rd < Rn
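+ // d is n + m + carry-in truncated to register width. Without a carry-out,
+ // d >= n, and d == n only when m + carry-in == 0 (so carry-in is clear);
+ // hence d < n, or d == n with carry-in set, detects the unsigned wrap.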
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = Rd < Rn
+ SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+ }
+
+ public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = (Rn == Rm && CIn) || Rn > Rm
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = Rn >= Rm
+ SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+ }
+
+ public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
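+ // Classic log2(n) bit reversal: swap adjacent bits, then 2-bit pairs, nibbles and
+ // bytes using masked shifts, and finish with a halfword swap.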
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16)));
+ }
+
+ public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8)));
+ }
+
+ public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
+ Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op));
+
+ return context.ConvertI64ToI32(val);
+ }
+
+ private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (((OpCode32)context.CurrOp).IsThumb)
+ {
+ bool isReturn = IsA32Return(context);
+ if (!isReturn)
+ {
+ context.StoreToContext();
+ }
+
+ InstEmitFlowHelper.EmitVirtualJump(context, value, isReturn);
+ }
+ else
+ {
+ EmitBxWritePc(context, value);
+ }
+ }
+
+ public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (rd == RegisterAlias.Aarch32Pc)
+ {
+ if (setFlags)
+ {
+ // TODO: Load SPSR etc.
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ EmitAluWritePc(context, value);
+ }
+ }
+ else
+ {
+ SetIntA32(context, rd, value);
+ }
+ }
+
+ public static Operand GetAluN(ArmEmitterContext context)
+ {
+ if (context.CurrOp is IOpCodeAlu op)
+ {
+ if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+ {
+ return GetIntOrZR(context, op.Rn);
+ }
+ else
+ {
+ return GetIntOrSP(context, op.Rn);
+ }
+ }
+ else if (context.CurrOp is IOpCode32Alu op32)
+ {
+ return GetIntA32(context, op32.Rn);
+ }
+ else
+ {
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ // ARM32.
+ case IOpCode32AluImm op:
+ {
+ if (ShouldSetFlags(context) && op.IsRotated && setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+ }
+
+ return Const(op.Immediate);
+ }
+
+ case IOpCode32AluImm16 op: return Const(op.Immediate);
+
+ case IOpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+ case IOpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry);
+
+ case IOpCode32AluReg op: return GetIntA32(context, op.Rm);
+
+ // ARM64.
+ case IOpCodeAluImm op:
+ {
+ if (op.GetOperandType() == OperandType.I32)
+ {
+ return Const((int)op.Immediate);
+ }
+ else
+ {
+ return Const(op.Immediate);
+ }
+ }
+
+ case IOpCodeAluRs op:
+ {
+ Operand value = GetIntOrZR(context, op.Rm);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break;
+ case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break;
+ case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break;
+ case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break;
+ }
+
+ return value;
+ }
+
+ case IOpCodeAluRx op:
+ {
+ Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+ value = context.ShiftLeft(value, Const(op.Shift));
+
+ return value;
+ }
+
+ default: throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32AluRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
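+ // In A32, an immediate shift amount of 0 encodes LSR #32, ASR #32 or RRX (ROR #1
+ // through carry); a plain shift by 0 is already covered by the LSL encoding.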
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break;
+ case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break;
+ case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+
+ public static int DecodeImmShift(ShiftType shiftType, int shift)
+ {
+ if (shift == 0)
+ {
+ switch (shiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ return shift;
+ }
+
+ public static Operand GetMShiftedByReg(ArmEmitterContext context, IOpCode32AluRsReg op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs));
+ Operand shiftIsZero = context.ICompareEqual(s, Const(0));
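+ // Register-specified shifts use only the bottom byte of Rs, and a shift amount of 0
+ // leaves both the value and the carry flag unchanged.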
+
+ Operand zeroResult = m;
+ Operand shiftResult = m;
+
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break;
+ }
+
+ return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult);
+ }
+
+ public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true)
+ {
+ Debug.Assert(boolValue.Type == OperandType.I32);
+
+ Operand endLabel = Label();
+
+ if (expected)
+ {
+ context.BranchIfFalse(endLabel, boolValue);
+ }
+ else
+ {
+ context.BranchIfTrue(endLabel, boolValue);
+ }
+
+ action();
+
+ context.MarkLabel(endLabel);
+ }
+
+ public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftLeft(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ return context.ShiftLeft(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftRightUI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightUI(m, Const(shift));
+ }
+ }
+
+ private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+ {
+ if (setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ }
+
+ return Const(0);
+ }
+
+ public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand l32Result;
+ Operand ge32Result;
+
+ Operand less32 = context.ICompareLess(shift, Const(32));
+
+ ge32Result = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () =>
+ {
+ SetCarryMLsb(context, ge32Result);
+ }, false);
+ }
+
+ l32Result = context.ShiftRightSI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ });
+ }
+
+ return context.ConditionalSelect(less32, l32Result, ge32Result);
+ }
+
+ public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift >= 32)
+ {
+ m = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return m;
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightSI(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ shift = context.BitwiseAnd(shift, Const(0x1f));
+ m = context.RotateRight(m, shift);
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ SetCarryMMsb(context, m);
+ }, false);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ shift &= 0x1f;
+
+ m = context.RotateRight(m, Const(shift));
+
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ // Rotate right by 1 with carry.
+ Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ m = context.ShiftRightUI(m, Const(1));
+
+ m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+ return m;
+ }
+
+ private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+ }
+
+ private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+ }
+
+ private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitBfm.cs b/src/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 00000000..46a7dddd
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Bfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand d = GetIntOrZR(context, op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand res;
+
+ if (op.Pos < op.Shift)
+ {
+ // BFI.
+ int shift = op.GetBitsCount() - op.Shift;
+
+ int width = op.Pos + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
+ }
+ else
+ {
+ // BFXIL.
+ int shift = op.Shift;
+
+ int width = op.Pos - shift + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+ }
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Sbfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int bitsCount = op.GetBitsCount();
+
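+ // SBFM has aliases with cheaper sequences: ASR (Pos == bitsCount - 1), SBFIZ
+ // (Pos < Shift) and SXTB/SXTH/SXTW (Shift == 0), each handled below.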
+ if (op.Pos + 1 == bitsCount)
+ {
+ EmitSbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitSbfiz(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+ }
+ else if (op.Pos == 31 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos));
+ res = context.ShiftRightSI(res, Const(bitsCount - 1));
+ res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask));
+
+ Operand n2 = GetBfmN(context);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+ }
+ }
+
+ public static void Ubfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
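+ // Likewise, UBFM covers the LSR, UBFIZ, LSL and UXTB/UXTH aliases, each emitted
+ // below with a simpler sequence than the generic rotate-and-mask form.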
+ if (op.Pos + 1 == op.GetBitsCount())
+ {
+ EmitUbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitUbfiz(context);
+ }
+ else if (op.Pos + 1 == op.Shift)
+ {
+ EmitBfmLsl(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+ }
+ else
+ {
+ SetIntOrZR(context, op.Rd, GetBfmN(context));
+ }
+ }
+
+ private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+ private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+ private static void EmitBfiz(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int width = op.Pos + 1;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift - width))
+ : context.ShiftRightUI(res, Const(op.Shift - width));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitSbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: true);
+ }
+
+ private static void EmitUbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: false);
+ }
+
+ private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift))
+ : context.ShiftRightUI(res, Const(op.Shift));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitBfmLsl(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ int shift = op.GetBitsCount() - op.Shift;
+
+ SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+ }
+
+ private static Operand GetBfmN(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ long mask = op.WMask & op.TMask;
+
+ return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitCcmp.cs b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 00000000..7f0beb6c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,61 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true);
+ public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false);
+
+ private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+ {
+ OpCodeCcmp op = (OpCodeCcmp)context.CurrOp;
+
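+ // Conditional compare: if the condition holds, the flags are set from a normal
+ // compare; otherwise they are loaded directly from the NZCV immediate.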
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ EmitCondBranch(context, lblTrue, op.Cond);
+
+ SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1));
+ SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1));
+ SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1));
+ SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ if (isNegated)
+ {
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+ else
+ {
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitCsel.cs b/src/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 00000000..926b9a9e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum CselOperation
+ {
+ None,
+ Increment,
+ Invert,
+ Negate
+ }
+
+ public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None);
+ public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment);
+ public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert);
+ public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate);
+
+ private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+ {
+ OpCodeCsel op = (OpCodeCsel)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (cselOp == CselOperation.Increment)
+ {
+ m = context.Add(m, Const(m.Type, 1));
+ }
+ else if (cselOp == CselOperation.Invert)
+ {
+ m = context.BitwiseNot(m);
+ }
+ else if (cselOp == CselOperation.Negate)
+ {
+ m = context.Negate(m);
+ }
+
+ Operand condTrue = GetCondTrue(context, op.Cond);
+
+ Operand d = context.ConditionalSelect(condTrue, n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitDiv.cs b/src/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 00000000..39a5c32e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,67 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false);
+ public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true);
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ // If Rm == 0, Rd = 0 (division by zero).
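+ // The checks are explicit because the host integer divide would fault on a zero
+ // divisor (and on INT_MIN / -1), while ARM defines results for both cases.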
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0));
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue);
+ Operand minus1 = is32Bits ? Const(-1) : Const(-1L);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ SetAluDOrZR(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand d = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide (n, m);
+
+ SetAluDOrZR(context, d);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ SetAluDOrZR(context, Const(op.GetOperandType(), 0));
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitException.cs b/src/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 00000000..0baaa87d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Brk(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+
+ public static void Svc(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Und(ArmEmitterContext context)
+ {
+ OpCode op = context.CurrOp;
+
+ string name = nameof(NativeInterface.Undefined);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitException32.cs b/src/ARMeilleure/Instructions/InstEmitException32.cs
new file mode 100644
index 00000000..ec0c32bf
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException32.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Svc(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Trap(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(context.CurrOp.Address));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow.cs b/src/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 00000000..c40eb55c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,107 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void B_Cond(ArmEmitterContext context)
+ {
+ OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp;
+
+ EmitBranch(context, op.Cond);
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blr(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ Operand n = context.Copy(GetIntOrZR(context, op.Rn));
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitVirtualCall(context, n);
+ }
+
+ public static void Br(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp;
+
+ EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero);
+ }
+
+ public static void Ret(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ context.Return(GetIntOrZR(context, op.Rn));
+ }
+
+ public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
+ public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false);
+
+ private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp;
+
+ Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit));
+
+ EmitBranch(context, value, onNotZero);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Condition cond)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow32.cs b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 00000000..3a7707ee
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,136 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ Blx(context, x: false);
+ }
+
+ public static void Blx(ArmEmitterContext context)
+ {
+ Blx(context, x: true);
+ }
+
+ private static void Blx(ArmEmitterContext context, bool x)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
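+ // GetPc() reads ahead of the instruction (+4 in Thumb, +8 in Arm), so the link
+ // register receives the address of the next instruction, with bit 0 set in Thumb state.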
+ uint currentPc = isThumb
+ ? pc | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ // If x is true, then this is a branch with link and exchange.
+ // In this case we need to swap the mode between Arm <-> Thumb.
+ if (x)
+ {
+ SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+ }
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blxr(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ Operand addr = context.Copy(GetIntA32(context, op.Rm));
+ Operand bitOne = context.BitwiseAnd(addr, Const(1));
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
+ uint currentPc = isThumb
+ ? (pc - 2) | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ SetFlag(context, PState.TFlag, bitOne);
+
+ EmitBxWritePc(context, addr);
+ }
+
+ public static void Bx(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeT16BImmCmp op = (OpCodeT16BImmCmp)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rn);
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+
+ public static void It(ArmEmitterContext context)
+ {
+ OpCodeT16IfThen op = (OpCodeT16IfThen)context.CurrOp;
+
+ context.SetIfThenBlockState(op.IfThenBlockConds);
+ }
+
+ public static void Tbb(ArmEmitterContext context) => EmitTb(context, halfword: false);
+ public static void Tbh(ArmEmitterContext context) => EmitTb(context, halfword: true);
+
+ private static void EmitTb(ArmEmitterContext context, bool halfword)
+ {
+ OpCodeT32Tb op = (OpCodeT32Tb)context.CurrOp;
+
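+ // TBB/TBH branch forward by twice the byte or halfword offset read from a table
+ // based at Rn and indexed by Rm.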
+ Operand halfwords;
+
+ if (halfword)
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), context.ShiftLeft(GetIntA32(context, op.Rm), Const(1)));
+ halfwords = InstEmitMemoryHelper.EmitReadInt(context, address, 1);
+ }
+ else
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), GetIntA32(context, op.Rm));
+ halfwords = InstEmitMemoryHelper.EmitReadIntAligned(context, address, 0);
+ }
+
+ Operand targetAddress = context.Add(Const((int)op.GetPc()), context.ShiftLeft(halfwords, Const(1)));
+
+ EmitVirtualJump(context, targetAddress, isReturn: false);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 00000000..6ac32908
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,240 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitFlowHelper
+ {
+ public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+ {
+ if (cond != Condition.Al)
+ {
+ context.BranchIfTrue(target, GetCondTrue(context, cond));
+ }
+ else
+ {
+ context.Branch(target);
+ }
+ }
+
+ public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+ {
+ Operand cmpResult = context.TryGetComparisonResult(condition);
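+ // If the comparison that last set the flags is still visible to the emitter, its
+ // result can be reused directly instead of recombining individual flag registers.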
+
+ if (cmpResult != default)
+ {
+ return cmpResult;
+ }
+
+ Operand value = Const(1);
+
+ Operand Inverse(Operand val)
+ {
+ return context.BitwiseExclusiveOr(val, Const(1));
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq:
+ value = GetFlag(PState.ZFlag);
+ break;
+
+ case Condition.Ne:
+ value = Inverse(GetFlag(PState.ZFlag));
+ break;
+
+ case Condition.GeUn:
+ value = GetFlag(PState.CFlag);
+ break;
+
+ case Condition.LtUn:
+ value = Inverse(GetFlag(PState.CFlag));
+ break;
+
+ case Condition.Mi:
+ value = GetFlag(PState.NFlag);
+ break;
+
+ case Condition.Pl:
+ value = Inverse(GetFlag(PState.NFlag));
+ break;
+
+ case Condition.Vs:
+ value = GetFlag(PState.VFlag);
+ break;
+
+ case Condition.Vc:
+ value = Inverse(GetFlag(PState.VFlag));
+ break;
+
+ case Condition.GtUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseAnd(c, Inverse(z));
+
+ break;
+ }
+
+ case Condition.LeUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseOr(Inverse(c), z);
+
+ break;
+ }
+
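+ // Signed conditions are built from N and V: GE/LT test N == V, while GT/LE
+ // additionally factor in Z.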
+ case Condition.Ge:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Lt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareNotEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Gt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v));
+
+ break;
+ }
+
+ case Condition.Le:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseOr(z, context.ICompareNotEqual(n, v));
+
+ break;
+ }
+ }
+
+ return value;
+ }
+
+ public static void EmitCall(ArmEmitterContext context, ulong immediate)
+ {
+ bool isRecursive = immediate == context.EntryAddress;
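+ // A direct call back to the entry point of the current function can be emitted as a
+ // local branch, skipping the dispatcher entirely.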
+
+ if (isRecursive)
+ {
+ context.Branch(context.GetLabel(immediate));
+ }
+ else
+ {
+ EmitTableBranch(context, Const(immediate), isJump: false);
+ }
+ }
+
+ public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
+ {
+ EmitTableBranch(context, target, isJump: false);
+ }
+
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn)
+ {
+ if (isReturn)
+ {
+ if (target.Type == OperandType.I32)
+ {
+ target = context.ZeroExtend32(OperandType.I64, target);
+ }
+
+ context.Return(target);
+ }
+ else
+ {
+ EmitTableBranch(context, target, isJump: true);
+ }
+ }
+
+ private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddress, bool isJump)
+ {
+ context.StoreToContext();
+
+ if (guestAddress.Type == OperandType.I32)
+ {
+ guestAddress = context.ZeroExtend32(OperandType.I64, guestAddress);
+ }
+
+ // Store the target guest address into the native context. The stub uses this address to dispatch into the
+ // next translation.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand dispAddressAddr = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
+ context.Store(dispAddressAddr, guestAddress);
+
+ Operand hostAddress;
+
+ // If the address is mapped onto the function table, we can skip the table walk. Otherwise we fall back
+ // to the dispatch stub.
+ if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value))
+ {
+ Operand hostAddressAddr = !context.HasPtc ?
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value)) :
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value), new Symbol(SymbolType.FunctionTable, guestAddress.Value));
+
+ hostAddress = context.Load(OperandType.I64, hostAddressAddr);
+ }
+ else
+ {
+ hostAddress = !context.HasPtc ?
+ Const((long)context.Stubs.DispatchStub) :
+ Const((long)context.Stubs.DispatchStub, Ptc.DispatchStubSymbol);
+ }
+
+ if (isJump)
+ {
+ context.Tailcall(hostAddress, nativeContext);
+ }
+ else
+ {
+ OpCode op = context.CurrOp;
+
+ Operand returnAddress = context.Call(hostAddress, OperandType.I64, nativeContext);
+
+ context.LoadFromContext();
+
+ // Note: The return value of a translated function is always an Int64 with the address execution has
+ // returned to. We expect this address to be immediately after the current instruction; if it isn't, we
+ // keep returning until we reach the dispatcher.
+ Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes);
+
+ // Try to continue within this block.
+ // If the return address doesn't point to our next instruction, we need to return so the JIT can figure
+ // out what to do.
+ Operand lblContinue = context.GetLabel(nextAddr.Value);
+ context.BranchIf(lblContinue, returnAddress, nextAddr, Comparison.Equal, BasicBlockFrequency.Cold);
+
+ context.Return(returnAddress);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash.cs b/src/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 00000000..82b3e353
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,69 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32x(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ public static void Crc32cx(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash32.cs b/src/ARMeilleure/Instructions/InstEmitHash32.cs
new file mode 100644
index 00000000..5d39f8af
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash32.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ EmitAluStore(context, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
new file mode 100644
index 00000000..55a03a4f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
@@ -0,0 +1,118 @@
+// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHashHelper
+ {
+ public const uint Crc32RevPoly = 0xedb88320;
+ public const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
+ {
+ Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
+ Debug.Assert(size >= 0 && size < 4);
+ Debug.Assert((size < 3) || (value.Type == OperandType.I64));
+
+ if (castagnoli && Optimizations.UseSse42)
+ {
+ // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
+ value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value;
+ crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc;
+
+ Intrinsic op = size switch
+ {
+ 0 => Intrinsic.X86Crc32_8,
+ 1 => Intrinsic.X86Crc32_16,
+ _ => Intrinsic.X86Crc32,
+ };
+
+ return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value);
+ }
+ else if (Optimizations.UsePclmulqdq)
+ {
+ return size switch
+ {
+ 3 => EmitCrc32Optimized64(context, crc, value, castagnoli),
+ _ => EmitCrc32Optimized(context, crc, value, castagnoli, size),
+ };
+ }
+ else
+ {
+ string name = (size, castagnoli) switch
+ {
+ (0, false) => nameof(SoftFallback.Crc32b),
+ (1, false) => nameof(SoftFallback.Crc32h),
+ (2, false) => nameof(SoftFallback.Crc32w),
+ (3, false) => nameof(SoftFallback.Crc32x),
+ (0, true) => nameof(SoftFallback.Crc32cb),
+ (1, true) => nameof(SoftFallback.Crc32ch),
+ (2, true) => nameof(SoftFallback.Crc32cw),
+ (3, true) => nameof(SoftFallback.Crc32cx),
+ _ => throw new ArgumentOutOfRangeException(nameof(size))
+ };
+
+ return context.Call(typeof(SoftFallback).GetMethod(name), crc, value);
+ }
+ }
+
+ private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
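+ // Barrett reduction as described in the Intel paper linked above: the folded value
+ // is carry-less multiplied by mu (the precomputed quotient of x^64 by the
+ // polynomial) and then by the polynomial itself to produce the reduced CRC.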
+
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+
+ switch (size)
+ {
+ case 0: data = context.VectorInsert8(context.VectorZero(), data, 0); break;
+ case 1: data = context.VectorInsert16(context.VectorZero(), data, 0); break;
+ case 2: data = context.VectorInsert(context.VectorZero(), data, 0); break;
+ }
+
+ int bitsize = 8 << size;
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ if (bitsize < 32)
+ {
+ crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
+ }
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+
+ private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
+
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+ data = context.VectorInsert(context.VectorZero(), data, 0);
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHelper.cs b/src/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 00000000..a22bb3fb
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,264 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHelper
+ {
+ public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+ {
+ Operand value = GetIntOrZR(context, rm);
+
+ switch (type)
+ {
+ case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break;
+ case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break;
+ case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break;
+
+ case IntType.Int8: value = context.SignExtend8 (value.Type, value); break;
+ case IntType.Int16: value = context.SignExtend16(value.Type, value); break;
+ case IntType.Int32: value = context.SignExtend32(value.Type, value); break;
+ }
+
+ return value;
+ }
+
+ public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)op.GetPc());
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetIntA32AlignedPC(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)(op.GetPc() & 0xfffffffc));
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetVecA32(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ if (!IsA32Return(context))
+ {
+ context.StoreToContext();
+ }
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ if (value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+ Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+
+ context.Copy(reg, value);
+ }
+ }
+
+ public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ // Only registers >= 8 are banked,
+ // with registers in the range [8, 12] being
+ // banked for the FIQ mode, and registers
+ // 13 and 14 being banked for all modes.
+ if ((uint)regIndex < 8)
+ {
+ return regIndex;
+ }
+
+ return GetBankedRegisterAlias(mode, regIndex);
+ }
+
+ public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ switch (regIndex)
+ {
+ case 8: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R8Fiq
+ : RegisterAlias.R8Usr;
+
+ case 9: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R9Fiq
+ : RegisterAlias.R9Usr;
+
+ case 10: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R10Fiq
+ : RegisterAlias.R10Usr;
+
+ case 11: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R11Fiq
+ : RegisterAlias.R11Usr;
+
+ case 12: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R12Fiq
+ : RegisterAlias.R12Usr;
+
+ case 13:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.System: return RegisterAlias.SpUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.SpFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.SpIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.SpAbt;
+ case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp;
+ case Aarch32Mode.Undefined: return RegisterAlias.SpUnd;
+
+ default: throw new ArgumentException(nameof(mode));
+ }
+
+ case 14:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.Hypervisor:
+ case Aarch32Mode.System: return RegisterAlias.LrUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.LrFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.LrIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.LrAbt;
+ case Aarch32Mode.Undefined: return RegisterAlias.LrUnd;
+
+ default: throw new ArgumentException(nameof(mode));
+ }
+
+ default: throw new ArgumentOutOfRangeException(nameof(regIndex));
+ }
+ }
+
+ public static bool IsA32Return(ArmEmitterContext context)
+ {
+ switch (context.CurrOp)
+ {
+ case IOpCode32MemMult op:
+ return true; // Setting PC using LDM is nearly always a return.
+ case OpCode32AluRsImm op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluRsReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32Mem op:
+ return op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index; // Setting PC to an address stored on the stack is nearly always a return.
+ }
+ return false;
+ }
+
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0)
+ {
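+ // BX-style writes to PC perform interworking: bit 0 of the target selects the
+ // Thumb state, and the address is aligned accordingly (halfword for Thumb, word for Arm).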
+ bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context);
+ Operand mode = context.BitwiseAnd(pc, Const(1));
+
+ SetFlag(context, PState.TFlag, mode);
+
+ Operand addr = context.ConditionalSelect(mode, context.BitwiseAnd(pc, Const(~1)), context.BitwiseAnd(pc, Const(~3)));
+
+ InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn);
+ }
+
+ public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ OperandType type = context.CurrOp.GetOperandType();
+
+ return type == OperandType.I32 ? Const(0) : Const(0L);
+ }
+ else
+ {
+ return GetIntOrSP(context, regIndex);
+ }
+ }
+
+ public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, regIndex, value);
+ }
+
+ public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+ {
+ Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ return value;
+ }
+
+ public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (value.Type == OperandType.I32)
+ {
+ value = context.ZeroExtend32(OperandType.I64, value);
+ }
+
+ context.Copy(reg, value);
+ }
+
+ public static Operand GetVec(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static Operand GetFlag(PState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+ }
+
+ public static Operand GetFpFlag(FPState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32);
+ }
+
+ public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+ {
+ context.Copy(GetFlag(stateFlag), value);
+
+ context.MarkFlagSet(stateFlag);
+ }
+
+ public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value)
+ {
+ context.Copy(GetFpFlag(stateFlag), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory.cs b/src/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..7baed14c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,184 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adr(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate));
+ }
+
+ public static void Adrp(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
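+ // ADRP forms a 4 KiB page-aligned address: the low 12 bits of the PC are cleared
+ // and the immediate is applied as a page offset.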
+ ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12);
+
+ SetIntOrZR(context, op.Rd, Const(address));
+ }
+
+ public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false);
+ public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true);
+
+ private static void EmitLdr(ArmEmitterContext context, bool signed)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ if (signed && op.Extend64)
+ {
+ EmitLoadSx64(context, address, op.Rt, op.Size);
+ }
+ else if (signed)
+ {
+ EmitLoadSx32(context, address, op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, address, op.Rt, op.Size);
+ }
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Ldr_Literal(ArmEmitterContext context)
+ {
+ IOpCodeLit op = (IOpCodeLit)context.CurrOp;
+
+ if (op.Prefetch)
+ {
+ return;
+ }
+
+ if (op.Signed)
+ {
+ EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ }
+
+ public static void Ldp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ void EmitLoad(int rt, Operand ldAddr)
+ {
+ if (op.Extend64)
+ {
+ EmitLoadSx64(context, ldAddr, rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, ldAddr, rt, op.Size);
+ }
+ }
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitLoad(op.Rt, address);
+ EmitLoad(op.Rt2, address2);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ EmitStore(context, address, op.Rt, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Stp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitStore(context, address, op.Rt, op.Size);
+ EmitStore(context, address2, op.Rt2, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ private static Operand GetAddress(ArmEmitterContext context, long addend = 0)
+ {
+ Operand address = default;
+
+ switch (context.CurrOp)
+ {
+ case OpCodeMemImm op:
+ {
+ address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ // Pre-indexing.
+ if (!op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate + addend));
+ }
+ else if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+
+ case OpCodeMemReg op:
+ {
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ Operand m = GetExtendedM(context, op.Rm, op.IntType);
+
+ if (op.Shift)
+ {
+ m = context.ShiftLeft(m, Const(op.Size));
+ }
+
+ address = context.Add(n, m);
+
+ if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+ }
+
+ return address;
+ }
+
+ private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+ {
+ // Check whether the current OpCode has post-indexed write back; if so, emit it.
+ if (context.CurrOp is OpCodeMemImm op && op.WBack)
+ {
+ if (op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate));
+ }
+
+ SetIntOrSP(context, op.Rn, address);
+ }
+ }
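+
+ // For reference, the three addressing modes handled above (standard
+ // AArch64 semantics):
+ //
+ // LDR X0, [X1, #8] - offset: address = X1 + 8, X1 unchanged.
+ // LDR X0, [X1, #8]! - pre-index: address = X1 + 8, then X1 = X1 + 8.
+ // LDR X0, [X1], #8 - post-index: address = X1, then X1 = X1 + 8.
+ //
+ // GetAddress returns the unmodified base for the post-index case, and
+ // EmitWBackIfNeeded adds the immediate when writing the base back.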
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory32.cs b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 00000000..17ec97aa
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,265 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ [Flags]
+ enum AccessType
+ {
+ Store = 0,
+ Signed = 1,
+ Load = 2,
+ Ordered = 4,
+ Exclusive = 8,
+
+ LoadZx = Load,
+ LoadSx = Load | Signed,
+ }
+
+ public static void Ldm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+ bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc);
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitLoadZx(context, address, register, WordSizeLog2);
+
+ offset += 4;
+ }
+ }
+ }
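+
+ // For example (for reference): "LDM R0, {R1, R3}" has bits 1 and 3 set in
+ // the register mask, so the loop above loads R1 from [R0] and R3 from
+ // [R0 + 4]; the offset advances only for registers that are present.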
+
+ public static void Ldr(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrsb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Ldrsh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Stm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStore(context, address, register, WordSizeLog2);
+
+ // Note: If Rn is also specified in the register list,
+ // and Rn is the first register in the list, then the
+ // value written to memory is the unmodified value,
+ // before the write back. If Rn is in the list but is
+ // not the first register, then the value written to
+ // memory varies between CPUs.
+ if (offset == 0 && op.PostOffset != 0)
+ {
+ // Emit write back after the first write.
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ offset += 4;
+ }
+ }
+ }
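+
+ // For example (architectural behavior, for reference): with
+ // "STM R0!, {R0, R1}", R0 is the first register in the list, so the value
+ // stored for R0 is the original base address; the write back above is
+ // emitted only after that first store.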
+
+ public static void Str(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+ }
+
+ public static void Strd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+ }
+
+ private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32Mem op = (IOpCode32Mem)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32AlignedPC(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand temp = default;
+
+ if (op.Index || op.WBack)
+ {
+ temp = op.Add
+ ? context.Add (n, m)
+ : context.Subtract(n, m);
+ }
+
+ if (op.WBack)
+ {
+ SetIntA32(context, op.Rn, temp);
+ }
+
+ Operand address;
+
+ if (op.Index)
+ {
+ address = temp;
+ }
+ else
+ {
+ address = n;
+ }
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ void Load(int rt, int offs, int loadSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ if ((accType & AccessType.Signed) != 0)
+ {
+ EmitLoadSx32(context, addr, rt, loadSize);
+ }
+ else
+ {
+ EmitLoadZx(context, addr, rt, loadSize);
+ }
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Load(op.Rt, 0, WordSizeLog2);
+ Load(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Load(op.Rt2, 0, WordSizeLog2);
+ Load(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Load(op.Rt, 0, size);
+ }
+ }
+ else
+ {
+ void Store(int rt, int offs, int storeSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ EmitStore(context, addr, rt, storeSize);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Store(op.Rt, 0, WordSizeLog2);
+ Store(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Store(op.Rt2, 0, WordSizeLog2);
+ Store(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Store(op.Rt, 0, size);
+ }
+ }
+ }
+
+ public static void Adr(ArmEmitterContext context)
+ {
+ IOpCode32Adr op = (IOpCode32Adr)context.CurrOp;
+ SetIntA32(context, op.Rd, Const(op.Immediate));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 00000000..c7ed01e3
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,178 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ [Flags]
+ private enum AccessType
+ {
+ None = 0,
+ Ordered = 1,
+ Exclusive = 2,
+ OrderedEx = Ordered | Exclusive
+ }
+
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered);
+ public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx);
+ public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive);
+ public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive);
+ public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx);
+
+ private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: false);
+ }
+
+ private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: true);
+ }
+
+ private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ if (pair)
+ {
+ // Exclusive loads should be atomic. For pairwise loads, we need to
+ // read all the data at once. For a 32-bit pairwise load, we do a
+ // single 64-bit load; for a 128-bit pairwise load, we need to call a
+ // special method that reads the 128 bits atomically.
+ if (op.Size == 2)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 3);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else if (op.Size == 3)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 4);
+
+ Operand valueLow = context.VectorExtract(OperandType.I64, value, 0);
+ Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1);
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ // 8, 16, 32 or 64-bit (non-pairwise) load.
+ Operand value = EmitLoadExclusive(context, address, exclusive, op.Size);
+
+ SetIntOrZR(context, op.Rt, value);
+ }
+ }
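+
+ // For reference: "LDXP W0, W1, [X2]" (size == 2) performs a single 64-bit
+ // exclusive load and splits it into two 32-bit halves, while
+ // "LDXP X0, X1, [X2]" (size == 3) needs a full 128-bit atomic read, which
+ // is why the two pairwise cases above take different paths.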
+
+ public static void Prfm(ArmEmitterContext context)
+ {
+ // Memory prefetch; execute as no-op.
+ }
+
+ public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered);
+ public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx);
+ public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive);
+ public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive);
+ public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx);
+
+ private static void EmitStr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: false);
+ }
+
+ private static void EmitStp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: true);
+ }
+
+ private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ if (pair)
+ {
+ Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
+
+ Operand t2 = GetIntOrZR(context, op.Rt2);
+
+ Operand value;
+
+ if (op.Size == 2)
+ {
+ value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
+ }
+ else /* if (op.Size == 3) */
+ {
+ value = context.VectorInsert(context.VectorZero(), t, 0);
+ value = context.VectorInsert(value, t2, 1);
+ }
+
+ EmitStoreExclusive(context, address, value, exclusive, op.Size + 1, op.Rs, a32: false);
+ }
+ else
+ {
+ EmitStoreExclusive(context, address, t, exclusive, op.Size, op.Rs, a32: false);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
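+
+ // For reference: the store pair mirrors the load pair layout. For
+ // "STXP Ws, W0, W1, [X2]" the two 32-bit values are packed into a single
+ // 64-bit value (Rt in the low half, Rt2 in the high half); for the 64-bit
+ // form they are packed into a 128-bit vector and stored with one 128-bit
+ // operation.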
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ context.MemoryBarrier();
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
new file mode 100644
index 00000000..c0b6fc39
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
@@ -0,0 +1,237 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldrex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Lda(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldab(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldaex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldah(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ // Stores.
+
+ public static void Strex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Stl(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp;
+
+ Operand address = context.Copy(GetIntA32(context, op.Rn));
+
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+ bool ordered = (accType & AccessType.Ordered) != 0;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ // Keep the load atomic: read the whole region in a single call, then
+ // decompose it into the two registers.
+
+ Operand value = EmitLoadExclusive(context, address, exclusive, size);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ SetIntA32(context, op.Rt, valueLow);
+ SetIntA32(context, op.Rt2, valueHigh);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ SetIntA32(context, op.Rt2, valueLow);
+ SetIntA32(context, op.Rt, valueHigh);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size));
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ // Combine the two registers into a single 64-bit value, ordered based on endianness.
+
+ Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt2));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32)));
+ EmitStoreExclusive(context, address, leResult, exclusive, size, op.Rd, a32: true);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32)));
+ EmitStoreExclusive(context, address, beResult, exclusive, size, op.Rd, a32: true);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Operand value = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ EmitStoreExclusive(context, address, value, exclusive, size, op.Rd, a32: true);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ // Note: This barrier is most likely not necessary, and probably
+ // doesn't make any difference since we need to do a ton of stuff
+ // (software MMU emulation) to read or write anything anyway.
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
new file mode 100644
index 00000000..9a69442a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
@@ -0,0 +1,174 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryExHelper
+ {
+ private const int ErgSizeLog2 = 4;
+
+ public static Operand EmitLoadExclusive(ArmEmitterContext context, Operand address, bool exclusive, int size)
+ {
+ if (exclusive)
+ {
+ Operand value;
+
+ if (size == 4)
+ {
+ // Only a 128-bit CAS is guaranteed to perform an atomic 128-bit load.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: false, 4);
+
+ Operand zero = context.VectorZero();
+
+ value = context.CompareAndSwap(physAddr, zero, zero);
+ }
+ else
+ {
+ value = InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+
+ context.Store(exAddrPtr, context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask())));
+
+ // Make sure the unused higher bits of the value are cleared.
+ if (size < 3)
+ {
+ context.Store(exValuePtr, Const(0UL));
+ }
+ if (size < 4)
+ {
+ context.Store(context.Add(exValuePtr, Const(exValuePtr.Type, 8L)), Const(0UL));
+ }
+
+ // Store the new exclusive value.
+ context.Store(exValuePtr, value);
+
+ return value;
+ }
+ else
+ {
+ return InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+ }
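+
+ // Worked example of the mask used above: with ErgSizeLog2 = 4,
+ // GetExclusiveAddressMask() returns ~((4 << 4) - 1) = ~0x3F, so the
+ // monitor tracks a 64-byte reservation granule; any address within the
+ // same 64-byte block matches the same reservation.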
+
+ public static void EmitStoreExclusive(
+ ArmEmitterContext context,
+ Operand address,
+ Operand value,
+ bool exclusive,
+ int size,
+ int rs,
+ bool a32)
+ {
+ if (size < 3)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (exclusive)
+ {
+ // We overwrite one of the registers (Rs), so
+ // keep a copy of the values to ensure we are working with the correct ones.
+ address = context.Copy(address);
+ value = context.Copy(value);
+
+ void SetRs(Operand value)
+ {
+ if (a32)
+ {
+ SetIntA32(context, rs, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rs, value);
+ }
+ }
+
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exAddr = context.Load(address.Type, exAddrPtr);
+
+ // STEP 1: Check if we have exclusive access to this memory region. If not, fail and skip store.
+ Operand maskedAddress = context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask()));
+
+ Operand exFailed = context.ICompareNotEqual(exAddr, maskedAddress);
+
+ Operand lblExit = Label();
+
+ SetRs(Const(1));
+
+ context.BranchIfTrue(lblExit, exFailed);
+
+ // STEP 2: We have exclusive access and the address is valid, attempt the store using CAS.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: true, size);
+
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+ Operand exValue = size switch
+ {
+ 0 => context.Load8(exValuePtr),
+ 1 => context.Load16(exValuePtr),
+ 2 => context.Load(OperandType.I32, exValuePtr),
+ 3 => context.Load(OperandType.I64, exValuePtr),
+ _ => context.Load(OperandType.V128, exValuePtr)
+ };
+
+ Operand currValue = size switch
+ {
+ 0 => context.CompareAndSwap8(physAddr, exValue, value),
+ 1 => context.CompareAndSwap16(physAddr, exValue, value),
+ _ => context.CompareAndSwap(physAddr, exValue, value)
+ };
+
+ // STEP 3: Check if we succeeded by comparing expected and in-memory values.
+ Operand storeFailed;
+
+ if (size == 4)
+ {
+ Operand currValueLow = context.VectorExtract(OperandType.I64, currValue, 0);
+ Operand currValueHigh = context.VectorExtract(OperandType.I64, currValue, 1);
+
+ Operand exValueLow = context.VectorExtract(OperandType.I64, exValue, 0);
+ Operand exValueHigh = context.VectorExtract(OperandType.I64, exValue, 1);
+
+ storeFailed = context.BitwiseOr(
+ context.ICompareNotEqual(currValueLow, exValueLow),
+ context.ICompareNotEqual(currValueHigh, exValueHigh));
+ }
+ else
+ {
+ storeFailed = context.ICompareNotEqual(currValue, exValue);
+ }
+
+ SetRs(storeFailed);
+
+ context.MarkLabel(lblExit);
+ }
+ else
+ {
+ InstEmitMemoryHelper.EmitWriteIntAligned(context, address, value, size);
+ }
+ }
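+
+ // Note on the status value written to Rs (the standard STXR/STREX
+ // convention): 0 means the store succeeded, 1 means it failed. Rs is set
+ // to 1 (failure) up front, and only overwritten with the CAS comparison
+ // result once the exclusive address check passes.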
+
+ public static void EmitClearExclusive(ArmEmitterContext context)
+ {
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+
+ // We store ulong.MaxValue to force any exclusive address check to fail,
+ // since this value is not aligned to the ERG mask.
+ context.Store(exAddrPtr, Const(ulong.MaxValue));
+ }
+
+ private static long GetExclusiveAddressMask() => ~((4L << ErgSizeLog2) - 1);
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 00000000..f97e395c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,648 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryHelper
+ {
+ private const int PageBits = 12;
+ private const int PageMask = (1 << PageBits) - 1;
+
+ private enum Extension
+ {
+ Zx,
+ Sx32,
+ Sx64
+ }
+
+ public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Zx, rt, size);
+ }
+
+ public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx32, rt, size);
+ }
+
+ public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx64, rt, size);
+ }
+
+ private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
+ }
+ else
+ {
+ EmitReadInt(context, address, rt, size);
+ }
+
+ if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc))
+ {
+ Operand value = GetInt(context, rt);
+
+ if (ext == Extension.Sx32 || ext == Extension.Sx64)
+ {
+ OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
+
+ switch (size)
+ {
+ case 0: value = context.SignExtend8 (destType, value); break;
+ case 1: value = context.SignExtend16(destType, value); break;
+ case 2: value = context.SignExtend32(destType, value); break;
+ }
+ }
+
+ SetInt(context, rt, value);
+ }
+ }
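+
+ // For example: "LDRSB W0, [X1]" uses Extension.Sx32 (sign extend the
+ // loaded byte to 32 bits) and "LDRSB X0, [X1]" uses Extension.Sx64,
+ // while zero extension (Zx) needs no extra step because the raw load
+ // already leaves the upper bits clear.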
+
+ public static void EmitLoadSimd(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitReadVector(context, address, vector, rt, elem, size);
+ }
+
+ public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitWriteVector(context, address, rt, 0, size);
+ }
+ else
+ {
+ EmitWriteInt(context, address, rt, size);
+ }
+ }
+
+ public static void EmitStoreSimd(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitWriteVector(context, address, rt, elem, size);
+ }
+
+ private static bool IsSimd(ArmEmitterContext context)
+ {
+ return context.CurrOp is IOpCodeSimd &&
+ !(context.CurrOp is OpCodeSimdMemMs ||
+ context.CurrOp is OpCodeSimdMemSs);
+ }
+
+ public static Operand EmitReadInt(ArmEmitterContext context, Operand address, int size)
+ {
+ Operand temp = context.AllocateLocal(size == 3 ? OperandType.I64 : OperandType.I32);
+
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.Load8 (physAddr); break;
+ case 1: value = context.Load16(physAddr); break;
+ case 2: value = context.Load (OperandType.I32, physAddr); break;
+ case 3: value = context.Load (OperandType.I64, physAddr); break;
+ }
+
+ context.Copy(temp, value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ context.Copy(temp, EmitReadIntFallback(context, address, size));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ return temp;
+ }
+
+ private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.Load8 (physAddr); break;
+ case 1: value = context.Load16(physAddr); break;
+ case 2: value = context.Load (OperandType.I32, physAddr); break;
+ case 3: value = context.Load (OperandType.I64, physAddr); break;
+ }
+
+ SetInt(context, rt, value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
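+
+ // The pattern above (shared by the other read/write emitters below) is an
+ // inline fast path through the page table, plus a cold slow path that
+ // falls back to a NativeInterface call when the page is unmapped or
+ // flagged for memory tracking. With a host mapped memory backend, the
+ // slow path is not needed and is omitted entirely.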
+
+ public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: false, size);
+
+ return size switch
+ {
+ 0 => context.Load8(physAddr),
+ 1 => context.Load16(physAddr),
+ 2 => context.Load(OperandType.I32, physAddr),
+ 3 => context.Load(OperandType.I64, physAddr),
+ _ => context.Load(OperandType.V128, physAddr)
+ };
+ }
+
+ private static void EmitReadVector(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.VectorInsert8 (vector, context.Load8(physAddr), elem); break;
+ case 1: value = context.VectorInsert16(vector, context.Load16(physAddr), elem); break;
+ case 2: value = context.VectorInsert (vector, context.Load(OperandType.I32, physAddr), elem); break;
+ case 3: value = context.VectorInsert (vector, context.Load(OperandType.I64, physAddr), elem); break;
+ case 4: value = context.Load (OperandType.V128, physAddr); break;
+ }
+
+ context.Copy(GetVec(rt), value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadVectorFallback(context, address, vector, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+ {
+ return context.VectorInsert(context.VectorZero(), value, 0);
+ }
+
+ private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0: context.Store8 (physAddr, value); break;
+ case 1: context.Store16(physAddr, value); break;
+ case 2: context.Store (physAddr, value); break;
+ case 3: context.Store (physAddr, value); break;
+ }
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: true, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (size == 0)
+ {
+ context.Store8(physAddr, value);
+ }
+ else if (size == 1)
+ {
+ context.Store16(physAddr, value);
+ }
+ else
+ {
+ context.Store(physAddr, value);
+ }
+ }
+
+ private static void EmitWriteVector(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetVec(rt);
+
+ switch (size)
+ {
+ case 0: context.Store8 (physAddr, context.VectorExtract8(value, elem)); break;
+ case 1: context.Store16(physAddr, context.VectorExtract16(value, elem)); break;
+ case 2: context.Store (physAddr, context.VectorExtract(OperandType.I32, value, elem)); break;
+ case 3: context.Store (physAddr, context.VectorExtract(OperandType.I64, value, elem)); break;
+ case 4: context.Store (physAddr, value); break;
+ }
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteVectorFallback(context, address, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size)
+ {
+ if (context.Memory.Type.IsHostMapped())
+ {
+ return EmitHostMappedPointer(context, address);
+ }
+
+ int ptLevelBits = context.Memory.AddressSpaceBits - PageBits;
+ int ptLevelSize = 1 << ptLevelBits;
+ int ptLevelMask = ptLevelSize - 1;
+
+ Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address;
+ Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size));
+
+ Operand pte = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask));
+
+ if (pteOffset.Type == OperandType.I32)
+ {
+ pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+ }
+
+ pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3))));
+
+ if (addrShifted.Type == OperandType.I32)
+ {
+ addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted);
+ }
+
+ // If the VA is out of range, or not aligned to the access size, force PTE to 0 by masking it.
+ pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63)));
+
+ if (lblSlowPath != default)
+ {
+ if (write)
+ {
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code).
+ }
+ else
+ {
+ pte = context.ShiftLeft(pte, Const(1));
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.ShiftRightUI(pte, Const(1));
+ }
+ }
+ else
+ {
+ // When no label is provided to jump to a slow path if the address is invalid,
+ // we do the validation ourselves, and throw if needed.
+
+ Operand lblNotWatched = Label();
+
+ // Is the page currently being tracked for read/write? If so, we need to call SignalMemoryTracking.
+ context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold);
+
+ // Signal memory tracking. The size doesn't matter, as the address is assumed to be size-aligned.
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0));
+ context.MarkLabel(lblNotWatched);
+
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code).
+
+ Operand lblNonNull = Label();
+
+ // Skip exception if the PTE address is non-null (not zero).
+ context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold);
+
+ // The call is not expected to return (it should throw).
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+ context.MarkLabel(lblNonNull);
+ }
+
+ Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
+
+ if (pageOffset.Type == OperandType.I32)
+ {
+ pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
+ }
+
+ return context.Add(pte, pageOffset);
+ }
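+
+ // Worked example of the rotate trick above (with PageBits = 12): for a
+ // 4-byte access (size = 2) at an aligned VA, RotateRight(va, 2) moves the
+ // two zero alignment bits to the top, and the following ShiftRightUI by
+ // 10 yields va >> 12, the page number. If the VA is misaligned, non-zero
+ // bits land in the top of addrShifted, making it larger than ptLevelSize;
+ // the arithmetic shift of (addrShifted - ptLevelSize) by 63 then yields 0
+ // instead of all ones, masking the PTE to 0 so the access is treated as
+ // invalid.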
+
+ public static Operand EmitHostMappedPointer(ArmEmitterContext context, Operand address)
+ {
+ if (address.Type == OperandType.I32)
+ {
+ address = context.ZeroExtend32(OperandType.I64, address);
+ }
+
+ if (context.Memory.Type == MemoryManagerType.HostMapped)
+ {
+ Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits));
+ address = context.BitwiseAnd(address, mask);
+ }
+
+ Operand baseAddr = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ return context.Add(baseAddr, address);
+ }
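+
+ // In the host mapped case, the guest address space is backed by a single
+ // contiguous host mapping, so translation reduces to an optional mask
+ // (to wrap the address into the guest address space) plus one base add.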
+
+ private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ SetInt(context, rt, EmitReadIntFallback(context, address, size));
+ }
+
+ private static Operand EmitReadIntFallback(ArmEmitterContext context, Operand address, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
+ }
+
+ return context.Call(info, address);
+ }
+
+ private static void EmitReadVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
+ case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)); break;
+ }
+
+ Operand value = context.Call(info, address);
+
+ switch (size)
+ {
+ case 0: value = context.VectorInsert8 (vector, value, elem); break;
+ case 1: value = context.VectorInsert16(vector, value, elem); break;
+ case 2: value = context.VectorInsert (vector, value, elem); break;
+ case 3: value = context.VectorInsert (vector, value, elem); break;
+ }
+
+ context.Copy(GetVec(rt), value);
+ }
+
+ private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
+ }
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static void EmitWriteVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
+ case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)); break;
+ }
+
+ Operand value = default;
+
+ if (size < 4)
+ {
+ switch (size)
+ {
+ case 0: value = context.VectorExtract8 (GetVec(rt), elem); break;
+ case 1: value = context.VectorExtract16(GetVec(rt), elem); break;
+ case 2: value = context.VectorExtract (OperandType.I32, GetVec(rt), elem); break;
+ case 3: value = context.VectorExtract (OperandType.I64, GetVec(rt), elem); break;
+ }
+ }
+ else
+ {
+ value = GetVec(rt);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static Operand GetInt(ArmEmitterContext context, int rt)
+ {
+ return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
+ }
+
+ private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+ {
+ if (context.CurrOp is OpCode32)
+ {
+ SetIntA32(context, rt, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rt, value);
+ }
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ case IOpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+
+ case IOpCode32MemReg op: return GetIntA32(context, op.Rm);
+
+ case IOpCode32Mem op: return Const(op.Immediate);
+
+ case OpCode32SimdMemImm op: return Const(op.Immediate);
+
+ default: throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32MemRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ // Shifts used for addressing never update the carry flag.
+ setCarry = false;
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break;
+ case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break;
+ case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = InstEmitAluHelper.GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
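+
+ // For reference, this follows the A32 immediate shift decoding: an
+ // immediate of 0 does not mean "no shift" for every shift type.
+ // LSL #0 -> no shift, LSR #0 -> LSR #32, ASR #0 -> ASR #32, and
+ // ROR #0 -> RRX (rotate right with extend, through the carry flag),
+ // which is why shift is remapped above before being applied.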
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMove.cs b/src/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 00000000..d551bf2d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Movk(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ OperandType type = op.GetOperandType();
+
+ Operand res = GetIntOrZR(context, op.Rd);
+
+ res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit)));
+
+ res = context.BitwiseOr(res, Const(type, op.Immediate));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
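+
+ // For example: "MOVK X0, #0x1234, LSL #16" clears bits [31:16] of X0 and
+ // ORs in 0x12340000 (the decoder pre-shifts the immediate), leaving the
+ // rest of the register unchanged.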
+
+ public static void Movn(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate));
+ }
+
+ public static void Movz(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMul.cs b/src/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 00000000..65d11b30
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,100 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
+ public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+ private static void EmitMul(ArmEmitterContext context, bool isAdd)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand a = GetIntOrZR(context, op.Ra);
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = isAdd ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
+ public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
+ public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
+ public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
+ [Flags]
+ private enum MullFlags
+ {
+ Subtract = 0,
+ Add = 1 << 0,
+ Signed = 1 << 1,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract
+ }
+
+ private static void EmitMull(ArmEmitterContext context, MullFlags flags)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand GetExtendedRegister32(int index)
+ {
+ Operand value = GetIntOrZR(context, index);
+
+ if ((flags & MullFlags.Signed) != 0)
+ {
+ return context.SignExtend32(value.Type, value);
+ }
+ else
+ {
+ return context.ZeroExtend32(value.Type, value);
+ }
+ }
+
+ Operand a = GetIntOrZR(context, op.Ra);
+
+ Operand n = GetExtendedRegister32(op.Rn);
+ Operand m = GetExtendedRegister32(op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighSI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+
+ public static void Umulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighUI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMul32.cs b/src/ARMeilleure/Instructions/InstEmitMul32.cs
new file mode 100644
index 00000000..0822f92c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul32.cs
@@ -0,0 +1,379 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ [Flags]
+ private enum MullFlags
+ {
+ Subtract = 1,
+ Add = 1 << 1,
+ Signed = 1 << 2,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract
+ }
+
+ public static void Mla(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Add(a, context.Multiply(n, m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mls(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Subtract(a, context.Multiply(n, m));
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Smmla(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedAdd);
+ }
+
+ public static void Smmls(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedSubtract);
+ }
+
+ public static void Smmul(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.Signed);
+ }
+
+ private static void EmitSmmul(ArmEmitterContext context, MullFlags flags)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf)
+ {
+ res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+ else if (flags.HasFlag(MullFlags.Subtract))
+ {
+ res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+
+ if (op.R)
+ {
+ res = context.Add(res, Const(0x80000000L));
+ }
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32)));
+
+ EmitGenericAluStoreA32(context, op.Rd, false, hi);
+ }
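+
+ // Note on the rounding step above: when the R bit is set (the SMMULR,
+ // SMMLAR and SMMLSR variants), adding 0x80000000 (half of 2^32) before
+ // extracting the high 32 bits rounds the result to the nearest integer
+ // instead of flooring it.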
+
+ public static void Smla__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.SignExtend32(OperandType.I64, a);
+ res = context.Add(res, toAdd);
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
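+
+ // The overflow check above exploits the 64-bit intermediate: the result
+ // fits in 32 bits if and only if sign extending its low 32 bits
+ // reproduces the full 64-bit value; otherwise the sticky Q flag is set.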
+
+ public static void Smlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, true);
+ }
+
+ public static void Smlal__(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Smlaw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
+ Operand toAdd = context.ShiftLeft(context.SignExtend32(OperandType.I64, a), Const(16));
+ res = context.Add(res, toAdd);
+ res = context.ShiftRightSI(res, Const(16));
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smul__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.ShiftRightSI(n, Const(16));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I32, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.ShiftRightSI(m, Const(16));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I32, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ public static void Smulw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
+ res = context.ShiftRightUI(res, Const(16));
+ res = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Umaal(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+ Operand dHi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi));
+ Operand dLo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo));
+
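+ // n*m + dHi + dLo cannot overflow 64 bits: (2^32-1)^2 + 2*(2^32-1) = 2^64-1.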
+ Operand res = context.Multiply(n, m);
+ res = context.Add(res, dHi);
+ res = context.Add(res, dLo);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Umlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, false);
+ }
+
+ public static void Umull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void EmitMlal(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (signed)
+ {
+ n = context.SignExtend32(OperandType.I64, n);
+ m = context.SignExtend32(OperandType.I64, m);
+ }
+ else
+ {
+ n = context.ZeroExtend32(OperandType.I64, n);
+ m = context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
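+ // Reassemble the 64-bit accumulator from the RdHi:RdLo register pair.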
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void UpdateQFlag(ArmEmitterContext context, Operand q)
+ {
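+ // Q is sticky: set it when the saturation condition holds, but never clear it here.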
+ Operand lblSkipSetQ = Label();
+
+ context.BranchIfFalse(lblSkipSetQ, q);
+
+ SetFlag(context, PState.QFlag, Const(1));
+
+ context.MarkLabel(lblSkipSetQ);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 00000000..7e7f26b1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,5224 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Abs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Abs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Add_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Add_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(addInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+ }
+ }
+
+ public static void Addp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+
+ Operand res = context.Add(ne0, ne1);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+ }
+ }
+
+ public static void Addp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Cls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int eSize = 8 << op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Clz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = eSize switch
+ {
+ 8 => Clz_V_I8(context, GetVec(op.Rn)),
+ 16 => Clz_V_I16(context, GetVec(op.Rn)),
+ 32 => Clz_V_I32(context, GetVec(op.Rn)),
+ _ => default,
+ };
+
+ if (res != default)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ res = context.VectorZero();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
+ // CLZ nibble table.
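+ // Byte i of the table is CLZ of the 4-bit value i: 4,3,2,2,1,1,1,1 for i = 0..7.
+ // The upper eight lanes are zero (X86GetScalar zero-extends), matching CLZ = 0 for i = 8..15.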
+ Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04);
+
+ Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f);
+ Operand c04 = X86GetAllElements(context, 0x04_04_04_04);
+
+ // CLZ of low 4 bits of elements in arg.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg);
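+ // Pshufb zeroes any lane whose index byte has the MSB set; those lanes are discarded by the mask below anyway.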
+
+ // Get the high 4 bits of elements in arg.
+ Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4));
+ hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow);
+
+ // CLZ of high 4 bits of elements in arg.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg);
+
+ // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
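+ // maskSwap moves the high byte of each 16-bit lane into its low byte (0x80 entries zero-fill).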
+ Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01);
+ Operand maskLow = X86GetAllElements(context, 0x00ff_00ff);
+ Operand c0008 = X86GetAllElements(context, 0x0008_0008);
+
+ // CLZ pair of high 8 and low 8 bits of elements in arg.
+ Operand hiloClz = Clz_V_I8(context, arg);
+ // Get CLZ of low 8 bits in each pair.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow);
+ // Get CLZ of high 8 bits in each pair.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap);
+
+ // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg)
+ {
+ // TODO: Use vplzcntd when AVX-512 is supported.
+ if (!Optimizations.UseSse2)
+ {
+ return default;
+ }
+
+ Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1);
+ Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1);
+ Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8));
+ Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1);
+ Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1);
+ Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne());
+
+ Operand c55555555 = X86GetAllElements(context, 0x55555555);
+ Operand c33333333 = X86GetAllElements(context, 0x33333333);
+ Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f);
+ Operand c0000003f = X86GetAllElements(context, 0x0000003f);
+
+ Operand tmp0;
+ Operand tmp1;
+ Operand res;
+
+ // Set all bits below the highest set bit to 1.
+ res = OrVector(ShiftRightVectorUI32(arg, 1), arg);
+ res = OrVector(ShiftRightVectorUI32(res, 2), res);
+ res = OrVector(ShiftRightVectorUI32(res, 4), res);
+ res = OrVector(ShiftRightVectorUI32(res, 8), res);
+ res = OrVector(ShiftRightVectorUI32(res, 16), res);
+
+ // Make leading 0s into leading 1s.
+ res = NotVector(res);
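+ // Example: arg = 0x00100000 -> res = 0x001fffff -> ~res = 0xffe00000, whose 11 set bits equal CLZ(arg).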
+
+ // Count leading 1s, which is the population count.
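+ // Classic SWAR popcount: fold bit pairs, then nibbles, then bytes and words (cf. Hacker's Delight, ch. 5).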
+ tmp0 = ShiftRightVectorUI32(res, 1);
+ tmp0 = AndVector(tmp0, c55555555);
+ res = SubVectorI32(res, tmp0);
+
+ tmp0 = ShiftRightVectorUI32(res, 2);
+ tmp0 = AndVector(tmp0, c33333333);
+ tmp1 = AndVector(res, c33333333);
+ res = AddVectorI32(tmp0, tmp1);
+
+ tmp0 = ShiftRightVectorUI32(res, 4);
+ tmp0 = AddVectorI32(tmp0, res);
+ res = AndVector(tmp0, c0f0f0f0f);
+
+ tmp0 = ShiftRightVectorUI32(res, 8);
+ res = AddVectorI32(tmp0, res);
+
+ tmp0 = ShiftRightVectorUI32(res, 16);
+ res = AddVectorI32(tmp0, res);
+
+ res = AndVector(res, c0000003f);
+
+ return res;
+ }
+
+ public static void Cnt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de;
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, ne);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fabd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
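+ // Haddps with n as both sources sums adjacent pairs; lane 0 then holds n[0] + n[1].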
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+
+ return context.AddIntrinsic(addInst, op1, op2);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
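+ // Vfmadd231ss computes a + n*m with a single rounding (a true fused multiply-add).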
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmax_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmla_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
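+ // Build a Shufps immediate that replicates element op.Index into all four lanes.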
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmla_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmla_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmls_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Fmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0)
+ {
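+ // -0f has only the sign bit set; XOR with it flips the sign of the scalar.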
+ Operand mask = X86GetScalar(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand mask = X86GetScalar(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fnmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m);
+ }
+ else
+ {
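+ // FNMADD is -a - n*m; without FMA, negate a and subtract the product from it.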
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Frecpe_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecpe_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecps_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
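+ // FRECPS computes 2 - n*m, the Newton-Raphson refinement step for a reciprocal estimate.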
+ Operand mask = X86GetScalar(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetScalar(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecps_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecpx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
+ });
+ }
+ }
+
+ public static void Frinta_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinti_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frinti_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
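+                // FRINTX also rounds by the FPCR mode; the inexact exception that
+                // distinguishes it from FRINTI is not modelled by this fallback.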
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintz_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frintz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
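+            // RSQRTSS is only an approximation; the helper rounds the estimate so it
+            // better matches the precision of ARM's FRSQRTE table-based estimate.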
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrts_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
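+                    // FRSQRTS step: (3.0 - n * m) / 2.0. The 1.5 vector is the special-case
+                    // result (FRSQRTS of 0 and infinity is defined to be 1.5), selected by
+                    // EmitSse41RecipStepSelectOpF for the 0 * infinity case.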
+ Operand maskHalf = X86GetScalar(context, 0.5f);
+ Operand maskThree = X86GetScalar(context, 3f);
+ Operand maskOneHalf = X86GetScalar(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetScalar(context, 0.5d);
+ Operand maskThree = X86GetScalar(context, 3d);
+ Operand maskOneHalf = X86GetScalar(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsqrt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Mla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Add);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mla_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Subtract);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.None);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Mul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Neg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Neg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Pmull_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseArm64Pmull)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
+ }
+ else if (Optimizations.UsePclmulqdq && op.Size == 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
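+                // Bit 0 of imm8 selects the n qword and bit 4 the m qword: 0x00 multiplies
+                // the low halves (PMULL), 0x11 the high halves (PMULL2).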
+ int imm8 = op.RegisterSize == RegisterSize.Simd64 ? 0b0000_0000 : 0b0001_0001;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, n, m, Const(imm8));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
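+                // PMULL reads the low 64 bits of each source; PMULL2 (Simd128) reads the
+                // high 64 bits, so shift those down first.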
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ n = context.VectorZeroUpper64(n);
+ m = context.VectorZeroUpper64(m);
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Operand res = context.VectorZero();
+
+ if (op.Size == 0)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, n);
+ m = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, m);
+
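+                    // Carry-less 8x8 -> 16 multiply: for each bit i of n, broadcast that bit
+                    // across its 16-bit lane (shift it up to the sign bit, then arithmetic
+                    // shift back), AND it with m << i, and XOR the partial product into res.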
+ for (int i = 0; i < 8; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15));
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ Operand zero = context.VectorZero();
+
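+                    // Carry-less 64x64 -> 128 multiply: duplicate n's low qword, extract bit i
+                    // as an all-ones/all-zeros qword mask, AND it with the 128-bit left shift
+                    // of m, and XOR the partial product into res.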
+ for (int i = 0; i < 64; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n);
+ mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63));
+ mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask);
+
+ Operand tmp = EmitSse2Sll_128(context, m, i);
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ res = context.VectorZero();
+
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 8;
+
+ for (int index = 0; index < 8; index++)
+ {
+ Operand ne = context.VectorExtract8(n, part + index);
+ Operand me = context.VectorExtract8(m, part + index);
+
+ Operand de = EmitPolynomialMultiply(context, ne, me, 8);
+
+ res = EmitVectorInsert(context, res, de, index, 1);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 1;
+
+ Operand ne = context.VectorExtract(OperandType.I64, n, part);
+ Operand me = context.VectorExtract(OperandType.I64, m, part);
+
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Raddhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+ }
+ }
+
+ public static void Rsubhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+ }
+ }
+
+ public static void Saba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
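+            // The second-half variant (SABDL2) reads the upper 64 bits, so shift them down.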
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovsxbw
+ : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: true);
+ }
+ }
+
+ public static void Saddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: false);
+ }
+ }
+
+ public static void Saddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Shadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
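+            // Halving add without overflow: (n + m) >> 1 == (n & m) + ((n ^ m) >> 1),
+            // with an arithmetic shift for the signed case. E.g. n = 7, m = 5:
+            // (7 & 5) + ((7 ^ 5) >> 1) = 5 + 1 = 6 == (7 + 5) >> 1.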
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Shsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
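+            // Bias both inputs into unsigned range so PAVG (unsigned rounded average)
+            // can be used: n' - avg(n', m') == (n - m) >> 1 arithmetically, PAVG's +1
+            // making the halving round toward minus infinity as SHSUB requires.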
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand nPlusMask = context.AddIntrinsic(addInst, n, mask);
+ Operand mPlusMask = context.AddIntrinsic(addInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, nPlusMask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Smax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Smull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Sqabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqrdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqxtun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqxtun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Srhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
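+            // Bias the operands out of signed range so PAVG computes the rounded
+            // average ((n + m + 1) >> 1) as unsigned math, then add the bias back.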
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand nMinusMask = context.AddIntrinsic(subInst, n, mask);
+ Operand mMinusMask = context.AddIntrinsic(subInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, mask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightSI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Ssubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Ssubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Subhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+ }
+ }
+
+ public static void Suqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Suqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Uaba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovzxbw
+ : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: true);
+ }
+ }
+
+ public static void Uaddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: false);
+ }
+ }
+
+ public static void Uaddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
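+            // Same AND/XOR halving-add trick as Shadd_V, with a logical shift since
+            // the inputs are unsigned.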
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Uhsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
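+            // UHSUB via PAVG: n - ((n + m + 1) >> 1) == (n - m) >> 1 for unsigned
+            // inputs. E.g. n = 0, m = 1: 0 - pavg(0, 1) = 0 - 1 = 0xFF, which is
+            // (0 - 1) >> 1 truncated to the element size.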
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Umax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Umull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Uqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
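+            // PAVG implements URHADD directly: (n + m + 1) >> 1 on unsigned elements.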
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightUI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Usqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Usubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
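+ // Sums adjacent element pairs of Rn into elements twice as wide, optionally accumulating into Rd.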
+ private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand e = context.Add(ne0, ne1);
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
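+ // Returns the high half of 2*n*m, optionally rounded: (2*n*m + round) >> eSize.
+ // With rounding, the one case that overflows the 64-bit intermediate (both inputs at the
+ // 32-bit minimum) surfaces as int.MinValue after the shift; it is negated so the caller's
+ // saturation step clamps it.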
+ private static Operand EmitDoublingMultiplyHighHalf(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = context.Multiply(n, m);
+
+ if (!round)
+ {
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ }
+ else
+ {
+ long roundConst = 1L << (eSize - 1);
+
+ res = context.ShiftLeft(res, Const(1));
+
+ res = context.Add(res, Const(roundConst));
+
+ res = context.ShiftRightSI(res, Const(eSize));
+
+ Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));
+
+ res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
+ }
+
+ return res;
+ }
+
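+ // Narrows each double-width emit() result by taking its high eSize bits, with optional rounding;
+ // the "2" variants (128-bit RegisterSize) write the upper half of Rd and preserve the lower half.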
+ private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int elems = 8 >> op.Size;
+ int eSize = 8 << op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ long roundConst = 1L << (eSize - 1);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);
+
+ Operand de = emit(ne, me);
+
+ if (round)
+ {
+ de = context.Add(de, Const(roundConst));
+ }
+
+ de = context.ShiftRightUI(de, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static Operand EmitMax64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareGreaterOrEqual (op1, op2)
+ : context.ICompareGreaterOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
+ private static Operand EmitMin64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareLessOrEqual (op1, op2)
+ : context.ICompareLessOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
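+ // SSE4.1 ROUND* has no "to nearest, ties away from zero" encoding, so that rounding mode
+ // goes through a dedicated helper.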
+ private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true);
+ }
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
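+ // Rounds the raw single-precision bits to the nearest multiple of 1 << 15, keeping the sign,
+ // exponent and top 8 fraction bits; NaN/Inf lanes (all-ones exponent) pass through unchanged.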
+ private static Operand EmitSse41Round32Exp8OpF(ArmEmitterContext context, Operand value, bool scalar)
+ {
+ Operand roundMask;
+ Operand truncMask;
+ Operand expMask;
+
+ if (scalar)
+ {
+ roundMask = X86GetScalar(context, 0x4000);
+ truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetScalar(context, 0x7F800000);
+ }
+ else
+ {
+ roundMask = X86GetAllElements(context, 0x4000);
+ truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetAllElements(context, 0x7F800000);
+ }
+
+ Operand oValue = value;
+ Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
+ Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);
+
+ value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
+ value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
+ }
+
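+ // Builds a per-lane mask for the zero-times-infinity special case (sign bit shifted out, then
+ // compared against zero and the shifted exponent mask) and substitutes the provided mask value
+ // for the computed result in those lanes.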
+ private static Operand EmitSse41RecipStepSelectOpF(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ Operand res,
+ Operand mask,
+ bool scalar,
+ int sizeF)
+ {
+ Intrinsic cmpOp;
+ Intrinsic shlOp;
+ Intrinsic blendOp;
+ Operand zero = context.VectorZero();
+ Operand expMask;
+
+ if (sizeF == 0)
+ {
+ cmpOp = Intrinsic.X86Pcmpeqd;
+ shlOp = Intrinsic.X86Pslld;
+ blendOp = Intrinsic.X86Blendvps;
+ expMask = scalar ? X86GetScalar(context, 0x7F800000 << 1) : X86GetAllElements(context, 0x7F800000 << 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ cmpOp = Intrinsic.X86Pcmpeqq;
+ shlOp = Intrinsic.X86Psllq;
+ blendOp = Intrinsic.X86Blendvpd;
+ expMask = scalar ? X86GetScalar(context, 0x7FF0000000000000L << 1) : X86GetAllElements(context, 0x7FF0000000000000L << 1);
+ }
+
+ n = context.AddIntrinsic(shlOp, n, Const(1));
+ m = context.AddIntrinsic(shlOp, m, Const(1));
+
+ Operand nZero = context.AddIntrinsic(cmpOp, n, zero);
+ Operand mZero = context.AddIntrinsic(cmpOp, m, zero);
+ Operand nInf = context.AddIntrinsic(cmpOp, n, expMask);
+ Operand mInf = context.AddIntrinsic(cmpOp, m, expMask);
+
+ Operand nmZero = context.AddIntrinsic(Intrinsic.X86Por, nZero, mZero);
+ Operand nmInf = context.AddIntrinsic(Intrinsic.X86Por, nInf, mInf);
+ Operand nmZeroInf = context.AddIntrinsic(Intrinsic.X86Pand, nmZero, nmInf);
+
+ return context.AddIntrinsic(blendOp, res, mask, nmZeroInf);
+ }
+
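+ // Produces per-lane NaN masks: an unordered self-compare flags NaNs, and the quiet bit
+ // (bit 22 for singles, bit 51 for doubles) splits them into quiet and signaling sets.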
+ public static void EmitSse2VectorIsNaNOpF(
+ ArmEmitterContext context,
+ Operand opF,
+ out Operand qNaNMask,
+ out Operand sNaNMask,
+ bool? isQNaN = null)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = X86GetAllElements(context, 1 << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default;
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = X86GetAllElements(context, 1L << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default;
+ }
+ }
+
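+ // ARM NaN propagation for two-operand FP ops: a signaling NaN wins over a quiet one, n over m
+ // on ties; the chosen NaN is quieted by setting its quiet bit, and emit() only supplies the
+ // result for lanes where both inputs are ordered.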
+ public static Operand EmitSse41ProcessNaNsOpF(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
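+ // SSE max/min return the second operand when comparing +0 and -0, so the result sign is
+ // recomputed as the AND (max) or OR (min) of the operand sign bits.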
+ private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ }
+
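+ // FMAXNM/FMINNM semantics: when exactly one operand is a quiet NaN, it is replaced with the
+ // losing infinity (-Inf for max, +Inf for min) so that the numeric operand wins the comparison.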
+ private static Operand EmitSse41MaxMinNumOpF(
+ ArmEmitterContext context,
+ bool isMaxNum,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
+ private enum AddSub
+ {
+ None,
+ Add,
+ Subtract
+ }
+
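+ // SSE has no byte multiply: for size 0, odd bytes come from multiplying the high halves of each
+ // 16-bit lane, even bytes from a plain 16-bit multiply, blended together with a 0x00FF00FF mask.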
+ private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
+ Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+
+ res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+
+ Operand mask = X86GetAllElements(context, 0x00FF00FF);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
+ }
+ else if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+ }
+
+ Operand d = GetVec(op.Rd);
+
+ if (addSub == AddSub.Add)
+ {
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, d, res);
+ }
+ else if (addSub == AddSub.Subtract)
+ {
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+
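+ // Signed absolute difference: computes both n - m and m - n, keeping whichever is positive in
+ // each lane as selected by the signed greater-than mask.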
+ private static void EmitSse41VectorSabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic cmpgtInst = X86PcmpgtInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
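+ // Unsigned absolute difference: pmaxu plus an inverted compare-equal builds a mask of lanes
+ // where n > m, which then selects between n - m and m - n.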
+ private static void EmitSse41VectorUabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic maxInst = X86PmaxuInstruction[size];
+
+ Operand max = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpeqInst = X86PcmpeqInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m);
+
+ Operand onesMask = X86GetAllElements(context, -1L);
+
+ cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitSse2Sll_128(ArmEmitterContext context, Operand op, int shift)
+ {
+ // The upper part of op is assumed to be zero.
+ Debug.Assert(shift >= 0 && shift < 64);
+
+ if (shift == 0)
+ {
+ return op;
+ }
+
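+ // Capture the bits that cross the 64-bit lane boundary: move the low qword into the high
+ // qword, then shift right so only the carried-over bits remain.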
+ Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8));
+ high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift));
+
+ Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift));
+
+ return context.AddIntrinsic(Intrinsic.X86Por, high, low);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
new file mode 100644
index 00000000..a9994e41
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -0,0 +1,1703 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vabd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabdl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabs_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, false);
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+
+ public static void Vabs_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, true);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
+ public static void Vadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2));
+ }
+ }
+
+ public static void Vadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vadd_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PaddInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Vaddl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vaddw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vcnt(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand me = EmitVectorExtractZx32(context, op.Qm, op.Im + index, op.Size);
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicInt(Intrinsic.X86Popcnt, me);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vdup(ArmEmitterContext context)
+ {
+ OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp;
+
+ Operand insert = GetIntA32(context, op.Rt);
+
+ // Zero extend into an I64, then replicate; much faster than inserting each element individually.
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd + 1, insert);
+ }
+ }
+
+ public static void Vdup_1(ArmEmitterContext context)
+ {
+ OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp;
+
+ Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);
+
+ // Zero extend into an I64, then replicate; much faster than inserting each element individually.
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd | 1, insert);
+ }
+ }
+
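+ // Builds a 16-byte PSHUFB control, returned as (high, low) qwords: bytes in [start, start + length)
+ // select consecutive source bytes beginning at startByte; all other bytes are 0x80 (zero the output).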
+ private static (long, long) MaskHelperByteSequence(int start, int length, int startByte)
+ {
+ int end = start + length;
+ int b = startByte;
+ long result = 0;
+ long result2 = 0;
+ for (int i = 0; i < 8; i++)
+ {
+ result |= (long)((i >= end || i < start) ? 0x80 : b++) << (i * 8);
+ }
+ for (int i = 8; i < 16; i++)
+ {
+ result2 |= (long)((i >= end || i < start) ? 0x80 : b++) << ((i - 8) * 8);
+ }
+ return (result2, result);
+ }
+
+ public static void Vext(ArmEmitterContext context)
+ {
+ OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp;
+ int elems = op.GetBytesCount();
+ int byteOff = op.Immediate;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ // Writing d low to high: the source bytes start <imm> deep into n and overlap into m.
+ // Shuffle n's bytes down by <imm> and m's bytes up by (elems - imm),
+ // then OR the two together for the result.
+
+ (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff);
+ (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0);
+ Operand nMask, mMask;
+ if (!op.Q)
+ {
+ // Do the same operation to the bytes in the top doubleword too, as our target could be in either.
+ nMaskHigh = nMaskLow + 0x0808080808080808L;
+ mMaskHigh = mMaskLow + 0x0808080808080808L;
+ }
+ nMask = X86GetElements(context, nMaskHigh, nMaskLow);
+ mMask = X86GetElements(context, mMaskHigh, mMaskLow);
+ Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask);
+ Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart);
+ });
+ }
+ else
+ {
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand extract;
+
+ if (byteOff >= elems)
+ {
+ extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
+ }
+ else
+ {
+ extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
+ }
+ byteOff++;
+
+ res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+
+ public static void Vfma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfma_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ // Halving add: (op1 + op2) >> 1. 32-bit elements are widened to 64 bits first so the
+ // carry out of bit 31 is not lost.
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = op.U ? context.ZeroExtend32(OperandType.I64, op1) : context.SignExtend32(OperandType.I64, op1);
+ op2 = op.U ? context.ZeroExtend32(OperandType.I64, op2) : context.SignExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.ShiftRightUI(context.Add(op1, op2), Const(1));
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, !op.U);
+ }
+
+ public static void Vmov_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, 0, 0);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => op1);
+ }
+ }
+
+ public static void Vmovn(ArmEmitterContext context)
+ {
+ EmitVectorUnaryNarrowOp32(context, (op1) => op1);
+ }
+
+ public static void Vneg_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vnmul_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpSimd32(context, (n, m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Vnmla_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vnmls_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vneg_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2));
+ }
+ }
+
+ public static void Vmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2));
+ }
+ }
+
+ public static void Vminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmla_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
+ });
+ }
+ }
+
+ public static void Vmla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmla_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmla_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlal_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U);
+ }
+
+ public static void Vmls_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
+ });
+ }
+ }
+
+ public static void Vmls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmls_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmls_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlsl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U);
+ }
+
+ public static void Vmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U) // This instruction is always signed, U indicates polynomial mode.
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Vmul_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2));
+ }
+ }
+ else
+ {
+ EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
+ }
+ }
+
+ public static void Vmull_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+
+ public static void Vmull_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ if (op.Polynomial)
+ {
+ if (op.Size == 0) // P8
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+ }
+ else /* if (op.Size == 2) // P64 */
+ {
+ Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
+ Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpadd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpaddl(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorPairwiseLongOpI32(context, (op1, op2) => context.Add(op1, op2), (op.Opc & 1) == 0);
+ }
+
+ public static void Vpmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
+ return context.ConditionalSelect(greater, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vpmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand lesser = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2);
+ return context.ConditionalSelect(lesser, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vqadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: true, !op.U);
+ }
+
+ public static void Vqdmulh(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ int eSize = 8 << op.Size;
+
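+ // Signed saturating doubling multiply returning the high half: (2 * op1 * op2) >> eSize,
+ // computed as (op1 * op2) >> (eSize - 1) on elements widened as needed, then saturated.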
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op2 = context.SignExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Multiply(op1, op2);
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, signed: true);
+ }
+
+ public static void Vqmovn(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ bool signed = !op.Q;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signed, signed), signed);
+ }
+
+ public static void Vqmovun(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
+ }
+
+ public static void Vqsub(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: false, !op.U);
+ }
+
+ public static void Vrev(ArmEmitterContext context)
+ {
+ OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask;
+ switch (op.Size)
+ {
+ case 3:
+ // Rev64
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 2:
+ return context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6)));
+ }
+ break;
+ case 2:
+ // Rev32
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x0d0c0f0e_09080b0aL, 0x05040706_01000302L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+ break;
+ case 1:
+ // Rev16
+ mask = X86GetElements(context, 0x0e0f_0c0d_0a0b_0809L, 0x0607_0405_0203_0001L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+
+ throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ switch (op.Opc)
+ {
+ case 0:
+ switch (op.Size) // Swap bytes.
+ {
+ case 1:
+ return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1);
+ case 2:
+ case 3:
+ return context.ByteSwap(op1);
+ }
+ break;
+ case 1:
+ switch (op.Size)
+ {
+ case 2:
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16)));
+ case 3:
+ return context.BitwiseOr(
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))),
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16))));
+ }
+ break;
+ case 2:
+ // Swap upper and lower halves.
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32)));
+ }
+
+                throw new InvalidOperationException("Invalid VREV opcode/size combination."); // Should be unreachable.
+ });
+ }
+ }
+
+ public static void Vrecpe(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRecipEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrecpe not currently implemented.");
+ }
+ }
+
+ public static void Vrecps(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+            // Newton-Raphson reciprocal step: FRECPS(n, m) = 2 - n * m.
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskTwo = X86GetAllElements(context, 2f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subps, maskTwo, res);
+ }
+ else
+ {
+ Operand maskTwo = X86GetAllElements(context, 2d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subpd, maskTwo, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vrhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+                if (op.Size == 2)
+                {
+                    // Widen to 64 bits so the +1 rounding carry is not lost. The extension
+                    // must match the operand signedness: zero-extending signed elements
+                    // gives a wrong result whenever the widened sum would be negative.
+                    op1 = op.U ? context.ZeroExtend32(OperandType.I64, op1) : context.SignExtend32(OperandType.I64, op1);
+                    op2 = op.U ? context.ZeroExtend32(OperandType.I64, op2) : context.SignExtend32(OperandType.I64, op2);
+                }
+
+ Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L));
+ res = context.ShiftRightUI(res, Const(1));
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, !op.U);
+ }
+
+ public static void Vrsqrte(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRSqrtEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrsqrte not currently implemented.");
+ }
+ }
+
+ public static void Vrsqrts(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+            // Newton-Raphson reciprocal square root step: FRSQRTS(n, m) = (3 - n * m) / 2.
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ }
+ else
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vsel(ArmEmitterContext context)
+ {
+ OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp;
+
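+            // VSEL copies Rn or Rm to Rd based on the current condition flags; only the
+            // EQ, GE, GT and VS conditions are encodable (the inverse conditions are
+            // obtained by swapping the source operands at assembly time).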
+ Operand condition = default;
+
+ switch (op.Cc)
+ {
+ case OpCode32SimdSelMode.Eq:
+ condition = GetCondTrue(context, Condition.Eq);
+ break;
+ case OpCode32SimdSelMode.Ge:
+ condition = GetCondTrue(context, Condition.Ge);
+ break;
+ case OpCode32SimdSelMode.Gt:
+ condition = GetCondTrue(context, Condition.Gt);
+ break;
+ case OpCode32SimdSelMode.Vs:
+ condition = GetCondTrue(context, Condition.Vs);
+ break;
+ }
+
+ EmitScalarBinaryOpI32(context, (op1, op2) =>
+ {
+ return context.ConditionalSelect(condition, op1, op2);
+ });
+ }
+
+ public static void Vsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Vsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PsubInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsubl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
+ public static void Vsubw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
+ private static void EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
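+            // Elements narrower than 64 bits are computed in a wider type and saturated
+            // back down; 64-bit elements have no wider type available, so dedicated
+            // saturating add/sub helpers are used instead.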
+ EmitVectorBinaryOpI32(context, (ne, me) =>
+ {
+ if (op.Size <= 2)
+ {
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }
+ else if (add) /* if (op.Size == 3) */
+ {
+ return signed
+ ? EmitBinarySignedSatQAdd(context, ne, me)
+ : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ return signed
+ ? EmitBinarySignedSatQSub(context, ne, me)
+ : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }, signed);
+ }
+
+ private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
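+            // FMAXNM/FMINNM semantics: if exactly one operand is a quiet NaN, the other
+            // operand wins. Replace a lone NaN operand with -Inf (max) or +Inf (min) so
+            // that the plain SSE max/min then selects the numeric operand.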
+ Func<Operand, Operand, Operand> genericEmit = (n, m) =>
+ {
+ Operand nNum = context.Copy(n);
+ Operand mNum = context.Copy(m);
+
+ InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
+ InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+ }
+ };
+
+ if (scalar)
+ {
+ EmitScalarBinaryOpSimd32(context, genericEmit);
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, genericEmit);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 00000000..c32b64ba
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,799 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Cmeq_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmge_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
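+                // There is no packed >= compare: compute m > n with the operands swapped,
+                // then invert the result with PANDN against an all-ones mask to get n >= m.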
+ Operand res = context.AddIntrinsic(cmpInst, m, n);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmgt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true);
+ }
+
+ public static void Cmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhi_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhi_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
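+                // SSE has no unsigned integer compare: max_u(m, n) == m holds iff m >=u n,
+                // so inverting that equality yields n >u m.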
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, m);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhs_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhs_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
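+                // max_u(n, m) == n holds iff n >=u m, giving the unsigned >= directly.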
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmle_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero());
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmlt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true);
+ }
+
+ public static void Cmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmtst_S(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: true);
+ }
+
+ public static void Cmtst_V(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: false);
+ }
+
+ public static void Facge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Facgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Fccmp_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: false);
+ }
+
+ public static void Fccmpe_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: true);
+ }
+
+ public static void Fcmeq_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true);
+ }
+ }
+
+ public static void Fcmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false);
+ }
+ }
+
+ public static void Fcmge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true);
+ }
+ }
+
+ public static void Fcmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false);
+ }
+ }
+
+ public static void Fcmgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true);
+ }
+ }
+
+ public static void Fcmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false);
+ }
+ }
+
+ public static void Fcmle_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true);
+ }
+ }
+
+ public static void Fcmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false);
+ }
+ }
+
+ public static void Fcmlt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true);
+ }
+ }
+
+ public static void Fcmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false);
+ }
+ }
+
+ public static void Fcmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ }
+
+ public static void Fcmpe_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ }
+
+ private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
+
+ EmitSetNzcv(context, op.Nzcv);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ EmitFcmpOrFcmpe(context, signalNaNs);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
+ {
+ Operand Extract(int value, int bit)
+ {
+ if (bit != 0)
+ {
+ value >>= bit;
+ }
+
+ value &= 1;
+
+ return Const(value);
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool cmpWithZero = !(op is OpCodeSimdFcond) && op.Bit3;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
+
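+                // The signaling (S) ordered compare raises an invalid-operation exception
+                // for any NaN input (FCMPE semantics); the quiet (Q) form only does so for
+                // signaling NaNs (FCMP).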
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (op.Size == 0)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
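+                // No ordered relation holds: encode the unordered result as NZCV = 0011.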
+ SetFlag(context, PState.VFlag, Const(1));
+ SetFlag(context, PState.CFlag, Const(1));
+ SetFlag(context, PState.ZFlag, Const(0));
+ SetFlag(context, PState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = context.VectorExtract(type, GetVec(op.Rm), 0);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
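+            // All-ones mask at the element width, selected when the comparison holds.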
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size);
+ }
+ else
+ {
+ me = Const(0L);
+ }
+
+ Operand isTrue = emitCmp(ne, me);
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand test = context.BitwiseAnd(ne, me);
+
+ Operand isTrue = context.ICompareNotEqual(test, Const(0L));
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmpOpF(ArmEmitterContext context, string name, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+            int elems = !scalar ? op.GetBytesCount() >> (sizeF + 2) : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = context.VectorExtract(type, GetVec(binOp.Rm), index);
+ }
+ else
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+
+ if (absolute)
+ {
+ ne = EmitUnaryMathCall(context, nameof(Math.Abs), ne);
+ me = EmitUnaryMathCall(context, nameof(Math.Abs), me);
+ }
+
+ Operand e = EmitSoftFloatCall(context, name, ne, me);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse2OrAvxCmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (absolute)
+ {
+ Operand mask = scalar ? X86GetScalar(context, int.MaxValue) : X86GetAllElements(context, int.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andps, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andps, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (absolute)
+ {
+ Operand mask = scalar ? X86GetScalar(context, long.MaxValue) : X86GetAllElements(context, long.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andpd, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andpd, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
new file mode 100644
index 00000000..a990e057
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -0,0 +1,437 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit32
+ {
+ public static void Vceq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), false);
+ }
+ }
+
+ public static void Vceq_I(ArmEmitterContext context)
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false);
+ }
+
+ public static void Vceq_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false);
+ }
+ }
+
+ public static void Vcge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), false);
+ }
+ }
+
+ public static void Vcge_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U);
+ }
+
+ public static void Vcge_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vcgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), false);
+ }
+ }
+
+ public static void Vcgt_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U);
+ }
+
+ public static void Vcgt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true);
+ }
+ }
+
+ public static void Vcle_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vclt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true);
+ }
+ }
+
+ private static void EmitCmpOpF32(ArmEmitterContext context, string name, bool zero)
+ {
+ if (zero)
+ {
+ EmitVectorUnaryOpF32(context, (m) =>
+ {
+ Operand zeroOp = m.Type == OperandType.FP64 ? ConstF(0.0d) : ConstF(0.0f);
+
+ return EmitSoftFloatCallDefaultFpscr(context, name, m, zeroOp);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (n, m) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, name, n, m);
+ });
+ }
+ }
+
+ private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType)
+ {
+            Operand ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L));
+ }
+
+ private static void EmitCmpOpI32(
+ ArmEmitterContext context,
+ Func2I signedOp,
+ Func2I unsignedOp,
+ bool zero,
+ bool signed)
+ {
+ if (zero)
+ {
+ if (signed)
+ {
+ EmitVectorUnaryOpSx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, signedOp(m, zeroV), type);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, unsignedOp(m, zeroV), type);
+ });
+ }
+ }
+ else
+ {
+ if (signed)
+ {
+ EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type));
+ }
+ }
+ }
+
+ public static void Vcmp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, false);
+ }
+ }
+
+ public static void Vcmpe(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, true);
+ }
+ }
+
+ private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool cmpWithZero = (op.Opc & 2) != 0;
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ bool doubleSize = sizeF != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand n = GetVecA32(op.Vd >> shift);
+
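+                // The A32 scalar can live in any lane of the backing Q register, so move
+                // it into lane 0 before using the x86 scalar compare intrinsics.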
+ n = EmitSwapScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (!doubleSize)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+ else
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
+ SetFpFlag(context, FPState.VFlag, Const(1));
+ SetFpFlag(context, FPState.CFlag, Const(1));
+ SetFpFlag(context, FPState.ZFlag, Const(0));
+ SetFpFlag(context, FPState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = ExtractScalar(context, type, op.Vd);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = ExtractScalar(context, type, op.Vm);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetFpscrNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetFpscrNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0));
+ SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1));
+ SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
+ SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+ Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m, Const((int)cond));
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 00000000..db24e029
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,99 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
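+                // AArch64 AESD xors the round key before the inverse rounds, while x86
+                // AESDECLAST xors it afterwards; xor up front and pass an all-zero round
+                // key to compensate.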
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ Operand roundKey = context.VectorZero();
+
+                // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+                // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
new file mode 100644
index 00000000..f713a388
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
@@ -0,0 +1,99 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ partial class InstEmit32
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ Operand roundKey = context.VectorZero();
+
+ // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+ // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 00000000..652ad397
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1891 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Fcvt_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0 && op.Opc == 1) // Single -> Double.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
+ res = context.VectorZeroUpper96(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Operand n = GetVec(op.Rn);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, n);
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ res = context.VectorZeroUpper64(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else // Invalid encoding.
+ {
+ Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
+ }
+ }
+
+ public static void Fcvtas_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtas_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtas_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtau_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtau_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtau_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtl_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ if (sizeF == 0)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
+
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.VectorInsert(res, e, index);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
+
+ Operand e = context.ConvertToFP(OperandType.FP64, ne);
+
+ res = context.VectorInsert(res, e, index);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcvtms_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtms_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtmu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand d = GetVec(op.Rd);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ if (sizeF == 0)
+ {
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = EmitVectorInsert(context, res, e, part + index, 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand e = context.ConvertToFP(OperandType.FP32, ne);
+
+ res = context.VectorInsert(res, e, part + index);
+ }
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Fcvtns_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1));
+ }
+ }
+
+ public static void Fcvtns_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtns_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtnu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtnu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtps_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtpu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtzs_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzs_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtzs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzu_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtzu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Scvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Scvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = emit(ne);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = EmitF2iFBitsMul(context, ne, fBits);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: true);
+ }
+
+ private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: false);
+ }
+
+ private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, emit(ne), 0)
+ : EmitScalarFcvtu(context, emit(ne), 0);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: true);
+ }
+
+ private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: false);
+ }
+
+ private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, ne, op.FBits)
+ : EmitScalarFcvtu(context, ne, op.FBits);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
+
+ Operand e = EmitFPConvert(context, ne, sizeF, signed);
+
+ e = EmitI2fFBitsMul(context, e, fBits);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static int GetFBits(ArmEmitterContext context)
+ {
+ if (context.CurrOp is OpCodeSimdShImm op)
+ {
+ return GetImmShr(op);
+ }
+
+ return 0;
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+ Debug.Assert((uint)size < 2);
+
+ OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
+ }
+ }
+
+ private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
+ }
+ }
+
+ public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
+ {
+ Debug.Assert(opF.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL
+ Operand res = context.VectorCreateScalar(longL);
+
+ if (!scalar)
+ {
+ Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH);
+ Operand resH = context.VectorCreateScalar(longH);
+ res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
+ {
+ Debug.Assert(op.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
+ Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
+
+ if (!scalar)
+ {
+ Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
+ Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
+ res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
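+ // 0x800000 is one step of the float exponent field (bit 23), so subtracting
+ // fBits * 0x800000 from 1f (0x3F800000) gives exactly 2^-fBits, e.g.
+ // fBits = 8: 0x3B800000 == 1f / 256f. The '+' variants below build 2^fBits.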
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
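+ // Same exponent trick for doubles: 0x10000000000000L is one step of the
+ // exponent field (bit 52) of 1d (0x3FF0000000000000L).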
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
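+ // Cvtdq2ps only handles signed int32, so split the unsigned input as
+ // u == (u >> 16) * 65536 + (u & 0xFFFF): both halves are non-negative,
+ // convert exactly, and the scaled sum reconstructs the unsigned value.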
+ Operand mask = scalar // 65536.000f (1 << 16)
+ ? X86GetScalar (context, 0x47800000)
+ : X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
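+ // Same hi/lo split for unsigned 64-bit values, scaled by 2^32.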
+ Operand mask = scalar // 4294967296.0000000d (1L << 32)
+ ? X86GetScalar (context, 0x41F0000000000000L)
+ : X86GetAllElements(context, 0x41F0000000000000L);
+
+ Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32));
+ res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
+ res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask);
+
+ Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32));
+ res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32));
+ res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
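+ // CmpOrderedQ yields an all-ones mask on non-NaN lanes; ANDing it with the
+ // input zeroes NaN lanes, matching the Arm behavior of converting NaN to 0.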
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
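+ // Cvtps2dq returns int.MinValue (0x80000000) on positive overflow; XORing
+ // with the NotLessThan mask (all-ones where the input was >= 2^31) flips
+ // those lanes to int.MaxValue, i.e. signed saturation.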
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar (context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar (context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
+ private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
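+ // Unsigned conversion via two signed ones: clamp negatives to 0, convert x
+ // and max(x - 2^31, 0) separately, and add the results; lanes with
+ // x >= 2^32 are saturated to uint.MaxValue by the final mask/XOR.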
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar (context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
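+ // Same two-step unsigned scheme as the 32-bit case, pivoting on 2^63.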
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar (context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
+ private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+ {
+ OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
+
+ return context.VectorExtract(type, GetVec(reg), index);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
new file mode 100644
index 00000000..33ae83df
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -0,0 +1,800 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
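+ // In the A32/T32 encodings the D bit is the low bit of a single-precision
+ // register index (Sd = Vd:D) but the high bit of a double-precision one
+ // (Dd = D:Vd), hence the bit move below when switching widths.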
+ private static int FlipVdBits(int vd, bool lowBit)
+ {
+ if (lowBit)
+ {
+ // Move the low bit to the top.
+ return ((vd & 0x1) << 4) | (vd >> 1);
+ }
+ else
+ {
+ // Move the high bit to the bottom.
+ return ((vd & 0xf) << 1) | (vd >> 4);
+ }
+ }
+
+ private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
+ {
+ MethodInfo info;
+
+ if (op1.Type == OperandType.FP64)
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
+ }
+
+ return context.Call(info, op1);
+ }
+
+ public static void Vcvt_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ bool unsigned = (op.Opc & 1) != 0;
+ bool toInteger = (op.Opc & 2) != 0;
+ OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
+
+ if (toInteger)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSaturateFloatToInt(context, op1, unsigned);
+ });
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ if (unsigned)
+ {
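+ // Cvtdq2ps is signed-only; same hi/lo 16-bit split as EmitSse2UcvtfOp.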
+ Operand mask = X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+ }
+ });
+ }
+ else
+ {
+ if (unsigned)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
+ }
+ }
+ }
+ }
+
+ public static void Vcvt_FD(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ int vm = op.Vm;
+ int vd;
+ if (op.Size == 3)
+ {
+ // Double to single.
+ vd = FlipVdBits(op.Vd, false);
+ Operand fp = ExtractScalar(context, OperandType.FP64, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ else
+ {
+ // Single to double.
+ vd = FlipVdBits(op.Vd, true);
+ Operand fp = ExtractScalar(context, OperandType.FP32, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ }
+
+ // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
+ public static void Vcvt_FI(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool toInteger = (op.Opc2 & 0b100) != 0;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ if (toInteger)
+ {
+ bool unsigned = (op.Opc2 & 1) == 0;
+ bool roundWithFpscr = op.Opc != 1;
+
+ if (!roundWithFpscr && Optimizations.UseAdvSimd)
+ {
+ bool doubleSize = floatSize == OperandType.FP64;
+
+ if (doubleSize)
+ {
+ Operand m = GetVecA32(op.Vm >> 1);
+
+ Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+ Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ else
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
+ }
+ }
+ else if (!roundWithFpscr && Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ // TODO: Fast Path.
+ if (roundWithFpscr)
+ {
+ toConvert = EmitRoundByRMode(context, toConvert);
+ }
+
+ // Round towards zero.
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+ else
+ {
+ bool unsigned = op.Opc == 0;
+
+ Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
+
+ Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
+
+ InsertScalar(context, op.Vd, asFloat);
+ }
+ }
+
+ private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ private static FPRoundingMode RMToRoundMode(int rm)
+ {
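+ // Arm 'RM' field: 00 = A (ties away), 01 = N (ties to even), 10 = P (+inf), 11 = M (-inf).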
+ FPRoundingMode roundMode;
+ switch (rm)
+ {
+ case 0b00:
+ roundMode = FPRoundingMode.ToNearestAway;
+ break;
+ case 0b01:
+ roundMode = FPRoundingMode.ToNearest;
+ break;
+ case 0b10:
+ roundMode = FPRoundingMode.TowardsPlusInfinity;
+ break;
+ case 0b11:
+ roundMode = FPRoundingMode.TowardsMinusInfinity;
+ break;
+ default:
+ throw new ArgumentOutOfRangeException(nameof(rm));
+ }
+ return roundMode;
+ }
+
+ // VCVTA/M/N/P (floating-point).
+ public static void Vcvt_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ bool unsigned = op.Opc == 0;
+ int rm = op.Opc2 & 3;
+
+ Intrinsic inst;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ if (unsigned)
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtauS,
+ 0b01 => Intrinsic.Arm64FcvtnuS,
+ 0b10 => Intrinsic.Arm64FcvtpuS,
+ 0b11 => Intrinsic.Arm64FcvtmuS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+ }
+ else
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtasS,
+ 0b01 => Intrinsic.Arm64FcvtnsS,
+ 0b10 => Intrinsic.Arm64FcvtpsS,
+ 0b11 => Intrinsic.Arm64FcvtmsS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+ }
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+
+ public static void Vcvt_TB(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;
+
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ if (op.Op)
+ {
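+ // To half: narrow a double to single first if needed, then VCVTPS2PH.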
+ Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
+ }
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.VectorExtract16(res, 0);
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
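+ // From half: VCVTPH2PS, then widen to double if needed.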
+ Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ }
+ res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ else
+ {
+ if (op.Op)
+ {
+ // Convert to half.
+
+ Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
+ : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
+ // Convert from half.
+
+ Operand src = ExtractScalar16(context, op.Vm, op.T);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
+ : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ }
+
+ // VRINTA/M/N/P (floating-point).
+ public static void Vrint_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ int rm = op.Opc2 & 3;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ Intrinsic inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FrintaS,
+ 0b01 => Intrinsic.Arm64FrintnS,
+ 0b10 => Intrinsic.Arm64FrintpS,
+ 0b11 => Intrinsic.Arm64FrintmS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ FPRoundingMode roundMode = RMToRoundMode(rm);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
+ }
+ });
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ InsertScalar(context, op.Vd, toConvert);
+ }
+ }
+
+ // VRINTA (vector).
+ public static void Vrinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ }
+ }
+
+ // VRINTM (vector).
+ public static void Vrintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
+ }
+ }
+
+ // VRINTN (vector).
+ public static void Vrintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
+ }
+ }
+
+ // VRINTP (vector).
+ public static void Vrintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
+ }
+ }
+
+ // VRINTZ (floating-point).
+ public static void Vrint_Z(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
+ }
+ }
+
+ // VRINTX (floating-point).
+ public static void Vrintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ // A port of the equivalent rounding conversion function in InstEmitSimdCvt.
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vm >> shift);
+ n = EmitSwapScalar(context, n, op.Vm, doubleSize);
+
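+ // Mask out NaN lanes (ordered compare of n with itself), round according to roundMode,
+ // then convert with CVTSS2SI/CVTSD2SI. Unsigned conversions clamp negatives to zero and
+ // convert a second time with 2^31 subtracted to cover the upper half of the range; inputs
+ // that still compare NotLessThan against 2^31 yield an all-ones mask that is xored in to
+ // saturate the result.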
+ if (!doubleSize)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ else
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+ nLong = context.ConvertI64ToI32(nLong);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ }
+
+ private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ int sizeF = op.Size & 1;
+
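+ // Per-lane variant of the NaN masking and saturation fix-up used in EmitSse41ConvertInt32,
+ // using the packed compare, round and convert forms.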
+ if (sizeF == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ Operand nInt2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ Operand nLong2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+ }
+ }
+ });
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 00000000..4fb048ee
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+#region "Sha1"
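+ // These always go through the managed SoftFallback implementations; no intrinsic fast
+ // path is provided for the SHA1 instructions here.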
+ public static void Sha1c_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1h_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)), ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res));
+ }
+
+ public static void Sha1m_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1p_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)), d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+
+#region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
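+ // Note the swapped n/d operands relative to Sha256h_V; part2 selects the other half of the state.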
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
new file mode 100644
index 00000000..51334608
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+#region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+#endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
new file mode 100644
index 00000000..23e4948d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHashHelper
+ {
+ public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
+ {
+ if (Optimizations.UseSha)
+ {
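+ // SHA256RNDS2 performs two rounds per invocation and expects the hash state split across
+ // two registers in the ABEF/CDGH layout, so the shuffles repack the halves of x and y
+ // before the two round steps; the final shuffle extracts the half requested by part2.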
+ Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
+ Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
+ Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);
+
+ Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w);
+ Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb));
+
+ return res;
+ }
+
+ string method = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);
+ return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w);
+ }
+
+ public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
+ {
+ if (Optimizations.UseSha)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y);
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
+ }
+
+ public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
+ {
+ if (Optimizations.UseSha && Optimizations.UseSsse3)
+ {
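+ // PALIGNR forms { y1, y2, y3, z0 } (the w[t-7] terms), which are added to x before
+ // SHA256MSG2 completes the schedule update.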
+ Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);
+
+ return res;
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 00000000..c44c9b4d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,2088 @@
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper
+ {
+#region "Masks"
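+ // PSHUFB selector masks: each byte of the mask picks a source byte by index, and a value
+ // with bit 7 set (128) zeroes the destination byte.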
+ public static readonly long[] EvenMasks = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // S
+ };
+
+ public static readonly long[] OddMasks = new long[]
+ {
+ 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
+ 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
+ 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 // S
+ };
+
+ public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
+
+ public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+ {
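+ // Builds the 8x8 bit matrix operand for GF2P8AFFINEQB: shifting the rows of the identity
+ // matrix performs a per-byte logical shift left (or right, for negative amounts).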
+ ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
+ (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);
+
+ return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+ }
+#endregion
+
+#region "X86 SSE Intrinsics"
+ public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Paddb,
+ Intrinsic.X86Paddw,
+ Intrinsic.X86Paddd,
+ Intrinsic.X86Paddq
+ };
+
+ public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpeqb,
+ Intrinsic.X86Pcmpeqw,
+ Intrinsic.X86Pcmpeqd,
+ Intrinsic.X86Pcmpeqq
+ };
+
+ public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpgtb,
+ Intrinsic.X86Pcmpgtw,
+ Intrinsic.X86Pcmpgtd,
+ Intrinsic.X86Pcmpgtq
+ };
+
+ public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxsb,
+ Intrinsic.X86Pmaxsw,
+ Intrinsic.X86Pmaxsd
+ };
+
+ public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxub,
+ Intrinsic.X86Pmaxuw,
+ Intrinsic.X86Pmaxud
+ };
+
+ public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminsb,
+ Intrinsic.X86Pminsw,
+ Intrinsic.X86Pminsd
+ };
+
+ public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminub,
+ Intrinsic.X86Pminuw,
+ Intrinsic.X86Pminud
+ };
+
+ public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovsxbw,
+ Intrinsic.X86Pmovsxwd,
+ Intrinsic.X86Pmovsxdq
+ };
+
+ public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovzxbw,
+ Intrinsic.X86Pmovzxwd,
+ Intrinsic.X86Pmovzxdq
+ };
+
+ public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psllw,
+ Intrinsic.X86Pslld,
+ Intrinsic.X86Psllq
+ };
+
+ public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psraw,
+ Intrinsic.X86Psrad
+ };
+
+ public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psrlw,
+ Intrinsic.X86Psrld,
+ Intrinsic.X86Psrlq
+ };
+
+ public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Psubb,
+ Intrinsic.X86Psubw,
+ Intrinsic.X86Psubd,
+ Intrinsic.X86Psubq
+ };
+
+ public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpckhbw,
+ Intrinsic.X86Punpckhwd,
+ Intrinsic.X86Punpckhdq,
+ Intrinsic.X86Punpckhqdq
+ };
+
+ public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpcklbw,
+ Intrinsic.X86Punpcklwd,
+ Intrinsic.X86Punpckldq,
+ Intrinsic.X86Punpcklqdq
+ };
+#endregion
+
+ public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
+
+ // X86 round modes in order: nearest, negative, positive, zero
+ // ARM round modes in order: nearest, positive, negative, zero
+ // Read the bits backwards to correct this.
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Restore FPSR
+ }
+ }
+
+ public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ // Unset round mode (to nearest) and ftz.
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ // Status flags would be stored here if they were used.
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ // Unset round mode (to nearest) and fz.
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Store FPSR
+ }
+ }
+
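+ // SIMD shift immediates encode the amount relative to the element size:
+ // shift-left stores (8 << size) + shift, shift-right stores (16 << size) - shift.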
+ public static int GetImmShl(OpCodeSimdShImm op)
+ {
+ return op.Imm - (8 << op.Size);
+ }
+
+ public static int GetImmShr(OpCodeSimdShImm op)
+ {
+ return (8 << (op.Size + 1)) - op.Imm;
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, float value)
+ {
+ return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, double value)
+ {
+ return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, int value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, long value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+ {
+ return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+ {
+ return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, short value)
+ {
+ ulong value1 = (ushort)value;
+ ulong value2 = value1 << 16 | value1;
+ ulong value4 = value2 << 32 | value2;
+
+ return X86GetAllElements(context, (long)value4);
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));
+
+ return vector;
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);
+
+ return vector;
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
+ {
+ return X86GetElements(context, (ulong)e1, (ulong)e0);
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
+ {
+ Operand vector0 = context.VectorCreateScalar(Const(e0));
+ Operand vector1 = context.VectorCreateScalar(Const(e1));
+
+ return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
+ }
+
+ public static int X86GetRoundControl(FPRoundingMode roundMode)
+ {
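+ // Bit 3 (the 8) suppresses the precision (inexact) exception; the low two bits select the
+ // x86 rounding mode.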
+ switch (roundMode)
+ {
+ case FPRoundingMode.ToNearest: return 8 | 0; // even
+ case FPRoundingMode.TowardsPlusInfinity: return 8 | 2;
+ case FPRoundingMode.TowardsMinusInfinity: return 8 | 1;
+ case FPRoundingMode.TowardsZero: return 8 | 3;
+ }
+
+ throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
+ }
+
+ public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
+ {
+ Debug.Assert(n.Type == OperandType.V128);
+
+ Operand nCopy = context.Copy(n);
+
+ Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
+
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
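+ // SSE4.1 has no round-half-away mode, so emulate it: add the largest value strictly below
+ // 0.5 (carrying the sign of the input), then round towards zero.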
+ if ((op.Size & 1) == 0)
+ {
+ Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
+ }
+ else
+ {
+ Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
+ }
+
+ return nCopy;
+ }
+
+ public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
+ {
+ Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
+
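+ // Standard SWAR population count over the low byte: sum adjacent bit pairs (0x55), then
+ // nibble pairs (0x33), then fold the two nibbles together and mask with 0x0f.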
+ Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));
+
+ Operand c1 = Const(op.Type, 0x33L);
+ Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));
+
+ return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double) });
+
+ return context.Call(info, n);
+ }
+
+ public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ public static Operand EmitGetRoundingMode(ArmEmitterContext context)
+ {
+ Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
+ rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));
+
+ return rMode;
+ }
+
+ public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);
+
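+ // Selects the rounding operation from the dynamic FPCR rounding mode at runtime, via a
+ // branch chain over Round(ToEven), Ceiling, Floor and Truncate.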
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lbl3 = Label();
+ Operand lblEnd = Label();
+
+ Operand rN = Const((int)FPRoundingMode.ToNearest);
+ Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
+ Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);
+
+ Operand res = context.AllocateLocal(op.Type);
+
+ Operand rMode = EmitGetRoundingMode(context);
+
+ context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
+ context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl3);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
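+ // The managed soft-float routine runs outside ARM FP mode, with guest state written back
+ // to the context for the duration of the call.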
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
+ }
+
+ public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ res = context.VectorInsert(res, emit(ne), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
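+ // The 128-bit "2" instruction forms read their narrow source elements from the upper half.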
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
+ Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks [op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
+ }
+ }
+
+ public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
+ }
+
+ public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
+ }
+
+ private static void EmitVectorAcrossVectorOp(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool signed,
+ bool isLong)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
+
+ for (int index = 1; index < elems; index++)
+ {
+ Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ res = emit(res, n);
+ }
+
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ for (int index = 1; index < 4; index++)
+ {
+ Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);
+
+ res = emit(res, n);
+ }
+
+ Operand d = context.VectorInsert(context.VectorZero(), res, 0);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
+ const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
+ const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
+ const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1));
+ Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2));
+ Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3));
+
+ Operand res = emit(emit(part0, part1), emit(part2, part3));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+
+ public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand op0, op1;
+
+ if ((op.Size & 1) == 0)
+ {
+ const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
+ const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;
+
+ Operand zeroN = context.VectorZeroUpper64(n);
+
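+ // Selector 2 points at a lane zeroed by VectorZeroUpper64, so op0 and op1 carry
+ // n[0] and n[1] in lane 0 with zeros above.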
+ op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0));
+ op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand zero = context.VectorZero();
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
+ op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
+ }
+
+ context.Copy(GetVec(op.Rd), emit(op0, op1));
+ }
+
+ public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int pairs = op.GetPairsCount() >> (sizeF + 2);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
+ Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);
+
+ Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
+ Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n0, n1), index);
+ res = context.VectorInsert(res, emit(m0, m1), pairs + index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+ Operand mCopy = context.Copy(GetVec(op.Rm));
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
+
+ Operand zero = context.VectorZero();
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
+ const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
+
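+ // Shufps with sm0 gathers the even elements (n0, n2, m0, m2) and sm1 the odd ones
+ // (n1, n3, m1, m3), so each lane of emit() combines one source pair.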
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1));
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+
+ public enum CmpCondition
+ {
+ // Legacy Sse.
+ Equal = 0, // Ordered, non-signaling.
+ LessThan = 1, // Ordered, signaling.
+ LessThanOrEqual = 2, // Ordered, signaling.
+ UnorderedQ = 3, // Non-signaling.
+ NotLessThan = 5, // Unordered, signaling.
+ NotLessThanOrEqual = 6, // Unordered, signaling.
+ OrderedQ = 7, // Non-signaling.
+
+ // Vex.
+ GreaterThanOrEqual = 13, // Ordered, signaling.
+ GreaterThan = 14, // Ordered, signaling.
+ OrderedS = 23 // Signaling.
+ }
+
+ [Flags]
+ public enum SaturatingFlags
+ {
+ None = 0,
+
+ ByElem = 1 << 0,
+ Scalar = 1 << 1,
+ Signed = 1 << 2,
+
+ Add = 1 << 3,
+ Sub = 1 << 4,
+
+ Accumulate = 1 << 5
+ }
+
+ public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
+ }
+
+ public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
+ }
+
+ public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand de;
+
+ if (op.Size <= 2)
+ {
+ de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed);
+ }
+
+ public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool byElem = (flags & SaturatingFlags.ByElem) != 0;
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+ bool signed = (flags & SaturatingFlags.Signed) != 0;
+
+ bool add = (flags & SaturatingFlags.Add) != 0;
+ bool sub = (flags & SaturatingFlags.Sub) != 0;
+
+ bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ if (add || sub)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ if (add)
+ {
+ de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else if (accumulate)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
+ Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = context.Add(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else
+ {
+ Operand me = default;
+
+ if (byElem)
+ {
+ OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op;
+
+ me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed);
+ }
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (!byElem)
+ {
+ me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+ }
+
+ Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed);
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ [Flags]
+ public enum SaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
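+ // For the upper-half ("2") narrowing variants, the lower half of Rd is preserved
+ // and the narrowed elements are inserted into the upper half.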
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ Operand temp = signedSrc
+ ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst)
+ : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, temp, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ // long SignedSignSatQ(long op, int size);
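+ // Saturates to the element-type maximum for positive inputs and the minimum for
+ // negative inputs, setting QC for any non-zero input; zero stays zero.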
+ public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+ Operand maxT = Const((1L << (eSize - 1)) - 1L);
+ Operand minT = Const(-(1L << (eSize - 1)));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong UnsignedSignSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand maxT = Const(ulong.MaxValue >> (64 - eSize));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
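+ // Clamps a signed source to the destination element range [minT, maxT], setting QC
+ // when the value saturates.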
+ public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+ Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long UnarySignedSatQAbsOrNeg(long op);
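+ // Only long.MinValue overflows under abs/neg; it saturates to long.MaxValue.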
+ private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, minL, Comparison.NotEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQAdd(long op1, long op2);
+ public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
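+ // Two's-complement overflow: op1 and op2 share a sign that differs from the sum's,
+ // i.e. ~(op1 ^ op2) & (op1 ^ add) is negative.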
+ Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
+ Operand right = context.BitwiseExclusiveOr(op1, add);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
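+ // Unsigned overflow iff the sum wrapped around below op1.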
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQSub(long op1, long op2);
+ public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
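+ // Signed overflow: op1 and op2 differ in sign and the difference's sign differs
+ // from op1, i.e. (op1 ^ op2) & (op1 ^ sub) is negative.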
+ Operand left = context.BitwiseExclusiveOr(op1, op2);
+ Operand right = context.BitwiseExclusiveOr(op1, sub);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
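+ // Unsigned underflow iff op1 < op2; saturate to zero.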
+ context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQAcc(ulong op1, long op2);
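+ // Accumulates the unsigned op1 into the signed op2 (as in SUQADD), saturating
+ // upward to long.MaxValue when the true sum exceeds it.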
+ private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI);
+ Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add);
+ context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, op2, zeroL, Comparison.Less);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
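+ // Accumulates the signed op1 into the unsigned op2 (as in USQADD), saturating to
+ // ulong.MaxValue on overflow and to zero on underflow.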
+ private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ context.BranchIf(lbl1, op1, zeroL, Comparison.Less);
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI);
+ context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
+ {
+ Operand mask;
+ if (single)
+ {
+ mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
+ }
+ else
+ {
+ mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
+ }
+
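+ // Andn computes ~mask & value; with mask = -0.0 (sign bit only) this clears the
+ // sign bit, yielding the absolute value.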
+ return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
+ }
+
+ public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0: res = context.SignExtend8 (OperandType.I64, res); break;
+ case 1: res = context.SignExtend16(OperandType.I64, res); break;
+ case 2: res = context.SignExtend32(OperandType.I64, res); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: res = context.ZeroExtend8 (OperandType.I64, res); break;
+ case 1: res = context.ZeroExtend16(OperandType.I64, res); break;
+ case 2: res = context.ZeroExtend32(OperandType.I64, res); break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
+ {
+ ThrowIfInvalid(index, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0: vector = context.VectorInsert8 (vector, value, index); break;
+ case 1: vector = context.VectorInsert16(vector, value, index); break;
+ case 2: vector = context.VectorInsert (vector, value, index); break;
+ case 3: vector = context.VectorInsert (vector, value, index); break;
+ }
+
+ return vector;
+ }
+
+ public static void ThrowIfInvalid(int index, int size)
+ {
+ if ((uint)size > 3u)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if ((uint)index >= 16u >> size)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
new file mode 100644
index 00000000..36d27d42
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -0,0 +1,1286 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32
+ {
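+ // AArch32 SIMD registers alias: Q(n) covers D(2n+1):D(2n), and D(n) covers
+ // S(2n+1):S(2n), so an S/D register number maps to a backing quadword index
+ // plus a sub-element index within it.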
+ public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size)
+ {
+ switch (size)
+ {
+ case RegisterSize.Simd128:
+ return (index >> 1, 0);
+ case RegisterSize.Simd64:
+ case RegisterSize.Int64:
+ return (index >> 1, index & 1);
+ case RegisterSize.Int32:
+ return (index >> 2, index & 3);
+ }
+
+ throw new ArgumentException("Unrecognized Vector Register Size.");
+ }
+
+ public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg)
+ {
+ Debug.Assert(type != OperandType.V128);
+
+ if (type == OperandType.FP64 || type == OperandType.I64)
+ {
+ // From dreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3);
+ }
+ }
+
+ public static void InsertScalar(ArmEmitterContext context, int reg, Operand value)
+ {
+ Debug.Assert(value.Type != OperandType.V128);
+
+ Operand vec, insert;
+ if (value.Type == OperandType.FP64 || value.Type == OperandType.I64)
+ {
+ // From dreg.
+ vec = GetVecA32(reg >> 1);
+ insert = context.VectorInsert(vec, value, reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert(vec, value, reg & 3);
+ }
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractScalar16(ArmEmitterContext context, int reg, bool top)
+ {
+ return context.VectorExtract16(GetVecA32(reg >> 2), ((reg & 3) << 1) | (top ? 1 : 0));
+ }
+
+ public static void InsertScalar16(ArmEmitterContext context, int reg, bool top, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.I32);
+
+ Operand vec, insert;
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert16(vec, value, ((reg & 3) << 1) | (top ? 1 : 0));
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed)
+ {
+ return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed);
+ }
+
+ public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit)
+ {
+ IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ int elems = op.Elems;
+ (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+ Operand vec = GetVecA32(index);
+ Operand res = vec;
+
+ for (int item = 0; item < elems; item++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size);
+ }
+
+ context.Copy(vec, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(m));
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.I64 : OperandType.I32;
+
+ if (op.Size < 2)
+ {
+ throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits.");
+ }
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = ExtractScalar(context, type, op.Vd);
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(a, n, m));
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Integer
+
+ public static void EmitVectorUnaryAccumulateOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, context.Add(de, emit(me)), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryWideOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, true);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+ }
+
+ public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, false);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+ }
+
+ public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, false);
+ }
+
+ // Vector by scalar
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand mvec = GetVecA32(op.Qm);
+ Operand nvec = GetVecA32(op.Qn);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex);
+ Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n1, n2), op.Fd + index);
+
+ Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex);
+ Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed);
+ Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed);
+
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = (op.Q ? 16 : 8) >> op.Size;
+ int pairs = elems >> 1;
+ int id = (op.Vd & 1) * pairs;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1);
+ m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2);
+ }
+
+ res = EmitVectorInsert(context, res, emit(m1, m2), id + index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Narrow
+
+ public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = 8 >> op.Size; // Size is the target (narrowed) element size; the source elements are one size larger.
+
+ Operand res = GetVecA32(op.Qd);
+ int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base.
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed);
+
+ res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
+ int shuffleMask = 2;
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, value, Const(shuffleMask));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(shuffleMask));
+ }
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ if (index == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original.
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse41)
+ {
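+ // Insertps imm8: bits [5:4] give the destination lane; the source lane and
+ // zero mask are left at 0.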
+ return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
+ }
+ else
+ {
+ target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0.
+ target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
+ return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index.
+ }
+ }
+ }
+
+ public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ // Swap the element at the given index with element 0. This swap happens at the start of an A32 scalar op if required.
+ int index = reg & (doubleWidth ? 1 : 3);
+ if (index == 0) return target;
+
+ if (doubleWidth)
+ {
+ int shuffleMask = 1; // Swap top and bottom. (b0 = 1, b1 = 0)
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
+ }
+ else
+ {
+ int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Swap index and 0. (others remain)
+ shuffleMask &= ~(3 << (index * 2));
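+ // e.g. index == 2: mask = 0b11_00_01_10, moving element 2 to lane 0 and element 0
+ // to lane 2 while lanes 1 and 3 keep their elements.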
+
+ return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0);
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst32, d, n, m);
+ });
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+ d = EmitSwapScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst = doubleSize ? inst64 : inst32;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst, d, n, m);
+ });
+ }
+
+ public static void EmitScalarTernaryOpF32(
+ ArmEmitterContext context,
+ Intrinsic inst32pt1,
+ Intrinsic inst64pt1,
+ Intrinsic inst32pt2,
+ Intrinsic inst64pt2,
+ bool isNegD = false)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+
+ if (isNegD)
+ {
+ Operand mask = doubleSize
+ ? X86GetScalar(context, -0d)
+ : X86GetScalar(context, -0f);
+
+ d = doubleSize
+ ? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
+ : context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
+ }
+
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // By Scalar
+
+ public static void EmitVectorByScalarOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorByScalarOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorsByScalarOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorsByScalarOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // Pairwise
+
+ public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
+
+ Operand part0 = unpck;
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, unpck, unpck);
+
+ return context.AddIntrinsic(inst32, part0, part1);
+ }, 0);
+ }
+
+ public static void EmitSsse3VectorPairwiseOp32(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ return context.AddIntrinsic(inst[3], left, right);
+ }
+ }, 0);
+ }
+
+ // Generic Functions
+
+ public static Operand EmitSoftFloatCallDefaultFpscr(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
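+            // Append a trailing flag argument, assumed to select the "standard FPSCR" variant implied by the method name.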
+ Array.Resize(ref callArgs, callArgs.Length + 1);
+ callArgs[callArgs.Length - 1] = Const(1);
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0: res = context.SignExtend8(OperandType.I32, res); break;
+ case 1: res = context.SignExtend16(OperandType.I32, res); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: res = context.ZeroExtend8(OperandType.I32, res); break;
+ case 1: res = context.ZeroExtend16(OperandType.I32, res); break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
+ {
+ Debug.Assert(eSize <= 32);
+
+ Operand result = eSize == 32 ? Const(0L) : Const(0);
+
+ if (eSize == 32)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
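+            // Carry-less (GF(2) polynomial) multiply: 'mask' is either 0 or 1 << i, so each set bit i of op1
+            // contributes op2 << i, and the partial products are combined with XOR instead of addition.
+            // Example: 0b101 * 0b011 = 0b011 ^ 0b01100 = 0b1111.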
+ for (int i = 0; i < eSize; i++)
+ {
+ Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
+
+ result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
+ }
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
new file mode 100644
index 00000000..98236be6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
@@ -0,0 +1,366 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32Arm64
+ {
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
+ Operand idx = Const(targetSide);
+
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
+ }
+ }
+
+ public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ int index = reg & (doubleWidth ? 1 : 3);
+            if (index == 0)
+            {
+                // Element is already at index 0, so just return the vector directly.
+                return target;
+            }
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+ d = EmitExtractScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
+ }
+
+ public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool cmpWithZero = (op.Opc & 2) != 0;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vd >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+ Operand one = Const(1);
+
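+            // The compare intrinsic returns the NZCV flags in bits 31..28 of the integer result.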
+ SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+
+ public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ Intrinsic inst;
+ if (zero)
+ {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
+ CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
+ CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
+ _ => throw new InvalidOperationException()
+ };
+ }
+            else
+            {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
+ _ => throw new InvalidOperationException()
+ };
+ }
+
+ inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m);
+ });
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
new file mode 100644
index 00000000..f0d242ae
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
@@ -0,0 +1,720 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHelperArm64
+ {
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (inst, n)
+ : context.AddIntrinsicLong(inst, n));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+ Operand a = GetVec(op.Ra);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
+ }
+
+ public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (inst, n, Const(fBits))
+ : context.AddIntrinsicLong(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
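+            // Gather op.Size consecutive table registers starting at Rn (wrapping at V31); the last operand is the index vector in Rm.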
+ Operand[] operands = new Operand[op.Size + 1];
+
+ operands[op.Size] = GetVec(op.Rm);
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ operands[index] = GetVec((op.Rn + index) & 0x1F);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
+ }
+
+ public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool cmpWithZero = !(op is OpCodeSimdFcond) && op.Bit3;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+ Operand one = Const(1);
+
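+            // The compare intrinsic returns the NZCV flags in bits 31..28 of the integer result.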
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 00000000..2bf531e6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,612 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void And_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Bic_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bic_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+                Operand imm = eSize switch
+                {
+                    16 => X86GetAllElements(context, (short)~op.Immediate),
+                    32 => X86GetAllElements(context, (int)~op.Immediate),
+                    _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
+                };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bif_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: true);
+ }
+ }
+
+ public static void Bit_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: false);
+ }
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
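+                // Computes d ^ ((n ^ d) & mask), with mask = m (BIT) or ~m (BIF): bits come from n where the mask is set, from d elsewhere.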
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+
+ if (notRm)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
+ Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
+ Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);
+
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+
+ Operand e = context.BitwiseExclusiveOr(d, n);
+
+ e = context.BitwiseAnd(e, m);
+ e = context.BitwiseExclusiveOr(e, d);
+
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Bsl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Eor_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Not_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
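+                // The truth-table immediate ~0xAA (low byte 0x55) only references the last source, computing ~n.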
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Orn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
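+                // The truth-table immediate 0xCC | ~0xAA (low byte 0xDD) computes B | ~C over the two sources, i.e. n | ~m, in one instruction.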
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseOr(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Orr_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Orr_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+                Operand imm = eSize switch
+                {
+                    16 => X86GetAllElements(context, (short)op.Immediate),
+                    32 => X86GetAllElements(context, (int)op.Immediate),
+                    _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
+                };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Rbit_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseGfni)
+ {
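+                // Flipped-identity (anti-diagonal) bit matrix: the GF2P8AFFINEQB affine transform with it reverses the bit order of each byte.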
+ const long bitMatrix =
+ (0b10000000L << 56) |
+ (0b01000000L << 48) |
+ (0b00100000L << 40) |
+ (0b00010000L << 32) |
+ (0b00001000L << 24) |
+ (0b00000100L << 16) |
+ (0b00000010L << 8) |
+ (0b00000001L << 0);
+
+ Operand vBitMatrix = X86GetAllElements(context, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de = EmitReverseBits8Op(context, ne);
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
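+            // Classic bit reversal: swap adjacent bits, then 2-bit pairs, then nibbles.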
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)),
+ context.ShiftLeft (context.BitwiseAnd(op, Const(0x55ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x33ul)), Const(2)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(4)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0ful)), Const(4)));
+ }
+
+ public static void Rev16_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
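+                // Pshufb mask that swaps the two bytes of every 16-bit element.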
+ const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+ const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 1);
+ }
+ }
+
+ public static void Rev32_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+ const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 1) */
+ {
+ const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+ const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 2);
+ }
+ }
+
+ public static void Rev64_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+ const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else if (op.Size == 1)
+ {
+ const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+ const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 2) */
+ {
+ const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+ const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 3);
+ }
+ }
+
+ private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int containerMask = (1 << (containerSize - op.Size)) - 1;
+
+ for (int index = 0; index < elems; index++)
+ {
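+                // XOR with the container mask mirrors the element index within its container, reversing element order.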
+ int revIndex = index ^ containerMask;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
new file mode 100644
index 00000000..68ef4ed1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -0,0 +1,266 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vand_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Vbic_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vbic_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+            // Replicate the immediate across all 64 bits when the element size is smaller than 64 bits.
+ switch (op.Size)
+ {
+ case 0: immediate *= 0x0101010101010101L; break;
+ case 1: immediate *= 0x0001000100010001L; break;
+ case 2: immediate *= 0x0000000100000001L; break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vbif(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, true);
+ }
+ }
+
+ public static void Vbit(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, false);
+ }
+ }
+
+ public static void Vbsl(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Veor_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Vorn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
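+                // VectorOne() is an all-ones vector here, so Pandn(m, allOnes) computes ~m.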
+ Operand mask = context.VectorOne();
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+ return context.AddIntrinsic(Intrinsic.X86Por, n, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vorr_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Vorr_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+            // Replicate the immediate across all 64 bits when the element size is smaller than 64 bits.
+ switch (op.Size)
+ {
+ case 0: immediate *= 0x0101010101010101L; break;
+ case 1: immediate *= 0x0001000100010001L; break;
+ case 2: immediate *= 0x0000000100000001L; break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vtst(ArmEmitterContext context)
+ {
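+            // VTST: each element becomes all ones if (n & m) has any set bit, and zero otherwise.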
+ EmitVectorBinaryOpZx32(context, (op1, op2) =>
+ {
+ Operand isZero = context.ICompareEqual(context.BitwiseAnd(op1, op2), Const(0));
+ return context.ConditionalSelect(isZero, Const(0), Const(-1));
+ });
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+ res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (d, n, m) =>
+ {
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(m,
+ context.BitwiseExclusiveOr(d, n)), d);
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 00000000..9b19872a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ld__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: true);
+ }
+
+ public static void Ld__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: true);
+ }
+
+ public static void St__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: false);
+ }
+
+ public static void St__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: false);
+ }
+
+ private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
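+            // One element is transferred per iteration, interleaving the SElems structure registers;
+            // the offset advances by the element size each time.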
+ for (int rep = 0; rep < op.Reps; rep++)
+ for (int elem = 0; elem < op.Elems; elem++)
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rtt = (op.Rt + rep + sElem) & 0x1f;
+
+ Operand tt = GetVec(rtt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, tt, rtt, elem, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
+ {
+ context.Copy(tt, context.VectorZeroUpper64(tt));
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rtt, elem, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+ if (op.Replicate)
+ {
+                // Only loads use the replicate mode.
+ Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ for (int index = 0; index < elems; index++)
+ {
+ EmitLoadSimd(context, address, t, rt, index, op.Size);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ context.Copy(t, context.VectorZeroUpper64(t));
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+ else
+ {
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, t, rt, op.Index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rt, op.Index, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+ {
+ OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+ Operand m;
+
+ if (op.Rm != RegisterAlias.Zr)
+ {
+ m = GetIntOrZR(context, op.Rm);
+ }
+ else
+ {
+ m = Const(offset);
+ }
+
+ context.Copy(n, context.Add(n, m));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
new file mode 100644
index 00000000..b774bd06
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
@@ -0,0 +1,352 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vld1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, true);
+ }
+
+ public static void Vld2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, true);
+ }
+
+ public static void Vld3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, true);
+ }
+
+ public static void Vld4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, true);
+ }
+
+ public static void Vst1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, false);
+ }
+
+ public static void Vst2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, false);
+ }
+
+ public static void Vst3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, false);
+ }
+
+ public static void Vst4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, false);
+ }
+
+ public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load)
+ {
+ if (context.CurrOp is OpCode32SimdMemSingle)
+ {
+ OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp;
+
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ // TODO: Check alignment.
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int i = 0; i < count; i++)
+ {
+ // Accesses an element from a double simd register.
+ Operand address = context.Add(n, Const(offset));
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, d);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, d);
+ }
+ }
+ else
+ {
+ int index = ((d & 1) << (3 - op.Size)) + op.Index;
+ if (load)
+ {
+ if (op.Replicate)
+ {
+                                int regs = (count > 1) ? 1 : op.Increment;
+ for (int reg = 0; reg < regs; reg++)
+ {
+ int dreg = reg + d;
+ int rIndex = ((dreg & 1) << (3 - op.Size));
+ int limit = rIndex + (1 << (3 - op.Size));
+
+ while (rIndex < limit)
+ {
+ EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size);
+ }
+ }
+ }
+ else
+ {
+ EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, d >> 1, index, op.Size);
+ }
+ }
+ offset += eBytes;
+ d += op.Increment;
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes)));
+ }
+ }
+ }
+ else
+ {
+ OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp;
+
+ int increment = count > 1 ? op.Increment : 1;
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int reg = 0; reg < op.Regs; reg++)
+ {
+ for (int elem = 0; elem < op.Elems; elem++)
+ {
+ int elemD = d + reg;
+ for (int i = 0; i < count; i++)
+ {
+                            // Accesses an element from a double simd register;
+                            // the offset advances by eBytes for each element.
+ Operand address = context.Add(n, Const(offset));
+ int index = ((elemD & 1) << (3 - op.Size)) + elem;
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, elemD);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, elemD);
+ }
+ }
+ else
+ {
+ if (load)
+ {
+ EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, elemD >> 1, index, op.Size);
+ }
+ }
+
+ offset += eBytes;
+ elemD += increment;
+ }
+ }
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs)));
+ }
+ }
+ }
+ }
+
+ public static void Vldm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int range = op.RegisterRange;
+
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int offset = 0;
+ int byteSize = 4;
+
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+ Operand vec = GetVecA32(sReg >> 2);
+
+ EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2);
+ offset += byteSize;
+ }
+ }
+
+ public static void Vstm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int offset = 0;
+
+ int range = op.RegisterRange;
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int byteSize = 4;
+
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2);
+
+ offset += byteSize;
+ }
+ }
+
+ public static void Vldr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Load);
+ }
+
+ public static void Vstr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Store);
+ }
+
+ private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
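+            // Store the two words in order, or swapped when the E flag (big-endian data) is set.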
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand vec = GetVecA32(vecQ);
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType)
+ {
+ OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand address = op.Add
+ ? context.Add(n, m)
+ : context.Subtract(n, m);
+
+ int size = op.Size;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorLoad(context, address, op.Vd);
+ }
+ else
+ {
+ Operand vec = GetVecA32(op.Vd >> 2);
+ EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorStore(context, address, op.Vd);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 00000000..b58a32f6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,850 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+#region "Masks"
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+#endregion
+
+ public static void Dup_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if (Optimizations.UseSse2)
+ {
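+                // Multiplying the zero-extended value by a repeating-ones pattern broadcasts it across the lower 32 bits.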
+ switch (op.Size)
+ {
+ case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
+ case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
+ case 2: n = context.ZeroExtend32(n.Type, n); break;
+ }
+
+ Operand res = context.VectorInsert(context.VectorZero(), n, 0);
+
+ if (op.Size < 3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, n, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Dup_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
+ }
+
+ public static void Dup_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand res = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 1)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 2)
+ {
+ int mask = op.DstIndex * 0b01010101;
+
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
+ }
+ else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+ else if (op.DstIndex == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ext_V(ArmEmitterContext context)
+ {
+ OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
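+                // EXT extracts GetBytesCount() bytes from the concatenation m:n, starting at byte Imm4:
+                // shift n right by Imm4 bytes, shift m left by the remaining byte count, then OR them.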
+ Operand nShifted = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ nShifted = context.VectorZeroUpper64(nShifted);
+ }
+
+ nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
+
+ Operand mShifted = GetVec(op.Rm);
+
+ mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mShifted = context.VectorZeroUpper64(mShifted);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int bytes = op.GetBytesCount();
+
+ int position = op.Imm4 & (bytes - 1);
+
+ for (int index = 0; index < bytes; index++)
+ {
+ int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+ Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+ position = (position + 1) & (bytes - 1);
+
+ res = EmitVectorInsert(context, res, e, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcsel_S(ArmEmitterContext context)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
+
+ context.BranchIfTrue(lblTrue, isTrue);
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ public static void Fmov_Ftoi(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Ftoi1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Itof(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
+ }
+
+ public static void Fmov_Itof1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
+ }
+
+ public static void Fmov_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+ }
+
+ public static void Fmov_Si(ArmEmitterContext context)
+ {
+ OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.Size == 0)
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ res = EmitVectorInsert(context, res, e, 0, op.Size + 2);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fmov_Vi(ArmEmitterContext context)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ins_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
+ }
+
+ public static void Ins_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+ context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
+ }
+
+ public static void Movi_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: false);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => op1);
+ }
+ }
+
+ public static void Mvni_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: true);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Smov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ ne = context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Tbl_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: true);
+ }
+
+ public static void Tbx_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: false);
+ }
+
+ public static void Trn1_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 0);
+ }
+
+ public static void Trn2_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 1);
+ }
+
+ public static void Umov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Uzp1_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 0);
+ }
+
+ public static void Uzp2_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 1);
+ }
+
+ public static void Xtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+
+ Operand res = context.VectorZeroUpper64(d);
+
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, res, res2);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ res = EmitVectorInsert(context, res, ne, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Zip1_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 0);
+ }
+
+ public static void Zip2_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 1);
+ }
+
+ private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ long imm = op.Immediate;
+
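+            // Replicate the immediate across 32 bits for the 8 and 16-bit element sizes.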
+ switch (op.Size)
+ {
+ case 0: imm *= 0x01010101; break;
+ case 1: imm *= 0x00010001; break;
+ }
+
+ if (not)
+ {
+ imm = ~imm;
+ }
+
+ Operand mask;
+
+ if (op.Size < 3)
+ {
+ mask = X86GetAllElements(context, (int)imm);
+ }
+ else
+ {
+ mask = X86GetAllElements(context, imm);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mask = context.VectorZeroUpper64(mask);
+ }
+
+ context.Copy(GetVec(op.Rd), mask);
+ }
+
+ private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+ // Fast path for single register table.
+ {
+ Operand n = GetVec(op.Rn);
+
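+                    // Pshufb outputs zero for any index byte with bit 7 set. The signed compare (m > 0x0F)
+                    // yields all-ones for indices 0x10..0x7F, and OR-ing that in sets bit 7, so every
+                    // out-of-range index selects zero.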
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < op.Size; index++)
+ {
+ Operand ni = GetVec((op.Rn + index) & 0x1F);
+
+ Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (!isTbl)
+ {
+ Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand d = GetVec(op.Rd);
+
+ List<Operand> args = new List<Operand>();
+
+ if (!isTbl)
+ {
+ args.Add(d);
+ }
+
+ args.Add(GetVec(op.Rm));
+
+ args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 8 : 16));
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ args.Add(GetVec((op.Rn + index) & 0x1F));
+ }
+
+ MethodInfo info = null;
+
+ if (isTbl)
+ {
+ switch (op.Size)
+ {
+ case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1)); break;
+ case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2)); break;
+ case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3)); break;
+ case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4)); break;
+ }
+ }
+ else
+ {
+ switch (op.Size)
+ {
+ case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1)); break;
+ case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2)); break;
+ case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3)); break;
+ case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4)); break;
+ }
+ }
+
+ context.Copy(d, context.Call(info, args.ToArray()));
+ }
+ }
+
+ private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int idx = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitVectorZip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ int baseIndex = part != 0 ? pairs : 0;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
new file mode 100644
index 00000000..b8b91b31
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -0,0 +1,656 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ #region "Masks"
+ // Same as InstEmitSimdMove, as the instructions do the same thing.
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+ #endregion
+
+ public static void Vmov_I(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => op1);
+ }
+
+ public static void Vmvn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
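+                    // Truth table 0b01010101 is the complement of the third source, so this computes ~op1 in one instruction.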
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask = X86GetAllElements(context, -1L);
+ return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Vmvn_II(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
+ }
+
+ public static void Vmov_GS(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vn >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
+ }
+ }
+
+ public static void Vmov_G1(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;
+
+ int index = op.Index + ((op.Vd & 1) << (3 - op.Size));
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand vec = GetVecA32(op.Vd >> 1);
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size));
+ }
+ }
+
+ public static void Vmov_G2(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 2);
+ int vm1 = op.Vm + 1;
+ bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2);
+ Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3);
+ SetIntA32(context, op.Rt, lowValue);
+
+ Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3);
+ SetIntA32(context, op.Rt2, highValue);
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3);
+
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ if (sameOwnerVec)
+ {
+ context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3));
+ }
+ else
+ {
+ context.Copy(vec, resultVec);
+ context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3));
+ }
+ }
+ }
+
+ public static void Vmov_GD(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 1);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1);
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(value));
+ SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32))));
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ Operand value = context.BitwiseOr(
+ context.ZeroExtend32(OperandType.I64, lowValue),
+ context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32)));
+
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
+ }
+ }
+
+ public static void Vmovl(ArmEmitterContext context)
+ {
+ OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vtbl(ArmEmitterContext context)
+ {
+ OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
+
+ bool extension = op.Opc == 1;
+ int length = op.Length + 1;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVecA32(op.Qd);
+ Operand m = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qm), op.Vm, 0);
+
+ Operand res;
+ Operand mask = X86GetAllElements(context, 0x0707070707070707L);
+
+ // Fast path for single register table.
+ {
+ Operand n = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qn), op.Vn, 0);
+
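+                // Each table register holds 8 bytes, so indices above 7 are out of range; the signed
+                // compare (m > 0x07) sets bit 7 for those indices and Pshufb then outputs zero for them.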
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < length; index++)
+ {
+ int newVn = (op.Vn + index) & 0x1F;
+ (int qn, int ind) = GetQuadwordAndSubindex(newVn, op.RegisterSize);
+ Operand ni = EmitMoveDoubleWordToSide(context, GetVecA32(qn), newVn, 0);
+
+ Operand idxMask = X86GetAllElements(context, 0x0808080808080808L * index);
+
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (extension)
+ {
+ Operand idxMask = X86GetAllElements(context, (0x0808080808080808L * length) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, EmitMoveDoubleWordToSide(context, d, op.Vd, 0), mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ res = EmitMoveDoubleWordToSide(context, res, 0, op.Vd);
+
+ context.Copy(d, EmitDoubleWordInsert(context, d, res, op.Vd));
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ (int Qx, int Ix)[] tableTuples = new (int, int)[length];
+ for (int i = 0; i < length; i++)
+ {
+ tableTuples[i] = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
+ }
+
+ int byteLength = length * 8;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));
+
+ Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
+ Operand elemRes = default; // Note: This is I64 for ease of calculation.
+
+ // TODO: Branching rather than conditional select.
+
+ // Get indexed byte.
+                    // To simplify (ha) the IL, we get bytes from every vector and use a nested conditional select to choose the right result.
+                    // This does have to extract `length` times for every element, but it is certainly not as bad as it could be.
+
+                    // Which vector number the index falls in.
+ Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
+                    // How far to shift to extract the selected byte.
+ Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));
+
+ for (int i = 0; i < length; i++)
+ {
+ (int qx, int ix) = tableTuples[i];
+                        // Get the whole vector; we'll extract a byte from it.
+ Operand lookupResult;
+ if (qx == op.Qd)
+ {
+ // Result contains the current state of the vector.
+ lookupResult = context.VectorExtract(OperandType.I64, res, ix);
+ }
+ else
+ {
+ lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64
+ }
+
+ lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector.
+
+ if (i == 0)
+ {
+                            elemRes = lookupResult; // The first lookup result is used as the default.
+ }
+ else
+ {
+ Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
+ elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
+ }
+ }
+
+ Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);
+
+ res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+
+ public static void Vtrn(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ if (op.Size < 3)
+ {
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
+ Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ public static void Vzip(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
+ Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);
+
+ Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
+ Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
+ resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
+ resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ public static void Vuzp(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, d, m);
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, d, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand dIns, mIns;
+ if (index >= pairs)
+ {
+ int pairIndex = index - pairs;
+ dIns = EmitVectorExtract32(context, op.Qm, (pairIndex << 1) + op.Im, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qm, ((pairIndex << 1) | 1) + op.Im, op.Size, false);
+ }
+ else
+ {
+ dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
+ }
+
+ resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand dPart = d;
+ Operand mPart = m;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ }
+
+ Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+ vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+ Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+ if (!op.Q) // Register insert.
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+ if (overlap)
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+ }
+ else
+ {
+ resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+ }
+ }
+
+ context.Copy(d, resD);
+ if (!overlap)
+ {
+ context.Copy(m, resM);
+ }
+ }
+
+ private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialM = m;
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to side 0, for consistency.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ d = EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ }
+
+ (Operand resM, Operand resD) = shuffleFunc(m, d);
+
+ bool overlap = op.Qm == op.Qd;
+
+ if (!op.Q) // Register insert.
+ {
+ resM = EmitDoubleWordInsert(context, initialM, EmitMoveDoubleWordToSide(context, resM, 0, op.Vm), op.Vm);
+ resD = EmitDoubleWordInsert(context, overlap ? resM : initialD, EmitMoveDoubleWordToSide(context, resD, 0, op.Vd), op.Vd);
+ }
+
+ if (!overlap)
+ {
+ context.Copy(initialM, resM);
+ }
+
+ context.Copy(initialD, resD);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 00000000..19e41119
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1827 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+#region "Masks"
+ private static readonly long[] _masks_SliSri = new long[] // Replication masks.
+ {
+ 0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L
+ };
+#endregion
+
+ public static void Rshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
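+                // Add 1 << (shift - 1) before shifting so the result rounds to nearest.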
+ long roundConst = 1L << (shift - 1);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Operand mask = default;
+
+ switch (op.Size + 1)
+ {
+ case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
+ case 2: mask = X86GetAllElements(context, (int)roundConst); break;
+ case 3: mask = X86GetAllElements(context, roundConst); break;
+ }
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ Operand res = context.AddIntrinsic(addInst, n, mask);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(srlInst, res, Const(shift));
+
+ Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true);
+ }
+ }
+
+ public static void Shl_S(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+ }
+ else
+ {
+ EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+ }
+ else if (shift >= eSize)
+ {
+                if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
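+                // Gf2p8affineqb multiplies each byte by an 8x8 bit matrix; a shifted identity matrix
+                // implements a per-byte left shift, which SSE has no native instruction for.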
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shll_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int shift = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Shrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false);
+ }
+ }
+
+ public static void Sli_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: true);
+ }
+ }
+
+ public static void Sli_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: false);
+ }
+ }
+
+ public static void Sqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqrshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqrshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sri_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: true);
+ }
+ }
+
+ public static void Sri_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: false);
+ }
+ }
+
+ public static void Srshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+ }
+ }
+
+ public static void Srshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
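+ // Rounding trick: (n << (eSize - shift)) >> (eSize - 1) isolates bit (shift - 1)
+ // of each element, which is then added to the arithmetic shift result.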
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srsra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Srsra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Sshl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Sshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+ }
+ }
+
+ public static void Sshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix;
+
+ if (shift < 8)
+ {
+ bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ // Propagate the sign bit into the bits vacated by the shift.
+ bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
+ }
+ else
+ {
+ // Replicate sign-bit into all bits
+ bitMatrix = 0x8080808080808080UL;
+ }
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx);
+ }
+ }
+
+ public static void Ssra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ssra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Uqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Uqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round);
+ }
+ }
+
+ public static void Urshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Urshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Ursra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ursra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ushl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar);
+ }
+ }
+
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.None);
+ }
+ }
+
+ public static void Ushll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movzxInst = X86PmovzxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movzxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Ushr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+ }
+ }
+
+ public static void Ushr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx);
+ }
+ }
+
+ public static void Usra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Usra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ [Flags]
+ private enum ShrImmFlags
+ {
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+
+ Round = 1 << 2,
+ Accumulate = 1 << 3,
+
+ ScalarSx = Scalar | Signed,
+ ScalarZx = Scalar,
+
+ VectorSx = Signed,
+ VectorZx = 0
+ }
+
+ private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+ }
+
+ private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+ }
+
+ private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+ }
+
+ private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+ }
+
+ private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & ShrImmFlags.Scalar) != 0;
+ bool signed = (flags & ShrImmFlags.Signed) != 0;
+ bool round = (flags & ShrImmFlags.Round) != 0;
+ bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;
+
+ int shift = GetImmShr(op);
+
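+ // Rounding adds half of the smallest kept increment (1 << (shift - 1)) before the shift.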
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 3) */
+ {
+ e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
+ }
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = context.ShiftRightUI(e, Const(shift));
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ e = signedSrc ? EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
+
+ private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+ }
+
+ private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+ }
+
+ private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSli(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
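+ // SLI keeps the low "shift" bits of the destination and inserts the shifted source above them.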
+ ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = context.ShiftLeft(ne, Const(shift));
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSri(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
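+ // SRI keeps the top "shift" bits of each destination element and inserts the shifted source below them.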
+ ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ [Flags]
+ private enum ShlRegFlags
+ {
+ None = 0,
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+ Round = 1 << 2,
+ Saturating = 1 << 3
+ }
+
+ private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
+ {
+ bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
+ bool signed = flags.HasFlag(ShlRegFlags.Signed);
+ bool round = flags.HasFlag(ShlRegFlags.Round);
+ bool saturating = flags.HasFlag(ShlRegFlags.Saturating);
+
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0);
+
+ Operand e = !saturating
+ ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed)
+ : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // long SignedShlReg(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size);
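+ // A negative shift amount in the bottom byte of the register shifts right instead.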
+ private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+ Operand zeroL = Const(0L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp);
+ context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size);
+ private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ if (eSize == 64)
+ {
+ Operand sarOrShr = signed
+ ? context.ShiftRightSI(shl, shiftLsB)
+ : context.ShiftRightUI(shl, shiftLsB);
+ context.Copy(res, shl);
+ context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ }
+ else
+ {
+ context.Copy(res, signed
+ ? EmitSignedSrcSatQ(context, shl, size, signedDst: true)
+ : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // long SignedShrReg(long op, int shift, bool round, int eSize);
+ private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand one = Const(1);
+ Operand oneL = Const(1L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual);
+ Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand sar = context.ShiftRightSI(add, shift);
+ if (eSize == 64)
+ {
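+ // The rounding add can overflow for large positive 64-bit values; detect the
+ // wrap (sign change of a positive op) and use a logical shift of the sum instead.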
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add));
+ Operand isLess = context.ICompareLess(left, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, shr, sar));
+ }
+ else
+ {
+ context.Copy(res, sar);
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand negOneL = Const(-1L);
+
+ Operand sar = context.ShiftRightSI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ Operand isLess = context.ICompareLess(op, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize);
+ private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand one = Const(1);
+ Operand oneUL = Const(1UL);
+ Operand eSizeMaxOp = Const(64);
+ Operand oneShl63UL = Const(1UL << 63);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater);
+ Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp);
+ context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr));
+ if (eSize == 64)
+ {
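+ // The rounding add may wrap for 64-bit elements; if it did, OR the lost
+ // carry bit (2^64 >> shift) back into the result.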
+ context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI);
+ Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one)));
+ context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroUL = Const(0UL);
+
+ Operand shr = context.ShiftRightUI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ context.Copy(res, zeroUL);
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
new file mode 100644
index 00000000..9ac68088
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -0,0 +1,389 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vqrshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqrshrun(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vqshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqshrun(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vrshr(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: false);
+ }
+
+ public static void Vrshrn(ArmEmitterContext context)
+ {
+ EmitRoundShrImmNarrowOp(context, signed: false);
+ }
+
+ public static void Vrsra(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: true);
+ }
+
+ public static void Vshl(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift)));
+ }
+
+ public static void Vshl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false));
+ }
+ }
+
+ public static void Vshll(ArmEmitterContext context)
+ {
+ OpCode32SimdShImmLong op = (OpCode32SimdShImmLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ me = context.ShiftLeft(me, Const(op.Shift));
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vshr(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
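+ // A shift equal to the element size is encodable: it zeroes unsigned elements,
+ // while signed elements are clamped so the result is the replicated sign bit.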
+ if (op.U)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift)));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift))));
+ }
+ }
+
+ public static void Vshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
+ }
+
+ public static void Vsra(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
+ if (op.U)
+ {
+ EmitVectorImmBinaryQdQmOpZx32(context, (op1, op2) =>
+ {
+ Operand shiftRes = shift > maxShift ? Const(op2.Type, 0) : context.ShiftRightUI(op2, Const(shift));
+
+ return context.Add(op1, shiftRes);
+ });
+ }
+ else
+ {
+ EmitVectorImmBinaryQdQmOpSx32(context, (op1, op2) => context.Add(op1, context.ShiftRightSI(op2, Const(Math.Min(maxShift, shift)))));
+ }
+ }
+
+ public static void EmitRoundShrImmOp(ArmEmitterContext context, bool accumulate)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ if (op.U)
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightUI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift), accumulate);
+ }
+ }
+ else
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightSI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift), accumulate);
+ }
+ }
+ }
+
+ private static void EmitRoundShrImmNarrowOp(ArmEmitterContext context, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ op1 = signed ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signed, roundConst, shift); // shift <= 32
+ }
+
+ return op1;
+ }, signed);
+ }
+
+ private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
+ {
+ if (shiftLsB.Type == OperandType.I64)
+ {
+ shiftLsB = context.ConvertI64ToI32(shiftLsB);
+ }
+
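+ // Only the bottom byte of the shift register is used, as a signed value.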
+ shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB);
+ Debug.Assert((uint)size < 4u);
+
+ Operand negShiftLsB = context.Negate(shiftLsB);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0));
+
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB);
+
+ Operand res = context.ConditionalSelect(isPositive, shl, shr);
+
+ if (unsigned)
+ {
+ Operand isOutOfRange = context.BitwiseOr(
+ context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)),
+ context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)));
+
+ return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res);
+ }
+ else
+ {
+ Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size));
+ Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size));
+
+ // Also zero when the shift is too negative but the value was non-negative; negative values saturate to -1 instead.
+ isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0))));
+
+ Operand min = (op.Type == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res));
+ }
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ if (scalar)
+ {
+ // TODO: Support scalar operation.
+ throw new NotImplementedException();
+ }
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ }
+
+ op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ return EmitSatQ(context, op1, 8 << op.Size, signedSrc, signedDst);
+ }, signedSrc);
+ }
+
+ private static int GetImmShr(OpCode32SimdShImm op)
+ {
+ return (8 << op.Size) - op.Shift; // Right shift amounts are encoded flipped (shift = eSize - imm).
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
+
+ private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signedSrc, bool signedDst)
+ {
+ Debug.Assert(eSize <= 32);
+
+ long intMin = signedDst ? -(1L << (eSize - 1)) : 0;
+ long intMax = signedDst ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
+
+ Operand gt = signedSrc
+ ? context.ICompareGreater(value, Const(value.Type, intMax))
+ : context.ICompareGreaterUI(value, Const(value.Type, intMax));
+
+ Operand lt = signedSrc
+ ? context.ICompareLess(value, Const(value.Type, intMin))
+ : context.ICompareLessUI(value, Const(value.Type, intMin));
+
+ value = context.ConditionalSelect(gt, Const(value.Type, intMax), value);
+ value = context.ConditionalSelect(lt, Const(value.Type, intMin), value);
+
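+ // If either bound was hit, set the sticky QC (saturation) flag.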
+ Operand lblNoSat = Label();
+
+ context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt));
+
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+
+ context.MarkLabel(lblNoSat);
+
+ return value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem.cs b/src/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 00000000..f84829aa
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,248 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int DczSizeLog2 = 4; // Log2 size in words
+ public const int DczSizeInBytes = 4 << DczSizeLog2;
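+ // With DczSizeLog2 = 4, DC ZVA zeroes 16 words (64 bytes) per invocation.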
+
+ public static void Isb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ MethodInfo info;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0000_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0)); break;
+ case 0b11_011_0000_0000_111: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0)); break;
+ case 0b11_011_0100_0010_000: EmitGetNzcv(context); return;
+ case 0b11_011_0100_0100_000: EmitGetFpcr(context); return;
+ case 0b11_011_0100_0100_001: EmitGetFpsr(context); return;
+ case 0b11_011_1101_0000_010: EmitGetTpidrEl0(context); return;
+ case 0b11_011_1101_0000_011: EmitGetTpidrroEl0(context); return;
+ case 0b11_011_1110_0000_000: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0)); break;
+ case 0b11_011_1110_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
+ case 0b11_011_1110_0000_010: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0)); break;
+
+ default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+
+ SetIntOrZR(context, op.Rt, context.Call(info));
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0100_0010_000: EmitSetNzcv(context); return;
+ case 0b11_011_0100_0100_000: EmitSetFpcr(context); return;
+ case 0b11_011_0100_0100_001: EmitSetFpsr(context); return;
+ case 0b11_011_1101_0000_010: EmitSetTpidrEl0(context); return;
+
+ default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Nop(ArmEmitterContext context)
+ {
+ // Do nothing.
+ }
+
+ public static void Sys(ArmEmitterContext context)
+ {
+ // This instruction performs CPU maintenance operations such as cache invalidation
+ // and address translation.
+ // Since no caches are emulated, most of these are treated as no-ops; DC ZVA is
+ // implemented below as actual zero stores.
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0111_0100_001:
+ {
+ // DC ZVA
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ for (long offset = 0; offset < DczSizeInBytes; offset += 8)
+ {
+ Operand address = context.Add(t, Const(offset));
+
+ InstEmitMemoryHelper.EmitStore(context, address, RegisterConsts.ZeroIndex, 3);
+ }
+
+ break;
+ }
+
+ // No-op
+ case 0b11_011_0111_1110_001: // DC CIVAC
+ break;
+
+ case 0b11_011_0111_0101_001: // IC IVAU
+ Operand target = Register(op.Rt, RegisterType.Integer, OperandType.I64);
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine)), target);
+ break;
+ }
+ }
+
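+ // Packs op0:op1:CRn:CRm:op2 into a single value: op0 at [15:14], op1 at [13:11],
+ // CRn at [10:7], CRm at [6:3] and op2 at [2:0].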
+ private static int GetPackedId(OpCodeSystem op)
+ {
+ int id;
+
+ id = op.Op2 << 0;
+ id |= op.CRm << 3;
+ id |= op.CRn << 7;
+ id |= op.Op1 << 11;
+ id |= op.Op0 << 14;
+
+ return id;
+ }
+
+ private static void EmitGetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nzcv = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+
+ SetIntOrZR(context, op.Rt, nzcv);
+ }
+
+ private static void EmitGetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ fpcr = context.BitwiseOr(fpcr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpcr);
+ }
+
+ private static void EmitGetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.SyncQcFlag();
+
+ Operand fpsr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ fpsr = context.BitwiseOr(fpsr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpsr);
+ }
+
+ private static void EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nzcv = GetIntOrZR(context, op.Rt);
+ nzcv = context.ConvertI64ToI32(nzcv);
+
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.VFlag)), Const(1)));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.CFlag)), Const(1)));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.ZFlag)), Const(1)));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.NFlag)), Const(1)));
+ }
+
+ private static void EmitSetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = GetIntOrZR(context, op.Rt);
+ fpcr = context.ConvertI64ToI32(fpcr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.ClearQcFlagIfModified();
+
+ Operand fpsr = GetIntOrZR(context, op.Rt);
+ fpsr = context.ConvertI64ToI32(fpsr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand value = GetIntOrZR(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem32.cs b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
new file mode 100644
index 00000000..f2732c99
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Mcr(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+                        throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ switch (op.Opc2)
+ {
+ case 2:
+ EmitSetTpidrEl0(context); return;
+
+ default:
+                            throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ case 7:
+ switch (op.CRm) // Cache and Memory barrier.
+ {
+ case 10:
+ switch (op.Opc2)
+ {
+                                case 5: // Data Memory Barrier (DMB).
+ return; // No-op.
+
+ default:
+                                    throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+                            throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+                    throw new NotImplementedException($"Unknown MCR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Mrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ Operand result;
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+ throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ switch (op.Opc2)
+ {
+ case 2:
+ result = EmitGetTpidrEl0(context); break;
+
+ case 3:
+ result = EmitGetTpidrroEl0(context); break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+
+            if (op.Rt == RegisterAlias.Aarch32Pc)
+            {
+                // Special behavior: copy NZCV flags into APSR.
+                EmitSetNzcv(context, result);
+            }
+            else
+            {
+                SetIntA32(context, op.Rt, result);
+            }
+ }
+
+ public static void Mrrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ int opc = op.MrrcOp;
+
+ MethodInfo info;
+
+ switch (op.CRm)
+ {
+ case 14: // Timer.
+ switch (opc)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+
+ Operand result = context.Call(info);
+
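+            // For MRRC the second destination register (Rt2) is encoded in the
+            // CRn field, so the high half of the 64-bit value goes to op.CRn.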
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(result));
+ SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32))));
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCode32Mrs op = (OpCode32Mrs)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+                // The R bit is clear, so this reads the APSR (CPSR) rather than the SPSR.
+                Operand apsr = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+                apsr = context.BitwiseOr(apsr, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+                apsr = context.BitwiseOr(apsr, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+                apsr = context.BitwiseOr(apsr, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+                apsr = context.BitwiseOr(apsr, context.ShiftLeft(GetFlag(PState.QFlag), Const((int)PState.QFlag)));
+
+                // TODO: Remaining flags.
+
+                SetIntA32(context, op.Rd, apsr);
+ }
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCode32MsrReg op = (OpCode32MsrReg)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+ if ((op.Mask & 8) != 0)
+ {
+ Operand value = GetIntA32(context, op.Rn);
+
+ EmitSetNzcv(context, value);
+
+ Operand q = context.BitwiseAnd(context.ShiftRightUI(value, Const((int)PState.QFlag)), Const(1));
+
+ SetFlag(context, PState.QFlag, q);
+ }
+
+ if ((op.Mask & 4) != 0)
+ {
+ throw new NotImplementedException("APSR_g");
+ }
+
+ if ((op.Mask & 2) != 0)
+ {
+ throw new NotImplementedException("CPSR_x");
+ }
+
+ if ((op.Mask & 1) != 0)
+ {
+ throw new NotImplementedException("CPSR_c");
+ }
+ }
+ }
+
+ public static void Nop(ArmEmitterContext context) { }
+
+ public static void Vmrs(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001)
+ {
+ // Special behavior: copy NZCV flags into APSR.
+ SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag));
+ SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag));
+ SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag));
+ SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag));
+
+ return;
+ }
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitGetFpscr(context); return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ public static void Vmsr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitSetFpscr(context); return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand t)
+ {
+ Operand v = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.VFlag)), Const(1));
+ Operand c = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.CFlag)), Const(1));
+ Operand z = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.ZFlag)), Const(1));
+ Operand n = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.NFlag)), Const(1));
+
+ SetFlag(context, PState.VFlag, v);
+ SetFlag(context, PState.CFlag, c);
+ SetFlag(context, PState.ZFlag, z);
+ SetFlag(context, PState.NFlag, n);
+ }
+
+ private static void EmitGetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ fpscr = context.BitwiseOr(fpscr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntA32(context, op.Rt, fpscr);
+ }
+
+ private static void EmitSetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = GetIntA32(context, op.Rt);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static Operand EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+ }
+
+ private static Operand EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), context.ZeroExtend32(OperandType.I64, value));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 00000000..fd71d92e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,685 @@
+namespace ARMeilleure.Instructions
+{
+ enum InstName
+ {
+ // Base (AArch64)
+ Adc,
+ Adcs,
+ Add,
+ Adds,
+ Adr,
+ Adrp,
+ And,
+ Ands,
+ Asrv,
+ B,
+ B_Cond,
+ Bfm,
+ Bic,
+ Bics,
+ Bl,
+ Blr,
+ Br,
+ Brk,
+ Cbnz,
+ Cbz,
+ Ccmn,
+ Ccmp,
+ Clrex,
+ Cls,
+ Clz,
+ Crc32b,
+ Crc32h,
+ Crc32w,
+ Crc32x,
+ Crc32cb,
+ Crc32ch,
+ Crc32cw,
+ Crc32cx,
+ Csdb,
+ Csel,
+ Csinc,
+ Csinv,
+ Csneg,
+ Dmb,
+ Dsb,
+ Eon,
+ Eor,
+ Esb,
+ Extr,
+ Hint,
+ Isb,
+ It,
+ Ldar,
+ Ldaxp,
+ Ldaxr,
+ Ldp,
+ Ldr,
+ Ldr_Literal,
+ Ldrs,
+ Ldxr,
+ Ldxp,
+ Lslv,
+ Lsrv,
+ Madd,
+ Movk,
+ Movn,
+ Movz,
+ Mrs,
+ Msr,
+ Msub,
+ Nop,
+ Orn,
+ Orr,
+ Prfm,
+ Rbit,
+ Ret,
+ Rev16,
+ Rev32,
+ Rev64,
+ Rorv,
+ Sbc,
+ Sbcs,
+ Sbfm,
+ Sdiv,
+ Sel,
+ Sev,
+ Sevl,
+ Shsub8,
+ Smaddl,
+ Smsubl,
+ Smulh,
+ Smull,
+ Smulw_,
+ Ssat,
+ Ssat16,
+ Stlr,
+ Stlxp,
+ Stlxr,
+ Stp,
+ Str,
+ Stxp,
+ Stxr,
+ Sub,
+ Subs,
+ Svc,
+ Sxtb,
+ Sxth,
+ Sys,
+ Tbnz,
+ Tbz,
+ Tsb,
+ Ubfm,
+ Udiv,
+ Umaddl,
+ Umsubl,
+ Umulh,
+ Und,
+ Wfe,
+ Wfi,
+ Yield,
+
+ // FP & SIMD (AArch64)
+ Abs_S,
+ Abs_V,
+ Add_S,
+ Add_V,
+ Addhn_V,
+ Addp_S,
+ Addp_V,
+ Addv_V,
+ Aesd_V,
+ Aese_V,
+ Aesimc_V,
+ Aesmc_V,
+ And_V,
+ Bic_V,
+ Bic_Vi,
+ Bif_V,
+ Bit_V,
+ Bsl_V,
+ Cls_V,
+ Clz_V,
+ Cmeq_S,
+ Cmeq_V,
+ Cmge_S,
+ Cmge_V,
+ Cmgt_S,
+ Cmgt_V,
+ Cmhi_S,
+ Cmhi_V,
+ Cmhs_S,
+ Cmhs_V,
+ Cmle_S,
+ Cmle_V,
+ Cmlt_S,
+ Cmlt_V,
+ Cmtst_S,
+ Cmtst_V,
+ Cnt_V,
+ Dup_Gp,
+ Dup_S,
+ Dup_V,
+ Eor_V,
+ Ext_V,
+ Fabd_S,
+ Fabd_V,
+ Fabs_S,
+ Fabs_V,
+ Facge_S,
+ Facge_V,
+ Facgt_S,
+ Facgt_V,
+ Fadd_S,
+ Fadd_V,
+ Faddp_S,
+ Faddp_V,
+ Fccmp_S,
+ Fccmpe_S,
+ Fcmeq_S,
+ Fcmeq_V,
+ Fcmge_S,
+ Fcmge_V,
+ Fcmgt_S,
+ Fcmgt_V,
+ Fcmle_S,
+ Fcmle_V,
+ Fcmlt_S,
+ Fcmlt_V,
+ Fcmp_S,
+ Fcmpe_S,
+ Fcsel_S,
+ Fcvt_S,
+ Fcvtas_Gp,
+ Fcvtas_S,
+ Fcvtas_V,
+ Fcvtau_Gp,
+ Fcvtau_S,
+ Fcvtau_V,
+ Fcvtl_V,
+ Fcvtms_Gp,
+ Fcvtms_V,
+ Fcvtmu_Gp,
+ Fcvtn_V,
+ Fcvtns_Gp,
+ Fcvtns_S,
+ Fcvtns_V,
+ Fcvtnu_S,
+ Fcvtnu_V,
+ Fcvtps_Gp,
+ Fcvtpu_Gp,
+ Fcvtzs_Gp,
+ Fcvtzs_Gp_Fixed,
+ Fcvtzs_S,
+ Fcvtzs_V,
+ Fcvtzs_V_Fixed,
+ Fcvtzu_Gp,
+ Fcvtzu_Gp_Fixed,
+ Fcvtzu_S,
+ Fcvtzu_V,
+ Fcvtzu_V_Fixed,
+ Fdiv_S,
+ Fdiv_V,
+ Fmadd_S,
+ Fmax_S,
+ Fmax_V,
+ Fmaxnm_S,
+ Fmaxnm_V,
+ Fmaxnmp_S,
+ Fmaxnmp_V,
+ Fmaxnmv_V,
+ Fmaxp_V,
+ Fmaxv_V,
+ Fmin_S,
+ Fmin_V,
+ Fminnm_S,
+ Fminnm_V,
+ Fminnmp_S,
+ Fminnmp_V,
+ Fminnmv_V,
+ Fminp_V,
+ Fminv_V,
+ Fmla_Se,
+ Fmla_V,
+ Fmla_Ve,
+ Fmls_Se,
+ Fmls_V,
+ Fmls_Ve,
+ Fmov_S,
+ Fmov_Si,
+ Fmov_Vi,
+ Fmov_Ftoi,
+ Fmov_Itof,
+ Fmov_Ftoi1,
+ Fmov_Itof1,
+ Fmsub_S,
+ Fmul_S,
+ Fmul_Se,
+ Fmul_V,
+ Fmul_Ve,
+ Fmulx_S,
+ Fmulx_Se,
+ Fmulx_V,
+ Fmulx_Ve,
+ Fneg_S,
+ Fneg_V,
+ Fnmadd_S,
+ Fnmsub_S,
+ Fnmul_S,
+ Frecpe_S,
+ Frecpe_V,
+ Frecps_S,
+ Frecps_V,
+ Frecpx_S,
+ Frinta_S,
+ Frinta_V,
+ Frinti_S,
+ Frinti_V,
+ Frintm_S,
+ Frintm_V,
+ Frintn_S,
+ Frintn_V,
+ Frintp_S,
+ Frintp_V,
+ Frintx_S,
+ Frintx_V,
+ Frintz_S,
+ Frintz_V,
+ Frsqrte_S,
+ Frsqrte_V,
+ Frsqrts_S,
+ Frsqrts_V,
+ Fsqrt_S,
+ Fsqrt_V,
+ Fsub_S,
+ Fsub_V,
+ Ins_Gp,
+ Ins_V,
+ Ld__Vms,
+ Ld__Vss,
+ Mla_V,
+ Mla_Ve,
+ Mls_V,
+ Mls_Ve,
+ Movi_V,
+ Mul_V,
+ Mul_Ve,
+ Mvni_V,
+ Neg_S,
+ Neg_V,
+ Not_V,
+ Orn_V,
+ Orr_V,
+ Orr_Vi,
+ Pmull_V,
+ Raddhn_V,
+ Rbit_V,
+ Rev16_V,
+ Rev32_V,
+ Rev64_V,
+ Rshrn_V,
+ Rsubhn_V,
+ Saba_V,
+ Sabal_V,
+ Sabd_V,
+ Sabdl_V,
+ Sadalp_V,
+ Saddl_V,
+ Saddlp_V,
+ Saddlv_V,
+ Saddw_V,
+ Scvtf_Gp,
+ Scvtf_Gp_Fixed,
+ Scvtf_S,
+ Scvtf_S_Fixed,
+ Scvtf_V,
+ Scvtf_V_Fixed,
+ Sha1c_V,
+ Sha1h_V,
+ Sha1m_V,
+ Sha1p_V,
+ Sha1su0_V,
+ Sha1su1_V,
+ Sha256h_V,
+ Sha256h2_V,
+ Sha256su0_V,
+ Sha256su1_V,
+ Shadd_V,
+ Shl_S,
+ Shl_V,
+ Shll_V,
+ Shrn_V,
+ Shsub_V,
+ Sli_S,
+ Sli_V,
+ Smax_V,
+ Smaxp_V,
+ Smaxv_V,
+ Smin_V,
+ Sminp_V,
+ Sminv_V,
+ Smlal_V,
+ Smlal_Ve,
+ Smlsl_V,
+ Smlsl_Ve,
+ Smov_S,
+ Smull_V,
+ Smull_Ve,
+ Sqabs_S,
+ Sqabs_V,
+ Sqadd_S,
+ Sqadd_V,
+ Sqdmulh_S,
+ Sqdmulh_V,
+ Sqdmulh_Ve,
+ Sqneg_S,
+ Sqneg_V,
+ Sqrdmulh_S,
+ Sqrdmulh_V,
+ Sqrdmulh_Ve,
+ Sqrshl_V,
+ Sqrshrn_S,
+ Sqrshrn_V,
+ Sqrshrun_S,
+ Sqrshrun_V,
+ Sqshl_V,
+ Sqshrn_S,
+ Sqshrn_V,
+ Sqshrun_S,
+ Sqshrun_V,
+ Sqsub_S,
+ Sqsub_V,
+ Sqxtn_S,
+ Sqxtn_V,
+ Sqxtun_S,
+ Sqxtun_V,
+ Srhadd_V,
+ Sri_S,
+ Sri_V,
+ Srshl_V,
+ Srshr_S,
+ Srshr_V,
+ Srsra_S,
+ Srsra_V,
+ Sshl_S,
+ Sshl_V,
+ Sshll_V,
+ Sshr_S,
+ Sshr_V,
+ Ssra_S,
+ Ssra_V,
+ Ssubl_V,
+ Ssubw_V,
+ St__Vms,
+ St__Vss,
+ Sub_S,
+ Sub_V,
+ Subhn_V,
+ Suqadd_S,
+ Suqadd_V,
+ Tbl_V,
+ Tbx_V,
+ Trn1_V,
+ Trn2_V,
+ Uaba_V,
+ Uabal_V,
+ Uabd_V,
+ Uabdl_V,
+ Uadalp_V,
+ Uaddl_V,
+ Uaddlp_V,
+ Uaddlv_V,
+ Uaddw_V,
+ Ucvtf_Gp,
+ Ucvtf_Gp_Fixed,
+ Ucvtf_S,
+ Ucvtf_S_Fixed,
+ Ucvtf_V,
+ Ucvtf_V_Fixed,
+ Uhadd_V,
+ Uhsub_V,
+ Umax_V,
+ Umaxp_V,
+ Umaxv_V,
+ Umin_V,
+ Uminp_V,
+ Uminv_V,
+ Umlal_V,
+ Umlal_Ve,
+ Umlsl_V,
+ Umlsl_Ve,
+ Umov_S,
+ Umull_V,
+ Umull_Ve,
+ Uqadd_S,
+ Uqadd_V,
+ Uqrshl_V,
+ Uqrshrn_S,
+ Uqrshrn_V,
+ Uqshl_V,
+ Uqshrn_S,
+ Uqshrn_V,
+ Uqsub_S,
+ Uqsub_V,
+ Uqxtn_S,
+ Uqxtn_V,
+ Urhadd_V,
+ Urshl_V,
+ Urshr_S,
+ Urshr_V,
+ Ursra_S,
+ Ursra_V,
+ Ushl_S,
+ Ushl_V,
+ Ushll_V,
+ Ushr_S,
+ Ushr_V,
+ Usqadd_S,
+ Usqadd_V,
+ Usra_S,
+ Usra_V,
+ Usubl_V,
+ Usubw_V,
+ Uzp1_V,
+ Uzp2_V,
+ Xtn_V,
+ Zip1_V,
+ Zip2_V,
+
+ // Base (AArch32)
+ Bfc,
+ Bfi,
+ Blx,
+ Bx,
+ Cmp,
+ Cmn,
+ Movt,
+ Mul,
+ Lda,
+ Ldab,
+ Ldaex,
+ Ldaexb,
+ Ldaexd,
+ Ldaexh,
+ Ldah,
+ Ldm,
+ Ldrb,
+ Ldrd,
+ Ldrex,
+ Ldrexb,
+ Ldrexd,
+ Ldrexh,
+ Ldrh,
+ Ldrsb,
+ Ldrsh,
+ Mcr,
+ Mla,
+ Mls,
+ Mov,
+ Mrc,
+ Mrrc,
+ Mvn,
+ Pkh,
+ Pld,
+ Pop,
+ Push,
+ Rev,
+ Revsh,
+ Rsb,
+ Rsc,
+ Sadd8,
+ Sbfx,
+ Shadd8,
+ Smla__,
+ Smlal,
+ Smlal__,
+ Smlaw_,
+ Smmla,
+ Smmls,
+ Smul__,
+ Smmul,
+ Ssub8,
+ Stl,
+ Stlb,
+ Stlex,
+ Stlexb,
+ Stlexd,
+ Stlexh,
+ Stlh,
+ Stm,
+ Strb,
+ Strd,
+ Strex,
+ Strexb,
+ Strexd,
+ Strexh,
+ Strh,
+ Sxtb16,
+ Tbb,
+ Tbh,
+ Teq,
+ Trap,
+ Tst,
+ Uadd8,
+ Ubfx,
+ Uhadd8,
+ Uhsub8,
+ Umaal,
+ Umlal,
+ Umull,
+ Usat,
+ Usat16,
+ Usub8,
+ Uxtb,
+ Uxtb16,
+ Uxth,
+
+ // FP & SIMD (AArch32)
+ Vabd,
+ Vabdl,
+ Vabs,
+ Vadd,
+ Vaddl,
+ Vaddw,
+ Vand,
+ Vbic,
+ Vbif,
+ Vbit,
+ Vbsl,
+ Vceq,
+ Vcge,
+ Vcgt,
+ Vcle,
+ Vclt,
+ Vcmp,
+ Vcmpe,
+ Vcnt,
+ Vcvt,
+ Vdiv,
+ Vdup,
+ Veor,
+ Vext,
+ Vfma,
+ Vfms,
+ Vfnma,
+ Vfnms,
+ Vhadd,
+ Vld1,
+ Vld2,
+ Vld3,
+ Vld4,
+ Vldm,
+ Vldr,
+ Vmax,
+ Vmaxnm,
+ Vmin,
+ Vminnm,
+ Vmla,
+ Vmlal,
+ Vmls,
+ Vmlsl,
+ Vmov,
+ Vmovl,
+ Vmovn,
+ Vmrs,
+ Vmsr,
+ Vmul,
+ Vmull,
+ Vmvn,
+ Vneg,
+ Vnmul,
+ Vnmla,
+ Vnmls,
+ Vorn,
+ Vorr,
+ Vpadd,
+ Vpaddl,
+ Vpmax,
+ Vpmin,
+ Vqadd,
+ Vqdmulh,
+ Vqmovn,
+ Vqmovun,
+ Vqrshrn,
+ Vqrshrun,
+ Vqshrn,
+ Vqshrun,
+ Vqsub,
+ Vrev,
+ Vrhadd,
+ Vrint,
+ Vrinta,
+ Vrintm,
+ Vrintn,
+ Vrintp,
+ Vrintx,
+ Vrshr,
+ Vrshrn,
+ Vsel,
+ Vshl,
+ Vshll,
+ Vshr,
+ Vshrn,
+ Vst1,
+ Vst2,
+ Vst3,
+ Vst4,
+ Vstm,
+ Vstr,
+ Vsqrt,
+ Vrecpe,
+ Vrecps,
+ Vrsqrte,
+ Vrsqrts,
+ Vrsra,
+ Vsra,
+ Vsub,
+ Vsubl,
+ Vsubw,
+ Vtbl,
+ Vtrn,
+ Vtst,
+ Vuzp,
+ Vzip,
+ }
+}
diff --git a/src/ARMeilleure/Instructions/NativeInterface.cs b/src/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 00000000..2c35387a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,195 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class NativeInterface
+ {
+ private class ThreadContext
+ {
+ public ExecutionContext Context { get; }
+ public IMemoryManager Memory { get; }
+ public Translator Translator { get; }
+
+ public ThreadContext(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = context;
+ Memory = memory;
+ Translator = translator;
+ }
+ }
+
+ [ThreadStatic]
+ private static ThreadContext Context;
+
+ public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = new ThreadContext(context, memory, translator);
+ }
+
+ public static void UnregisterThread()
+ {
+ Context = null;
+ }
+
+ public static void Break(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnBreak(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void SupervisorCall(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnSupervisorCall(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void Undefined(ulong address, int opCode)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnUndefined(address, opCode);
+
+ Statistics.ResumeTimer();
+ }
+
+ #region "System registers"
+ public static ulong GetCtrEl0()
+ {
+ return (ulong)GetContext().CtrEl0;
+ }
+
+ public static ulong GetDczidEl0()
+ {
+ return (ulong)GetContext().DczidEl0;
+ }
+
+ public static ulong GetCntfrqEl0()
+ {
+ return GetContext().CntfrqEl0;
+ }
+
+ public static ulong GetCntpctEl0()
+ {
+ return GetContext().CntpctEl0;
+ }
+
+ public static ulong GetCntvctEl0()
+ {
+ return GetContext().CntvctEl0;
+ }
+ #endregion
+
+ #region "Read"
+ public static byte ReadByte(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<byte>(address);
+ }
+
+ public static ushort ReadUInt16(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<ushort>(address);
+ }
+
+ public static uint ReadUInt32(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<uint>(address);
+ }
+
+ public static ulong ReadUInt64(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<ulong>(address);
+ }
+
+ public static V128 ReadVector128(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<V128>(address);
+ }
+ #endregion
+
+ #region "Write"
+ public static void WriteByte(ulong address, byte value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt16(ulong address, ushort value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt32(ulong address, uint value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt64(ulong address, ulong value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteVector128(ulong address, V128 value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+ #endregion
+
+ public static void EnqueueForRejit(ulong address)
+ {
+ Context.Translator.EnqueueForRejit(address, GetContext().ExecutionMode);
+ }
+
+ public static void SignalMemoryTracking(ulong address, ulong size, bool write)
+ {
+ GetMemoryManager().SignalMemoryTracking(address, size, write);
+ }
+
+ public static void ThrowInvalidMemoryAccess(ulong address)
+ {
+ throw new InvalidAccessException(address);
+ }
+
+ public static ulong GetFunctionAddress(ulong address)
+ {
+ TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
+
+ return (ulong)function.FuncPointer.ToInt64();
+ }
+
+ public static void InvalidateCacheLine(ulong address)
+ {
+ Context.Translator.InvalidateJitCacheRegion(address, InstEmit.DczSizeInBytes);
+ }
+
+ public static bool CheckSynchronization()
+ {
+ Statistics.PauseTimer();
+
+ ExecutionContext context = GetContext();
+
+ context.CheckInterrupt();
+
+ Statistics.ResumeTimer();
+
+ return context.Running;
+ }
+
+ public static ExecutionContext GetContext()
+ {
+ return Context.Context;
+ }
+
+ public static IMemoryManager GetMemoryManager()
+ {
+ return Context.Memory;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/SoftFallback.cs b/src/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 00000000..06d76a67
--- /dev/null
+++ b/src/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,624 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFallback
+ {
+        #region "ShrImm64"
+ public static long SignedShrImm64(long value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ if (value < 0L)
+ {
+ return -1L;
+ }
+ else /* if (value >= 0L) */
+ {
+ return 0L;
+ }
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ if (shift <= 63)
+ {
+ long add = value + roundConst;
+
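+                // (~value & (value ^ add)) is negative iff adding the positive
+                // round constant overflowed the signed range; in that case shift
+                // unsigned so the carried-out bit is preserved.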
+ if ((~value & (value ^ add)) < 0L)
+ {
+ return (long)((ulong)add >> shift);
+ }
+ else
+ {
+ return add >> shift;
+ }
+ }
+ else /* if (shift == 64) */
+ {
+ return 0L;
+ }
+ }
+ }
+
+ public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ ulong add = value + (ulong)roundConst;
+
+ if ((add < value) && (add < (ulong)roundConst))
+ {
+ if (shift <= 63)
+ {
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else /* if (shift == 64) */
+ {
+ return 1UL;
+ }
+ }
+ else
+ {
+ if (shift <= 63)
+ {
+ return add >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ }
+ }
+        #endregion
+
+        #region "Saturation"
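+        // Note: converting the integer bounds to float/double rounds them outward
+        // (or leaves them exact), so every value that would overflow the target
+        // type is caught by the comparison and saturates instead.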
+ public static int SatF32ToS32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF32ToS64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF32ToU32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF32ToU64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+
+ public static int SatF64ToS32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF64ToS64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF64ToU32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF64ToU64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+        #endregion
+
+        #region "Count"
+ public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
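+            // value ^ (value >> 1) sets a bit wherever adjacent bits differ, so
+            // the run of redundant leading sign bits ends at the highest set bit.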
+ value ^= value >> 1;
+
+ int highBit = size - 2;
+
+ for (int bit = highBit; bit >= 0; bit--)
+ {
+ if (((int)(value >> bit) & 0b1) != 0)
+ {
+ return (ulong)(highBit - bit);
+ }
+ }
+
+ return (ulong)(size - 1);
+ }
+
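+        // Leading-zero count of a nibble (index 0-15); CountLeadingZeros scans
+        // from the top nibble down and stops at the first nonzero one.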
+ private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ if (value == 0ul)
+ {
+ return (ulong)size;
+ }
+
+ int nibbleIdx = size;
+ int preCount, count = 0;
+
+ do
+ {
+ nibbleIdx -= 4;
+ preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
+ count += preCount;
+ }
+ while (preCount == 4);
+
+ return (ulong)count;
+ }
+        #endregion
+
+        #region "Table"
+ public static V128 Tbl1(V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(default, vector, bytes, tb0);
+ }
+
+ public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0);
+ }
+
+ public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
+ {
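+            // TBL starts from a zeroed result while TBX starts from the
+            // destination vector; indices past the end of the concatenated
+            // table leave the result byte untouched.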
+ byte[] res = new byte[16];
+
+ if (dest != default)
+ {
+ Buffer.BlockCopy(dest.ToArray(), 0, res, 0, bytes);
+ }
+
+ byte[] table = new byte[tb.Length * 16];
+
+ for (byte index = 0; index < tb.Length; index++)
+ {
+ Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16);
+ }
+
+ byte[] v = vector.ToArray();
+
+ for (byte index = 0; index < bytes; index++)
+ {
+ byte tblIndex = v[index];
+
+ if (tblIndex < table.Length)
+ {
+ res[index] = table[tblIndex];
+ }
+ }
+
+ return new V128(res);
+ }
+        #endregion
+
+        #region "Crc32"
+ private const uint Crc32RevPoly = 0xedb88320;
+ private const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value);
+ public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value);
+ public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value);
+ public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value);
+
+ public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value);
+ public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value);
+ public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value);
+ public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value);
+
+ private static uint Crc32h(uint crc, uint poly, ushort val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+
+ return crc;
+ }
+
+ private static uint Crc32w(uint crc, uint poly, uint val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+
+ return crc;
+ }
+
+ private static uint Crc32x(uint crc, uint poly, ulong val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+ crc = Crc32(crc, poly, (byte)(val >> 32));
+ crc = Crc32(crc, poly, (byte)(val >> 40));
+ crc = Crc32(crc, poly, (byte)(val >> 48));
+ crc = Crc32(crc, poly, (byte)(val >> 56));
+
+ return crc;
+ }
+
+ private static uint Crc32(uint crc, uint poly, byte val)
+ {
+ crc ^= val;
+
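+            // Bit-serial update: mask is all ones when the low bit of crc is set,
+            // conditionally XORing in the reflected polynomial without a branch.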
+ for (int bit = 7; bit >= 0; bit--)
+ {
+ uint mask = (uint)(-(int)(crc & 1));
+
+ crc = (crc >> 1) ^ (poly & mask);
+ }
+
+ return crc;
+ }
+        #endregion
+
+        #region "Aes"
+ public static V128 Decrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey));
+ }
+
+ public static V128 Encrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey));
+ }
+
+ public static V128 InverseMixColumns(V128 value)
+ {
+ return CryptoHelper.AesInvMixColumns(value);
+ }
+
+ public static V128 MixColumns(V128 value)
+ {
+ return CryptoHelper.AesMixColumns(value);
+ }
+        #endregion
+
+        #region "Sha1"
+ public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaChoose(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static uint FixedRotate(uint hash_e)
+ {
+ return hash_e.Rol(30);
+ }
+
+ public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaMajority(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaParity(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
+ {
+ ulong t2 = w4_7.Extract<ulong>(0);
+ ulong t1 = w0_3.Extract<ulong>(1);
+
+ V128 result = new V128(t1, t2);
+
+ return result ^ (w0_3 ^ w8_11);
+ }
+
+ public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
+ {
+ V128 t = tw0_3 ^ (w12_15 >> 32);
+
+ uint tE0 = t.Extract<uint>(0);
+ uint tE1 = t.Extract<uint>(1);
+ uint tE2 = t.Extract<uint>(2);
+ uint tE3 = t.Extract<uint>(3);
+
+ return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2));
+ }
+
+ private static void Rol32_160(ref uint y, ref V128 x)
+ {
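+            // Rotates the 160-bit SHA1 state {x, y} by 32 bits: y moves into the
+            // low lane of x and the old top lane of x becomes the new y.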
+ uint xE3 = x.Extract<uint>(3);
+
+ x <<= 32;
+ x.Insert(0, y);
+
+ y = xE3;
+ }
+
+ private static uint ShaChoose(uint x, uint y, uint z)
+ {
+ return ((y ^ z) & x) ^ z;
+ }
+
+ private static uint ShaMajority(uint x, uint y, uint z)
+ {
+ return (x & y) | ((x | y) & z);
+ }
+
+ private static uint ShaParity(uint x, uint y, uint z)
+ {
+ return x ^ y ^ z;
+ }
+
+ private static uint Rol(this uint value, int count)
+ {
+ return (value << count) | (value >> (32 - count));
+ }
+        #endregion
+
+        #region "Sha256"
+ public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
+ }
+
+ public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
+ }
+
+ public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
+ {
+ V128 result = new V128();
+
+ for (int e = 0; e <= 3; e++)
+ {
+ uint elt = (e <= 2 ? w0_3 : w4_7).Extract<uint>(e <= 2 ? e + 1 : 0);
+
+ elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3);
+
+ elt += w0_3.Extract<uint>(e);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
+ {
+ V128 result = new V128();
+
+ ulong t1 = w12_15.Extract<ulong>(1);
+
+ for (int e = 0; e <= 1; e++)
+ {
+ uint elt = t1.ULongPart(e);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.Extract<uint>(e) + w8_11.Extract<uint>(e + 1);
+
+ result.Insert(e, elt);
+ }
+
+ t1 = result.Extract<ulong>(0);
+
+ for (int e = 2; e <= 3; e++)
+ {
+ uint elt = t1.ULongPart(e - 2);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.Extract<uint>(e) + (e == 2 ? w8_11 : w12_15).Extract<uint>(e == 2 ? 3 : 0);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
+ {
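+            // One SHA-256 compression step per message word, using the choose and
+            // majority functions plus the big-sigma rotations; the two 128-bit
+            // state halves are rotated in lockstep after each step.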
+ for (int e = 0; e <= 3; e++)
+ {
+ uint chs = ShaChoose(y.Extract<uint>(0),
+ y.Extract<uint>(1),
+ y.Extract<uint>(2));
+
+ uint maj = ShaMajority(x.Extract<uint>(0),
+ x.Extract<uint>(1),
+ x.Extract<uint>(2));
+
+ uint t1 = y.Extract<uint>(3) + ShaHashSigma1(y.Extract<uint>(0)) + chs + w.Extract<uint>(e);
+
+ uint t2 = t1 + x.Extract<uint>(3);
+
+ x.Insert(3, t2);
+
+ t2 = t1 + ShaHashSigma0(x.Extract<uint>(0)) + maj;
+
+ y.Insert(3, t2);
+
+ Rol32_256(ref y, ref x);
+ }
+
+ return part1 ? x : y;
+ }
+
+ private static void Rol32_256(ref V128 y, ref V128 x)
+ {
+ uint yE3 = y.Extract<uint>(3);
+ uint xE3 = x.Extract<uint>(3);
+
+ y <<= 32;
+ x <<= 32;
+
+ y.Insert(0, xE3);
+ x.Insert(0, yE3);
+ }
+
+ private static uint ShaHashSigma0(uint x)
+ {
+ return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22);
+ }
+
+ private static uint ShaHashSigma1(uint x)
+ {
+ return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
+ }
+
+ private static uint Ror(this uint value, int count)
+ {
+ return (value >> count) | (value << (32 - count));
+ }
+
+ private static uint Lsr(this uint value, int count)
+ {
+ return value >> count;
+ }
+
+ private static uint ULongPart(this ulong value, int part)
+ {
+ return part == 0
+ ? (uint)(value & 0xFFFFFFFFUL)
+ : (uint)(value >> 32);
+ }
+        #endregion
+
+ public static V128 PolynomialMult64_128(ulong op1, ulong op2)
+ {
+ V128 result = V128.Zero;
+
+ V128 op2_128 = new V128(op2, 0);
+
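+            // Carry-less (GF(2)) multiply: for every set bit i of op1, XOR in
+            // op2 shifted left by i; the 64x64 product is up to 128 bits wide.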
+ for (int i = 0; i < 64; i++)
+ {
+ if (((op1 >> i) & 1) == 1)
+ {
+ result ^= op2_128 << i;
+ }
+ }
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/SoftFloat.cs b/src/ARMeilleure/Instructions/SoftFloat.cs
new file mode 100644
index 00000000..9e3db68d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/SoftFloat.cs
@@ -0,0 +1,3480 @@
+using ARMeilleure.State;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFloat
+ {
+ static SoftFloat()
+ {
+ RecipEstimateTable = BuildRecipEstimateTable();
+ RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
+ }
+
+ public static readonly byte[] RecipEstimateTable;
+ public static readonly byte[] RecipSqrtEstimateTable;
+
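+        // Estimate tables built following the ARM reference pseudocode for
+        // RecipEstimate and RecipSqrtEstimate: each entry stores the 9-bit
+        // estimate minus 256 for one input mantissa interval.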
+ private static byte[] BuildRecipEstimateTable()
+ {
+ byte[] tbl = new byte[256];
+
+ for (int idx = 0; idx < 256; idx++)
+ {
+ uint src = (uint)idx + 256u;
+
+ Debug.Assert(256u <= src && src < 512u);
+
+ src = (src << 1) + 1u;
+
+ uint aux = (1u << 19) / src;
+
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+
+ private static byte[] BuildRecipSqrtEstimateTable()
+ {
+ byte[] tbl = new byte[384];
+
+ for (int idx = 0; idx < 384; idx++)
+ {
+ uint src = (uint)idx + 128u;
+
+ Debug.Assert(128u <= src && src < 512u);
+
+ if (src < 256u)
+ {
+ src = (src << 1) + 1u;
+ }
+ else
+ {
+ src = (src >> 1) << 1;
+ src = (src + 1u) << 1;
+ }
+
+ uint aux = 512u;
+
+ while (src * (aux + 1u) * (aux + 1u) < (1u << 28))
+ {
+                aux++;
+ }
+
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+
+ public static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ FPProcessException(exc, context, context.Fpcr);
+ }
+
+ public static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr)
+ {
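+            // FPCR trap-enable bits sit 8 positions above the matching FPSR
+            // cumulative status bits, hence the +8.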
+ int enable = (int)exc + 8;
+
+ if ((fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+
+ public static FPRoundingMode GetRoundingMode(this FPCR fpcr)
+ {
+ const int RModeShift = 22;
+
+ return (FPRoundingMode)(((uint)fpcr >> RModeShift) & 3u);
+ }
+ }
+
+ static class SoftFloat16
+ {
+ public static ushort FPDefaultNaN()
+ {
+ return (ushort)0x7E00u;
+ }
+
+ public static ushort FPInfinity(bool sign)
+ {
+ return sign ? (ushort)0xFC00u : (ushort)0x7C00u;
+ }
+
+ public static ushort FPZero(bool sign)
+ {
+ return sign ? (ushort)0x8000u : (ushort)0x0000u;
+ }
+
+ public static ushort FPMaxNormal(bool sign)
+ {
+ return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu;
+ }
+
+ public static double FPUnpackCv(
+ this ushort valueBits,
+ out FPType type,
+ out bool sign,
+ ExecutionContext context)
+ {
+ sign = (~(uint)valueBits & 0x8000u) == 0u;
+
+ uint exp16 = ((uint)valueBits & 0x7C00u) >> 10;
+ uint frac16 = (uint)valueBits & 0x03FFu;
+
+ double real;
+
+ if (exp16 == 0u)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Zero;
+ real = 0d;
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10));
+ }
+ }
+ else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10));
+ }
+
+ return sign ? -real : real;
+ }
+
+ public static ushort FPRoundCv(double real, ExecutionContext context)
+ {
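+            // Rounds a real value (carried exactly in a double) to half precision:
+            // normalize the mantissa to [1, 2), derive the biased exponent, round
+            // per the FPCR rounding mode, then handle IEEE or AHP overflow.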
+ const int minimumExp = -14;
+
+ const int e = 5;
+ const int f = 10;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ ushort resultBits;
+
+ if ((context.Fpcr & FPCR.Ahp) == 0)
+ {
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+ else
+ {
+ if (biasedExp >= 1u << e)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+
+ error = 0d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return resultBits;
+ }
+ }
+
+ static class SoftFloat16_32
+ {
+ public static float FPConvert(ushort valueBits)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ result = SoftFloat32.FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = SoftFloat32.FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = SoftFloat32.FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context);
+ }
+
+ return result;
+ }
+
+ private static float FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -126;
+
+ const int e = 8;
+ const int f = 23;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return SoftFloat32.FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ float result;
+
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ result = overflowToInf ? SoftFloat32.FPInfinity(sign) : SoftFloat32.FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ private static float FPConvertNaN(ushort valueBits)
+ {
+ return BitConverter.Int32BitsToSingle(
+ (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13));
+ }
+ }
+
+ static class SoftFloat16_64
+ {
+ public static double FPConvert(ushort valueBits)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ result = SoftFloat64.FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = SoftFloat64.FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = SoftFloat64.FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context);
+ }
+
+ return result;
+ }
+
+ private static double FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -1022;
+
+ const int e = 11;
+ const int f = 52;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return SoftFloat64.FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ ulong intMant = (ulong)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1ul << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1ul << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ double result;
+
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ result = overflowToInf ? SoftFloat64.FPInfinity(sign) : SoftFloat64.FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (biasedExp & 0x7FFul) << 52 | (intMant & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ private static double FPConvertNaN(ushort valueBits)
+ {
+ return BitConverter.Int64BitsToDouble(
+ (long)(((ulong)valueBits & 0x8000ul) << 48 | 0x7FF8000000000000ul | ((ulong)valueBits & 0x01FFul) << 42));
+ }
+ }
+
+ static class SoftFloat32_16
+ {
+ public static ushort FPConvert(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ resultBits = SoftFloat16.FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPRoundCv(real, context);
+ }
+
+ return resultBits;
+ }
+
+ private static double FPUnpackCv(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ uint exp32 = (valueBits & 0x7F800000u) >> 23;
+ uint frac32 = valueBits & 0x007FFFFFu;
+
+ double real;
+
+ if (exp32 == 0u)
+ {
+ if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac32 != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23));
+ }
+ }
+ else if (exp32 == 0xFFu)
+ {
+ if (frac32 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static ushort FPConvertNaN(uint valueBits)
+ {
+ return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13);
+ }
+ }
+
+ static class SoftFloat32
+ {
+ public static float FPAdd(float value1, float value2)
+ {
+ return FPAddFpscr(value1, value2, false);
+ }
+
+ public static float FPAddFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static int FPCompare(float value1, float value2, bool signalNaNs)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr);
+
+ int result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = 0b0011;
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ if (value1 == value2)
+ {
+ result = 0b0110;
+ }
+ else if (value1 < value2)
+ {
+ result = 0b1000;
+ }
+ else
+ {
+ result = 0b0010;
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPCompareEQ(float value1, float value2)
+ {
+ return FPCompareEQFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGE(float value1, float value2)
+ {
+ return FPCompareGEFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGT(float value1, float value2)
+ {
+ return FPCompareGTFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
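+        // LE/LT are expressed by swapping the operands of the GE/GT helpers.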
+ public static float FPCompareLE(float value1, float value2)
+ {
+ return FPCompareGE(value2, value1);
+ }
+
+ public static float FPCompareLT(float value1, float value2)
+ {
+ return FPCompareGT(value2, value1);
+ }
+
+ public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr)
+ {
+ return FPCompareGEFpscr(value2, value1, standardFpscr);
+ }
+
+ public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr)
+ {
+ return FPCompareGTFpscr(value2, value1, standardFpscr);
+ }
+
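+        // Division special cases: inf/inf and 0/0 are invalid and yield the
+        // default NaN; dividing a finite value by zero yields a signed
+        // infinity and raises DivideByZero.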
+ public static float FPDiv(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ if (!inf1)
+ {
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
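+        // For equal-magnitude zeros, max(+0, -0) is +0: the zero result sign
+        // is negative only when both inputs are negative (sign1 && sign2).
+        // FPMin below uses (sign1 || sign2) so that -0 wins instead.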
+ public static float FPMax(float value1, float value2)
+ {
+ return FPMaxFpscr(value1, value2, false);
+ }
+
+ public static float FPMaxFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
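+        // FPMaxNum prefers numbers over quiet NaNs: a QNaN on one side is
+        // replaced with -infinity so the numeric operand always wins; if both
+        // operands are NaNs, the normal NaN propagation in FPMaxFpscr applies.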
+ public static float FPMaxNum(float value1, float value2)
+ {
+ return FPMaxNumFpscr(value1, value2, false);
+ }
+
+ public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMaxFpscr(value1, value2, standardFpscr);
+ }
+
+ public static float FPMin(float value1, float value2)
+ {
+ return FPMinFpscr(value1, value2, false);
+ }
+
+ public static float FPMinFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMinNum(float value1, float value2)
+ {
+ return FPMinNumFpscr(value1, value2, false);
+ }
+
+ public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMinFpscr(value1, value2, standardFpscr);
+ }
+
+ public static float FPMul(float value1, float value2)
+ {
+ return FPMulFpscr(value1, value2, false);
+ }
+
+ public static float FPMulFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
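+        // Note the special case below: a QNaN addend combined with a 0 * inf
+        // product still raises InvalidOp and yields the default NaN, overriding
+        // the quiet-NaN propagation performed by FPProcessNaNs3.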
+ public static float FPMulAdd(float valueA, float value1, float value2)
+ {
+ return FPMulAddFpscr(valueA, value1, value2, false);
+ }
+
+ public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr);
+
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, valueA);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMulSub(float valueA, float value1, float value2)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static float FPMulSubFpscr(float valueA, float value1, float value2, bool standardFpscr)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAddFpscr(valueA, value1, value2, standardFpscr);
+ }
+
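+        // FMULX semantics: identical to FPMul except that 0 * inf returns
+        // +/-2.0 instead of raising InvalidOp for the default NaN.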
+ public static float FPMulX(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPNegMulAdd(float valueA, float value1, float value2)
+ {
+ valueA = valueA.FPNeg();
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static float FPNegMulSub(float valueA, float value1, float value2)
+ {
+ valueA = valueA.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
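+        // Table-driven reciprocal estimate (FRECPE). Inputs below 2^-128 in
+        // magnitude saturate to +/-infinity or +/-max-normal depending on the
+        // rounding mode; with flush-to-zero set, inputs of magnitude 2^126 or
+        // greater produce a zero result and set the underflow flag.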
+ public static float FPRecipEstimate(float value)
+ {
+ return FPRecipEstimateFpscr(value, false);
+ }
+
+ public static float FPRecipEstimateFpscr(float value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (MathF.Abs(value) < MathF.Pow(2f, -128))
+ {
+ bool overflowToInf;
+
+ switch (fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context, fpcr);
+ SoftFloat.FPProcessException(FPException.Inexact, context, fpcr);
+ }
+ else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126)))
+ {
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
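+                // Normal (or subnormal) input: the top fraction bits form a
+                // 9-bit index into RecipEstimateTable, and the result is
+                // rebuilt from the 8-bit estimate. For subnormal inputs exp
+                // is 0 here, and the unsigned wrap of "exp -= 1u" is
+                // intentional: it makes resultExp = 253 - exp come out one
+                // larger. Result exponents of 0 and (wrapped) -1 denote a
+                // subnormal result and are denormalized below.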
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 253u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
+ }
+
+ return result;
+ }
+
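+        // FRECPS: one Newton-Raphson step for the reciprocal, computing
+        // 2 - value1 * value2, with 0 * inf treated as 0.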
+ public static float FPRecipStep(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPSubFpscr(FPTwo(false), product, true);
+ }
+
+ return result;
+ }
+
+ public static float FPRecipStepFused(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, 2f);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
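+        // FRECPX: zeroes the fraction and bitwise-inverts the exponent
+        // (clamped to the maximum normal exponent), approximating the
+        // reciprocal of the input's exponent scale.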
+ public static float FPRecpX(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else
+ {
+ uint notExp = (~op >> 23) & 0xFFu;
+ uint maxExp = 0xFEu;
+
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23));
+ }
+
+ return result;
+ }
+
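+        // Table-driven reciprocal square root estimate (FRSQRTE). Negative
+        // non-zero inputs are invalid and return the default NaN; the
+        // even/odd exponent split effectively normalizes the significand
+        // into [0.25, 1.0) before the table lookup.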
+ public static float FPRSqrtEstimate(float value)
+ {
+ return FPRSqrtEstimateFpscr(value, false);
+ }
+
+ public static float FPRSqrtEstimateFpscr(float value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (380u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
+ }
+
+ return result;
+ }
+
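+        // Computes (value1 - value2) / 2 with full special-case handling;
+        // used by FPRSqrtStep below to form (3 - product) / 2.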
+ public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr)
+ {
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = (value1 - value2) / 2.0f;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
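+        // FRSQRTS: one Newton-Raphson step for the reciprocal square root,
+        // computing (3 - value1 * value2) / 2, with 0 * inf treated as 0.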
+ public static float FPRSqrtStep(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPHalvedSub(FPThree(false), product, context, fpcr);
+ }
+
+ return result;
+ }
+
+ public static float FPRSqrtStepFused(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPOnePointFive(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPSqrt(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Infinity && !sign)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = MathF.Sqrt(value);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPSub(float value1, float value2)
+ {
+ return FPSubFpscr(value1, value2, false);
+ }
+
+ public static float FPSubFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPDefaultNaN()
+ {
+ return BitConverter.Int32BitsToSingle(0x7fc00000);
+ }
+
+ public static float FPInfinity(bool sign)
+ {
+ return sign ? float.NegativeInfinity : float.PositiveInfinity;
+ }
+
+ public static float FPZero(bool sign)
+ {
+ return sign ? -0f : +0f;
+ }
+
+ public static float FPMaxNormal(bool sign)
+ {
+ return sign ? float.MinValue : float.MaxValue;
+ }
+
+ private static float FPTwo(bool sign)
+ {
+ return sign ? -2f : +2f;
+ }
+
+ private static float FPThree(bool sign)
+ {
+ return sign ? -3f : +3f;
+ }
+
+ private static float FPOnePointFive(bool sign)
+ {
+ return sign ? -1.5f : +1.5f;
+ }
+
+ private static float FPNeg(this float value)
+ {
+ return -value;
+ }
+
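+        // Returns an all-ones or all-zeros bit pattern (the all-ones value
+        // is a NaN when viewed as a float; only the raw bits matter).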
+ private static float ZerosOrOnes(bool ones)
+ {
+ return BitConverter.Int32BitsToSingle(ones ? -1 : 0);
+ }
+
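+        // Classifies the raw IEEE 754 bits as Zero, Nonzero, Infinity, QNaN
+        // or SNaN. With flush-to-zero set, subnormal inputs are flushed to
+        // zero and InputDenorm is raised. NaN inputs return a zero value;
+        // callers consume only the type and the original bits.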
+ private static float FPUnpack(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ if ((valueBits & 0x7F800000u) == 0u)
+ {
+ if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x007FFFFFu) != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7F800000u) == 0u)
+ {
+ if ((valueBits & 0x007FFFFFu) == 0u)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ type = (~valueBits & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
+
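+        // Arm NaN propagation order: a signaling NaN in the first operand
+        // wins, then the second, followed by quiet NaNs in the same order.
+        // "done" tells the caller whether a NaN was consumed.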
+ private static float FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ uint op1,
+ uint op2,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static float FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ uint op1,
+ uint op2,
+ uint op3,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1u << 22;
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if ((fpcr & FPCR.Dn) != 0)
+ {
+ return FPDefaultNaN();
+ }
+
+ return BitConverter.Int32BitsToSingle((int)op);
+ }
+ }
+
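+    // Double-to-half conversion, honoring FPCR.AHP (alternative
+    // half-precision): in AHP mode there are no half-precision infinities
+    // or NaNs, so NaNs convert to zero and infinities to 0x7FFF, both
+    // raising InvalidOp.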
+ static class SoftFloat64_16
+ {
+ public static ushort FPConvert(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out ulong valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ resultBits = SoftFloat16.FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPRoundCv(real, context);
+ }
+
+ return resultBits;
+ }
+
+ private static double FPUnpackCv(
+ this double value,
+ out FPType type,
+ out bool sign,
+ out ulong valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+            sign = (~valueBits & 0x8000000000000000ul) == 0ul;
+
+ ulong exp64 = (valueBits & 0x7FF0000000000000ul) >> 52;
+ ulong frac64 = valueBits & 0x000FFFFFFFFFFFFFul;
+
+ double real;
+
+ if (exp64 == 0u)
+ {
+ if (frac64 == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac64 != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -1022) * ((double)frac64 * Math.Pow(2d, -52));
+ }
+ }
+ else if (exp64 == 0x7FFul)
+ {
+ if (frac64 == 0u)
+ {
+ type = FPType.Infinity;
+                    real = Math.Pow(2d, 1000000); // Overflows to +infinity.
+ }
+ else
+ {
+ type = (~frac64 & 0x0008000000000000ul) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp64 - 1023) * (1d + (double)frac64 * Math.Pow(2d, -52));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static ushort FPConvertNaN(ulong valueBits)
+ {
+ return (ushort)((valueBits & 0x8000000000000000ul) >> 48 | 0x7E00u | (valueBits & 0x0007FC0000000000ul) >> 42);
+ }
+ }
+
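+    // Double-precision counterparts of the single-precision helpers above;
+    // the structure mirrors them exactly, with an 11-bit exponent and 52-bit
+    // fraction in place of 8 and 23 bits.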
+ static class SoftFloat64
+ {
+ public static double FPAdd(double value1, double value2)
+ {
+ return FPAddFpscr(value1, value2, false);
+ }
+
+ public static double FPAddFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static int FPCompare(double value1, double value2, bool signalNaNs)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ int result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = 0b0011;
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ if (value1 == value2)
+ {
+ result = 0b0110;
+ }
+ else if (value1 < value2)
+ {
+ result = 0b1000;
+ }
+ else
+ {
+ result = 0b0010;
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPCompareEQ(double value1, double value2)
+ {
+ return FPCompareEQFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGE(double value1, double value2)
+ {
+ return FPCompareGEFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGT(double value1, double value2)
+ {
+ return FPCompareGTFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareLE(double value1, double value2)
+ {
+ return FPCompareGE(value2, value1);
+ }
+
+ public static double FPCompareLT(double value1, double value2)
+ {
+ return FPCompareGT(value2, value1);
+ }
+
+ public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr)
+ {
+ return FPCompareGEFpscr(value2, value1, standardFpscr);
+ }
+
+ public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr)
+ {
+ return FPCompareGTFpscr(value2, value1, standardFpscr);
+ }
+
+ public static double FPDiv(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ if (!inf1)
+ {
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMax(double value1, double value2)
+ {
+ return FPMaxFpscr(value1, value2, false);
+ }
+
+ public static double FPMaxFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMaxNum(double value1, double value2)
+ {
+ return FPMaxNumFpscr(value1, value2, false);
+ }
+
+ public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMaxFpscr(value1, value2, standardFpscr);
+ }
+
+ public static double FPMin(double value1, double value2)
+ {
+ return FPMinFpscr(value1, value2, false);
+ }
+
+ public static double FPMinFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMinNum(double value1, double value2)
+ {
+ return FPMinNumFpscr(value1, value2, false);
+ }
+
+ public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMinFpscr(value1, value2, standardFpscr);
+ }
+
+ public static double FPMul(double value1, double value2)
+ {
+ return FPMulFpscr(value1, value2, false);
+ }
+
+ public static double FPMulFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMulAdd(double valueA, double value1, double value2)
+ {
+ return FPMulAddFpscr(valueA, value1, value2, false);
+ }
+
+ public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr);
+
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, valueA);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMulSub(double valueA, double value1, double value2)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAddFpscr(valueA, value1, value2, standardFpscr);
+ }
+
+ public static double FPMulX(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPNegMulAdd(double valueA, double value1, double value2)
+ {
+ valueA = valueA.FPNeg();
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPNegMulSub(double valueA, double value1, double value2)
+ {
+ valueA = valueA.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPRecipEstimate(double value)
+ {
+ return FPRecipEstimateFpscr(value, false);
+ }
+
+ public static double FPRecipEstimateFpscr(double value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (Math.Abs(value) < Math.Pow(2d, -1024))
+ {
+ bool overflowToInf;
+
+ switch (fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context, fpcr);
+ SoftFloat.FPProcessException(FPException.Inexact, context, fpcr);
+ }
+ else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022)))
+ {
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 2045u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ return result;
+ }
+
+ public static double FPRecipStep(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPSubFpscr(FPTwo(false), product, true);
+ }
+
+ return result;
+ }
+
+ public static double FPRecipStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, 2d);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPRecpX(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else
+ {
+ ulong notExp = (~op >> 52) & 0x7FFul;
+ ulong maxExp = 0x7FEul;
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52));
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtEstimate(double value)
+ {
+ return FPRSqrtEstimateFpscr(value, false);
+ }
+
+ public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (3068u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
+ }
+
+ return result;
+ }
+
+ public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr)
+ {
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = (value1 - value2) / 2.0;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtStep(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPHalvedSub(FPThree(false), product, context, fpcr);
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPOnePointFive(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPSqrt(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Infinity && !sign)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = Math.Sqrt(value);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPSub(double value1, double value2)
+ {
+ return FPSubFpscr(value1, value2, false);
+ }
+
+ public static double FPSubFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPDefaultNaN()
+ {
+ return BitConverter.Int64BitsToDouble(0x7ff8000000000000);
+ }
+
+ public static double FPInfinity(bool sign)
+ {
+ return sign ? double.NegativeInfinity : double.PositiveInfinity;
+ }
+
+ public static double FPZero(bool sign)
+ {
+ return sign ? -0d : +0d;
+ }
+
+ public static double FPMaxNormal(bool sign)
+ {
+ return sign ? double.MinValue : double.MaxValue;
+ }
+
+ private static double FPTwo(bool sign)
+ {
+ return sign ? -2d : +2d;
+ }
+
+ private static double FPThree(bool sign)
+ {
+ return sign ? -3d : +3d;
+ }
+
+ private static double FPOnePointFive(bool sign)
+ {
+ return sign ? -1.5d : +1.5d;
+ }
+
+ private static double FPNeg(this double value)
+ {
+ return -value;
+ }
+
+ private static double ZerosOrOnes(bool ones)
+ {
+ return BitConverter.Int64BitsToDouble(ones ? -1L : 0L);
+ }
+
+ private static double FPUnpack(
+ this double value,
+ out FPType type,
+ out bool sign,
+ out ulong valueBits,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+ sign = (~valueBits & 0x8000000000000000ul) == 0ul;
+
+ if ((valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
+
+ private static double FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ ulong op1,
+ ulong op2,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static double FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ ulong op1,
+ ulong op2,
+ ulong op3,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1ul << 51;
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if ((fpcr & FPCR.Dn) != 0)
+ {
+ return FPDefaultNaN();
+ }
+
+ return BitConverter.Int64BitsToDouble((long)op);
+ }
+ }
+}