aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Instructions
diff options
context:
space:
mode:
authorgdkchan <gab.dark.100@gmail.com>2019-08-08 15:56:22 -0300
committeremmauss <emmausssss@gmail.com>2019-08-08 21:56:22 +0300
commita731ab3a2aad56e6ceb8b4e2444a61353246295c (patch)
treec7f13f51bfec6b19431e62167811ae31e9d2fea9 /ARMeilleure/Instructions
parent1ba58e9942e54175e3f3a0e1d57a48537f4888b1 (diff)
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project * Refactoring around the old IRAdapter, now renamed to PreAllocator * Optimize the LowestBitSet method * Add CLZ support and fix CLS implementation * Add missing Equals and GetHashCode overrides on some structs, misc small tweaks * Implement the ByteSwap IR instruction, and some refactoring on the assembler * Implement the DivideUI IR instruction and fix 64-bits IDIV * Correct constant operand type on CSINC * Move division instructions implementation to InstEmitDiv * Fix destination type for the ConditionalSelect IR instruction * Implement UMULH and SMULH, with new IR instructions * Fix some issues with shift instructions * Fix constant types for BFM instructions * Fix up new tests using the new V128 struct * Update tests * Move DIV tests to a separate file * Add support for calls, and some instructions that depends on them * Start adding support for SIMD & FP types, along with some of the related ARM instructions * Fix some typos and the divide instruction with FP operands * Fix wrong method call on Clz_V * Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes * Implement SIMD logical instructions and more misc. fixes * Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations * Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. 
fixes * Implement SIMD shift instruction and fix Dup_V * Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table * Fix check with tolerance on tester * Implement FP & SIMD comparison instructions, and some fixes * Update FCVT (Scalar) encoding on the table to support the Half-float variants * Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes * Use old memory access methods, made a start on SIMD memory insts support, some fixes * Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes * Fix arguments count with struct return values, other fixes * More instructions * Misc. fixes and integrate LDj3SNuD fixes * Update tests * Add a faster linear scan allocator, unwinding support on windows, and other changes * Update Ryujinx.HLE * Update Ryujinx.Graphics * Fix V128 return pointer passing, RCX is clobbered * Update Ryujinx.Tests * Update ITimeZoneService * Stop using GetFunctionPointer as that can't be called from native code, misc. 
fixes and tweaks * Use generic GetFunctionPointerForDelegate method and other tweaks * Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics * Remove some unused code on the assembler * Fix REX.W prefix regression on float conversion instructions, add some sort of profiler * Add hardware capability detection * Fix regression on Sha1h and revert Fcm** changes * Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator * Fix silly mistake introduced on last commit on CpuId * Generate inline stack probes when the stack allocation is too large * Initial support for the System-V ABI * Support multiple destination operands * Fix SSE2 VectorInsert8 path, and other fixes * Change placement of XMM callee save and restore code to match other compilers * Rename Dest to Destination and Inst to Instruction * Fix a regression related to calls and the V128 type * Add an extra space on comments to match code style * Some refactoring * Fix vector insert FP32 SSE2 path * Port over the ARM32 instructions * Avoid memory protection races on JIT Cache * Another fix on VectorInsert FP32 (thanks to LDj3SNuD * Float operands don't need to use the same register when VEX is supported * Add a new register allocator, higher quality code for hot code (tier up), and other tweaks * Some nits, small improvements on the pre allocator * CpuThreadState is gone * Allow changing CPU emulators with a config entry * Add runtime identifiers on the ARMeilleure project * Allow switching between CPUs through a config entry (pt. 
2) * Change win10-x64 to win-x64 on projects * Update the Ryujinx project to use ARMeilleure * Ensure that the selected register is valid on the hybrid allocator * Allow exiting on returns to 0 (should fix test regression) * Remove register assignments for most used variables on the hybrid allocator * Do not use fixed registers as spill temp * Add missing namespace and remove unneeded using * Address PR feedback * Fix types, etc * Enable AssumeStrictAbiCompliance by default * Ensure that Spill and Fill don't load or store any more than necessary
Diffstat (limited to 'ARMeilleure/Instructions')
-rw-r--r--ARMeilleure/Instructions/CryptoHelper.cs279
-rw-r--r--ARMeilleure/Instructions/DelegateTypes.cs78
-rw-r--r--ARMeilleure/Instructions/InstEmitAlu.cs369
-rw-r--r--ARMeilleure/Instructions/InstEmitAlu32.cs129
-rw-r--r--ARMeilleure/Instructions/InstEmitAluHelper.cs351
-rw-r--r--ARMeilleure/Instructions/InstEmitBfm.cs196
-rw-r--r--ARMeilleure/Instructions/InstEmitCcmp.cs61
-rw-r--r--ARMeilleure/Instructions/InstEmitCsel.cs53
-rw-r--r--ARMeilleure/Instructions/InstEmitDiv.cs67
-rw-r--r--ARMeilleure/Instructions/InstEmitException.cs55
-rw-r--r--ARMeilleure/Instructions/InstEmitFlow.cs159
-rw-r--r--ARMeilleure/Instructions/InstEmitFlow32.cs71
-rw-r--r--ARMeilleure/Instructions/InstEmitFlowHelper.cs192
-rw-r--r--ARMeilleure/Instructions/InstEmitHash.cs64
-rw-r--r--ARMeilleure/Instructions/InstEmitHelper.cs218
-rw-r--r--ARMeilleure/Instructions/InstEmitMemory.cs177
-rw-r--r--ARMeilleure/Instructions/InstEmitMemory32.cs256
-rw-r--r--ARMeilleure/Instructions/InstEmitMemoryEx.cs261
-rw-r--r--ARMeilleure/Instructions/InstEmitMemoryHelper.cs512
-rw-r--r--ARMeilleure/Instructions/InstEmitMove.cs41
-rw-r--r--ARMeilleure/Instructions/InstEmitMul.cs100
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic.cs3159
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCmp.cs712
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCrypto.cs49
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCvt.cs1166
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHash.cs147
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHelper.cs1477
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdLogical.cs456
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdMemory.cs160
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdMove.cs794
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdShift.cs1057
-rw-r--r--ARMeilleure/Instructions/InstEmitSystem.cs114
-rw-r--r--ARMeilleure/Instructions/InstName.cs459
-rw-r--r--ARMeilleure/Instructions/NativeInterface.cs367
-rw-r--r--ARMeilleure/Instructions/SoftFallback.cs1307
-rw-r--r--ARMeilleure/Instructions/SoftFloat.cs2757
36 files changed, 17870 insertions, 0 deletions
diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 00000000..b6b4a62d
--- /dev/null
+++ b/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,279 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+
+namespace ARMeilleure.Instructions
+{
    // Managed (software) implementations of the AES permutation steps used by
    // the AArch64 AESE/AESD/AESMC/AESIMC instructions, following the FIPS-197
    // specification. All tables below are the standard AES lookup tables.
    static class CryptoHelper
    {
#region "LookUp Tables"
        // AES S-box: the SubBytes substitution table (FIPS-197 figure 7).
        private static readonly byte[] _sBox = new byte[]
        {
            0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
            0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
            0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
            0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
            0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
            0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
            0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
            0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
            0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
            0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
            0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
            0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
            0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
            0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
            0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
            0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
        };

        // Inverse AES S-box: the InvSubBytes substitution table (FIPS-197
        // figure 14); _invSBox[_sBox[i]] == i.
        private static readonly byte[] _invSBox = new byte[]
        {
            0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
        };

        // GF(2^8) multiplication tables used by (Inv)MixColumns:
        // _gfMulXX[b] == {XX} * b in the AES finite field.
        private static readonly byte[] _gfMul02 = new byte[]
        {
            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
            0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
            0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
            0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
            0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
            0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
            0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
            0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
            0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
            0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
            0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
            0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
            0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
            0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
            0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
            0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
        };

        private static readonly byte[] _gfMul03 = new byte[]
        {
            0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
            0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
            0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
            0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
            0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
            0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
            0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
            0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
            0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
            0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
            0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
            0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
            0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
            0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
            0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
            0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
        };

        private static readonly byte[] _gfMul09 = new byte[]
        {
            0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
            0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
            0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
            0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
            0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
            0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
            0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
            0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
            0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
            0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
            0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
            0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
            0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
            0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
            0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
            0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
        };

        private static readonly byte[] _gfMul0B = new byte[]
        {
            0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
            0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
            0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
            0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
            0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
            0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
            0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
            0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
            0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
            0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
            0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
            0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
            0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
            0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
            0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
            0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
        };

        private static readonly byte[] _gfMul0D = new byte[]
        {
            0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
            0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
            0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
            0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
            0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
            0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
            0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
            0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
            0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
            0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
            0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
            0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
            0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
            0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
            0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
            0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
        };

        private static readonly byte[] _gfMul0E = new byte[]
        {
            0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
            0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
            0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
            0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
            0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
            0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
            0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
            0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
            0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
            0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
            0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
            0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
            0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
            0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
            0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
            0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
        };

        // ShiftRows byte permutation: output index for each input index
        // (used as outState[_srPerm[i]] = inState[i]).
        private static readonly byte[] _srPerm = new byte[]
        {
            0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
        };

        // InvShiftRows byte permutation (the inverse of _srPerm).
        private static readonly byte[] _isrPerm = new byte[]
        {
            0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
        };
#endregion

        // InvMixColumns (FIPS-197 section 5.3.3): multiplies each 4-byte
        // column of the state by the inverse MDS matrix with coefficients
        // {0e, 0b, 0d, 09} over GF(2^8).
        public static V128 AesInvMixColumns(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int columns = 0; columns <= 3; columns++)
            {
                int idx = columns << 2;

                byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
                byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
                byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
                byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]

                // Each output byte is the GF(2^8) dot product of the matrix
                // row with the column; the uint cast keeps the XOR chain in
                // unsigned arithmetic before narrowing back to byte.
                outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]);
                outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]);
                outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]);
                outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]);
            }

            return new V128(outState);
        }

        // InvShiftRows: permutes the 16 state bytes with the inverse
        // ShiftRows pattern.
        public static V128 AesInvShiftRows(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[_isrPerm[idx]] = inState[idx];
            }

            return new V128(outState);
        }

        // InvSubBytes: substitutes every state byte through the inverse S-box.
        public static V128 AesInvSubBytes(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[idx] = _invSBox[inState[idx]];
            }

            return new V128(outState);
        }

        // MixColumns (FIPS-197 section 5.1.3): multiplies each 4-byte column
        // of the state by the MDS matrix with coefficients {02, 03, 01, 01}
        // over GF(2^8).
        public static V128 AesMixColumns(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int columns = 0; columns <= 3; columns++)
            {
                int idx = columns << 2;

                byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
                byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
                byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
                byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]

                // Coefficients 01 reduce to the byte itself, so only the
                // {02} and {03} products need table lookups.
                outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3);
                outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3);
                outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]);
                outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]);
            }

            return new V128(outState);
        }

        // ShiftRows: permutes the 16 state bytes with the ShiftRows pattern.
        public static V128 AesShiftRows(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[_srPerm[idx]] = inState[idx];
            }

            return new V128(outState);
        }

        // SubBytes: substitutes every state byte through the S-box.
        public static V128 AesSubBytes(V128 op)
        {
            byte[] inState = op.ToArray();
            byte[] outState = new byte[16];

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[idx] = _sBox[inState[idx]];
            }

            return new V128(outState);
        }
    }
+}
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs
new file mode 100644
index 00000000..e90e4d77
--- /dev/null
+++ b/ARMeilleure/Instructions/DelegateTypes.cs
@@ -0,0 +1,78 @@
+using ARMeilleure.State;
+using System;
+
namespace ARMeilleure.Instructions
{
    // Delegate types used when emitting calls from translated code into
    // managed helper methods (e.g. context.Call(new _U32_U32(SoftFallback.ReverseBits32), n)).
    //
    // Naming convention: _Ret_Arg1_Arg2..., where each element encodes a
    // type and its bit width: F32/F64 = float/double, S32/S64 = int/long,
    // U8/U16/U32/U64 = byte/ushort/uint/ulong, V128 = 128-bit vector,
    // Bool = bool, Void = no return value.

    delegate double _F64_F64(double a1);
    delegate double _F64_F64_F64(double a1, double a2);
    delegate double _F64_F64_F64_F64(double a1, double a2, double a3);
    delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2);

    delegate float _F32_F32(float a1);
    delegate float _F32_F32_F32(float a1, float a2);
    delegate float _F32_F32_F32_F32(float a1, float a2, float a3);
    delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2);
    delegate float _F32_U16(ushort a1);

    delegate int _S32_F32(float a1);
    delegate int _S32_F32_F32_Bool(float a1, float a2, bool a3);
    delegate int _S32_F64(double a1);
    delegate int _S32_F64_F64_Bool(double a1, double a2, bool a3);
    delegate int _S32_U64_U16(ulong a1, ushort a2);
    delegate int _S32_U64_U32(ulong a1, uint a2);
    delegate int _S32_U64_U64(ulong a1, ulong a2);
    delegate int _S32_U64_U8(ulong a1, byte a2);
    delegate int _S32_U64_V128(ulong a1, V128 a2);

    delegate long _S64_F32(float a1);
    delegate long _S64_F64(double a1);
    delegate long _S64_S64(long a1);
    delegate long _S64_S64_S32(long a1, int a2);
    delegate long _S64_S64_S64(long a1, long a2);
    delegate long _S64_S64_S64_Bool_S32(long a1, long a2, bool a3, int a4);
    delegate long _S64_S64_S64_S32(long a1, long a2, int a3);
    delegate long _S64_U64_S32(ulong a1, int a2);
    delegate long _S64_U64_S64(ulong a1, long a2);

    delegate ushort _U16_F32(float a1);
    delegate ushort _U16_U64(ulong a1);

    delegate uint _U32_F32(float a1);
    delegate uint _U32_F64(double a1);
    delegate uint _U32_U32(uint a1);
    delegate uint _U32_U32_U16(uint a1, ushort a2);
    delegate uint _U32_U32_U32(uint a1, uint a2);
    delegate uint _U32_U32_U64(uint a1, ulong a2);
    delegate uint _U32_U32_U8(uint a1, byte a2);
    delegate uint _U32_U64(ulong a1);

    delegate ulong _U64();
    delegate ulong _U64_F32(float a1);
    delegate ulong _U64_F64(double a1);
    delegate ulong _U64_S64_S32(long a1, int a2);
    delegate ulong _U64_S64_U64(long a1, ulong a2);
    delegate ulong _U64_U64(ulong a1);
    delegate ulong _U64_U64_S32(ulong a1, int a2);
    delegate ulong _U64_U64_S64_S32(ulong a1, long a2, int a3);
    delegate ulong _U64_U64_U64(ulong a1, ulong a2);
    delegate ulong _U64_U64_U64_Bool_S32(ulong a1, ulong a2, bool a3, int a4);

    delegate byte _U8_U64(ulong a1);

    delegate V128 _V128_U64(ulong a1);
    delegate V128 _V128_V128(V128 a1);
    delegate V128 _V128_V128_U32_V128(V128 a1, uint a2, V128 a3);
    delegate V128 _V128_V128_V128(V128 a1, V128 a2);
    delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3);
    delegate V128 _V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4);
    delegate V128 _V128_V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4, V128 a5);

    delegate void _Void();
    delegate void _Void_U64(ulong a1);
    delegate void _Void_U64_S32(ulong a1, int a2);
    delegate void _Void_U64_U16(ulong a1, ushort a2);
    delegate void _Void_U64_U32(ulong a1, uint a2);
    delegate void _Void_U64_U64(ulong a1, ulong a2);
    delegate void _Void_U64_U8(ulong a1, byte a2);
    delegate void _Void_U64_V128(ulong a1, V128 a2);
}
diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 00000000..947c9f70
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,369 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
        // ADC: Rd = Rn + Rm + carry, leaving the flags untouched.
        public static void Adc(ArmEmitterContext context)  => EmitAdc(context, setFlags: false);
        // ADCS: as ADC, but also updates the NZCV flags.
        public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);

        // Shared ADC/ADCS emitter: adds the two ALU operands plus the current
        // carry flag, optionally emitting the flag updates.
        private static void EmitAdc(ArmEmitterContext context, bool setFlags)
        {
            Operand n = GetAluN(context);
            Operand m = GetAluM(context);

            Operand d = context.Add(n, m);

            Operand carry = GetFlag(PState.CFlag);

            // The carry flag is a 32-bit value: widen it before adding it
            // into a 64-bit result.
            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
            {
                carry = context.ZeroExtend32(OperandType.I64, carry);
            }

            d = context.Add(d, carry);

            if (setFlags)
            {
                EmitNZFlagsCheck(context, d);

                // ADC uses its own carry check (the carry-in can change the
                // carry-out relative to a plain ADD).
                EmitAdcsCCheck(context, n, d);
                EmitAddsVCheck(context, n, m, d);
            }

            SetAluDOrZR(context, d);
        }
+
+ public static void Add(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+ }
+
        // ADDS: Rd = Rn + Rm, updating the NZCV flags.
        public static void Adds(ArmEmitterContext context)
        {
            Operand n = GetAluN(context);
            Operand m = GetAluM(context);

            // Records the operand pair of this flag-setting operation —
            // presumably so the translator can fold it into a following
            // conditional; confirm against ArmEmitterContext.MarkComparison.
            context.MarkComparison(n, m);

            Operand d = context.Add(n, m);

            EmitNZFlagsCheck(context, d);

            EmitAddsCCheck(context, n, d);
            EmitAddsVCheck(context, n, m, d);

            SetAluDOrZR(context, d);
        }
+
+ public static void And(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Ands(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, m);
+
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Asrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+ }
+
        // BIC: Rd = Rn & ~Rm (bit clear), leaving the flags untouched.
        public static void Bic(ArmEmitterContext context)  => EmitBic(context, setFlags: false);
        // BICS: as BIC, but also updates the flags.
        public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);

        // Shared BIC/BICS emitter.
        private static void EmitBic(ArmEmitterContext context, bool setFlags)
        {
            Operand n = GetAluN(context);
            Operand m = GetAluM(context);

            Operand d = context.BitwiseAnd(n, context.BitwiseNot(m));

            if (setFlags)
            {
                // Logical flag-setting ops only compute N and Z; C and V are cleared.
                EmitNZFlagsCheck(context, d);
                EmitCVFlagsClear(context);
            }

            SetAluD(context, d, setFlags);
        }
+
        // CLS: count leading sign bits (bits equal to the sign bit, not
        // counting the sign bit itself).
        public static void Cls(ArmEmitterContext context)
        {
            OpCodeAlu op = (OpCodeAlu)context.CurrOp;

            Operand n = GetIntOrZR(context, op.Rn);

            // Implemented as CLZ((n >>> 1) ^ (n & ~signBit)) - 1: the XOR
            // turns the leading run of sign-matching bits into a run of
            // zeros, and the extra leading zero from the shift/mask is
            // removed by the final subtraction.
            Operand nHigh = context.ShiftRightUI(n, Const(1));

            bool is32Bits = op.RegisterSize == RegisterSize.Int32;

            // int.MaxValue / long.MaxValue clear only the sign bit.
            Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue);

            Operand nLow = context.BitwiseAnd(n, mask);

            Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow));

            res = context.Subtract(res, Const(res.Type, 1));

            SetAluDOrZR(context, res);
        }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand d = context.CountLeadingZeros(n);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Eon(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+ }
+
        // EXTR: extracts a register-sized result starting at bit <shift> of
        // the Rn:Rm concatenation. ROR (immediate) is the Rn == Rm form.
        public static void Extr(ArmEmitterContext context)
        {
            OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;

            Operand res = GetIntOrZR(context, op.Rm);

            // With a zero shift the result is simply Rm.
            if (op.Shift != 0)
            {
                if (op.Rn == op.Rm)
                {
                    // Same source register: the extraction is just a rotate.
                    res = context.RotateRight(res, Const(op.Shift));
                }
                else
                {
                    // res = (Rm >>> shift) | (Rn << (regSize - shift)).
                    res = context.ShiftRightUI(res, Const(op.Shift));

                    Operand n = GetIntOrZR(context, op.Rn);

                    int invShift = op.GetBitsCount() - op.Shift;

                    res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift)));
                }
            }

            SetAluDOrZR(context, res);
        }
+
+ public static void Lslv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Lsrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+ }
+
        // SBC: Rd = Rn - Rm - (1 - carry), leaving the flags untouched.
        public static void Sbc(ArmEmitterContext context)  => EmitSbc(context, setFlags: false);
        // SBCS: as SBC, but also updates the NZCV flags.
        public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);

        // Shared SBC/SBCS emitter.
        private static void EmitSbc(ArmEmitterContext context, bool setFlags)
        {
            Operand n = GetAluN(context);
            Operand m = GetAluM(context);

            Operand d = context.Subtract(n, m);

            // On ARM the carry flag acts as "not borrow": subtract one more
            // when the carry is clear (borrow = C ^ 1).
            Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));

            // The flag is a 32-bit value: widen it before subtracting it
            // from a 64-bit result.
            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
            {
                borrow = context.ZeroExtend32(OperandType.I64, borrow);
            }

            d = context.Subtract(d, borrow);

            if (setFlags)
            {
                EmitNZFlagsCheck(context, d);

                // SBC uses its own carry check (the borrow-in can change the
                // carry-out relative to a plain SUB).
                EmitSbcsCCheck(context, n, m);
                EmitSubsVCheck(context, n, m, d);
            }

            SetAluDOrZR(context, d);
        }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Subs(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = context.Call(new _U32_U32(SoftFallback.ReverseBits32), n);
+ }
+ else
+ {
+ d = context.Call(new _U64_U64(SoftFallback.ReverseBits64), n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), n);
+ }
+ else
+ {
+ d = context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Rev32(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ SetAluDOrZR(context, context.ByteSwap(n));
+ }
+ else
+ {
+ Operand d = context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), n);
+
+ SetAluDOrZR(context, d);
+ }
+ }
+
+ public static void Rev64(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn)));
+ }
+
+ public static void Rorv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+ }
+
+ // Returns the variable shift amount for LSLV/LSRV/ASRV/RORV: the low bits
+ // of Rm masked to the operand size (datasize - 1), per the Arm definition.
+ private static Operand GetAluMShift(ArmEmitterContext context)
+ {
+ IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ // Narrow to 32 bits so the value matches the 32-bit mask constant below.
+ m = context.ConvertI64ToI32(m);
+ }
+
+ return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1));
+ }
+
+ private static void EmitCVFlagsClear(ArmEmitterContext context)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ SetFlag(context, PState.VFlag, Const(0));
+ }
+
+ // Writes the ALU destination register, treating register 31 as SP.
+ public static void SetAluD(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: false);
+ }
+
+ // Writes the ALU destination register, treating register 31 as ZR
+ // (the write is discarded).
+ public static void SetAluDOrZR(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: true);
+ }
+
+ // Common destination writeback. Shifted-register forms (IOpCodeAluRs)
+ // always treat register 31 as ZR, regardless of x31IsZR.
+ public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+ {
+ IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+ if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex)
+ {
+ // Write to ZR: result is simply discarded.
+ return;
+ }
+
+ SetIntOrSP(context, op.Rd, d);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 00000000..79b0abbc
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,129 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Add(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ if (op.SetFlags)
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ // CMP (A32): subtract and set NZCV; the result itself is discarded, so
+ // no destination register is written. The unused opcode local present in
+ // the sibling methods is not needed here (CMP always sets flags).
+ public static void Cmp(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ public static void Mov(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand m = GetAluM(context);
+
+ if (op.SetFlags)
+ {
+ EmitNZFlagsCheck(context, m);
+ }
+
+ EmitAluStore(context, m);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ if (op.SetFlags)
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ // Stores an ALU result to Rd. A write to PC (R15) ends the translated
+ // code and returns the branch target address to the dispatcher.
+ private static void EmitAluStore(ArmEmitterContext context, Operand value)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ if (op.Rd == RegisterAlias.Aarch32Pc)
+ {
+ if (op.SetFlags)
+ {
+ // TODO: Load SPSR etc.
+ Operand isThumb = GetFlag(PState.TFlag);
+
+ Operand lblThumb = Label();
+
+ context.BranchIfTrue(lblThumb, isThumb);
+
+ // Arm state: force 4-byte alignment of the target address.
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3))));
+
+ context.MarkLabel(lblThumb);
+
+ // Thumb state: force 2-byte alignment of the target address.
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1))));
+ }
+ else
+ {
+ EmitAluWritePc(context, value);
+ }
+ }
+ else
+ {
+ SetIntA32(context, op.Rd, value);
+ }
+ }
+
+ private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+ {
+ context.StoreToContext();
+
+ if (IsThumb(context.CurrOp))
+ {
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1))));
+ }
+ else
+ {
+ EmitBxWritePc(context, value);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 00000000..81d5c9eb
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitAluHelper
+ {
+ public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+ {
+ SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0)));
+ SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+ }
+
+ // Carry flag for add-with-carry, derived from the result d and the first
+ // operand n alone: an unsigned wraparound occurred iff d < n, or d == n
+ // with carry-in set (which means m + CIn was exactly 2^bits).
+ public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = (Rd == Rn && CIn) || Rd < Rn
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = Rd < Rn
+ SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+ }
+
+ public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = (Rn == Rm && CIn) || Rn > Rm
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = Rn >= Rm
+ SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+ }
+
+ public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+
+ public static Operand GetAluN(ArmEmitterContext context)
+ {
+ if (context.CurrOp is IOpCodeAlu op)
+ {
+ if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+ {
+ return GetIntOrZR(context, op.Rn);
+ }
+ else
+ {
+ return GetIntOrSP(context, op.Rn);
+ }
+ }
+ else if (context.CurrOp is IOpCode32Alu op32)
+ {
+ return GetIntA32(context, op32.Rn);
+ }
+ else
+ {
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ // ARM32.
+ case OpCode32AluImm op:
+ {
+ if (op.SetFlags && op.IsRotated)
+ {
+ SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+ }
+
+ return Const(op.Immediate);
+ }
+
+ case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+
+ case OpCodeT16AluImm8 op: return Const(op.Immediate);
+
+ // ARM64.
+ case IOpCodeAluImm op:
+ {
+ if (op.GetOperandType() == OperandType.I32)
+ {
+ return Const((int)op.Immediate);
+ }
+ else
+ {
+ return Const(op.Immediate);
+ }
+ }
+
+ case IOpCodeAluRs op:
+ {
+ Operand value = GetIntOrZR(context, op.Rm);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break;
+ case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break;
+ case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break;
+ case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break;
+ }
+
+ return value;
+ }
+
+ case IOpCodeAluRx op:
+ {
+ Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+ value = context.ShiftLeft(value, Const(op.Shift));
+
+ return value;
+ }
+
+ default: throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ // ARM32 helpers.
+ // Applies the A32 immediate shift encoding (DecodeImmShift) to Rm.
+ // An encoded amount of 0 is special: LSL #0 (no shift), LSR/ASR #32,
+ // or RRX when the type is ROR.
+ private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Imm;
+
+ if (shift == 0)
+ {
+ // Decode the imm5 == 0 special cases.
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ // The shifter carry-out is only produced for flag-setting instructions.
+ setCarry &= op.SetFlags;
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break;
+ case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break;
+ case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break;
+ case ShiftType.Ror:
+ // ROR with an encoded amount of 0 is RRX (rotate through carry).
+ if (op.Imm != 0)
+ {
+ m = GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+
+ // LSL by an immediate amount, optionally computing the shifter carry-out
+ // (the last bit shifted out of the operand).
+ private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ if ((uint)shift > 32)
+ {
+ // Everything is shifted out: result 0, carry-out 0.
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ // The last bit shifted out is the LSB of m.
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ // Carry-out is bit (32 - shift) of m.
+ if (setCarry)
+ {
+ Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ return context.ShiftLeft(m, Const(shift));
+ }
+ }
+
+ private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightUI(m, Const(shift));
+ }
+ }
+
+ // Shift by an amount larger than the operand width: the result is always
+ // zero, and when flag updates are requested the carry-out is zero as well.
+ // (Also drops a stray duplicated semicolon from the original.)
+ private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+ {
+ if (setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ }
+
+ return Const(0);
+ }
+
+ private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ if ((uint)shift >= 32)
+ {
+ m = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return m;
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightSI(m, Const(shift));
+ }
+ }
+
+ private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ shift &= 0x1f;
+
+ m = context.RotateRight(m, Const(shift));
+
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return m;
+ }
+
+ // RRX: rotate right by one through the carry flag.
+ private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+ {
+ // Rotate right by 1 with carry.
+ // Snapshot the incoming carry before it is overwritten below.
+ Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+ if (setCarry)
+ {
+ // The new carry is the bit rotated out (LSB of m).
+ SetCarryMLsb(context, m);
+ }
+
+ m = context.ShiftRightUI(m, Const(1));
+
+ // Insert the old carry at bit 31.
+ m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+ return m;
+ }
+
+ private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+ {
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+ }
+
+ private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+ {
+ SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+ }
+
+ private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+ {
+ Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitBfm.cs b/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 00000000..8fdbf6cf
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ // BFM (bitfield move): inserts bits of Rn into Rd. Pos < Shift selects the
+ // BFI alias (insert a low field of Rn at bit bits - Shift of Rd); otherwise
+ // the BFXIL alias (extract a field of Rn into the low bits of Rd).
+ public static void Bfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand d = GetIntOrZR(context, op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand res;
+
+ if (op.Pos < op.Shift)
+ {
+ // BFI.
+ int shift = op.GetBitsCount() - op.Shift;
+
+ int width = op.Pos + 1;
+
+ // mask covers the low <width> bits of the source field.
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+ // Keep the bits of Rd outside the inserted field.
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
+ }
+ else
+ {
+ // BFXIL.
+ int shift = op.Shift;
+
+ int width = op.Pos - shift + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+ // Keep the upper bits of Rd above the extracted field.
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+ }
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Sbfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int bitsCount = op.GetBitsCount();
+
+ if (op.Pos + 1 == bitsCount)
+ {
+ EmitSbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitSbfiz(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+ }
+ else if (op.Pos == 31 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos));
+ res = context.ShiftRightSI(res, Const(bitsCount - 1));
+ res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask));
+
+ Operand n2 = GetBfmN(context);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+ }
+ }
+
+ public static void Ubfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ if (op.Pos + 1 == op.GetBitsCount())
+ {
+ EmitUbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitUbfiz(context);
+ }
+ else if (op.Pos + 1 == op.Shift)
+ {
+ EmitBfmLsl(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+ }
+ else
+ {
+ SetIntOrZR(context, op.Rd, GetBfmN(context));
+ }
+ }
+
+ private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+ private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+ private static void EmitBfiz(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int width = op.Pos + 1;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift - width))
+ : context.ShiftRightUI(res, Const(op.Shift - width));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitSbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: true);
+ }
+
+ private static void EmitUbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: false);
+ }
+
+ private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift))
+ : context.ShiftRightUI(res, Const(op.Shift));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitBfmLsl(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ int shift = op.GetBitsCount() - op.Shift;
+
+ SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+ }
+
+ private static Operand GetBfmN(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ long mask = op.WMask & op.TMask;
+
+ return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 00000000..b1b0a2a1
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,61 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true);
+ public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false);
+
+ // CCMP/CCMN: if the condition passes, performs a compare (an add for the
+ // negated CCMN form, a subtract for CCMP) and sets NZCV from it; otherwise
+ // NZCV is loaded directly from the encoded nzcv immediate.
+ private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+ {
+ OpCodeCcmp op = (OpCodeCcmp)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ EmitCondBranch(context, lblTrue, op.Cond);
+
+ // Condition failed: flags come from the immediate (V = bit 0 ... N = bit 3).
+ SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1));
+ SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1));
+ SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1));
+ SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ if (isNegated)
+ {
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+ else
+ {
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 00000000..60baf0bc
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum CselOperation
+ {
+ None,
+ Increment,
+ Invert,
+ Negate
+ }
+
+ public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None);
+ public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment);
+ public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert);
+ public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate);
+
+ private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+ {
+ OpCodeCsel op = (OpCodeCsel)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (cselOp == CselOperation.Increment)
+ {
+ m = context.Add(m, Const(m.Type, 1));
+ }
+ else if (cselOp == CselOperation.Invert)
+ {
+ m = context.BitwiseNot(m);
+ }
+ else if (cselOp == CselOperation.Negate)
+ {
+ m = context.Negate(m);
+ }
+
+ Operand condTrue = GetCondTrue(context, op.Cond);
+
+ Operand d = context.ConditionalSelect(condTrue, n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 00000000..0c21dd1b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,67 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false);
+ public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true);
+
+ // SDIV/UDIV. Per the Arm definition there is no trap: division by zero
+ // yields 0, and for SDIV the INT_MIN / -1 overflow case yields INT_MIN.
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ // If Rm == 0, Rd = 0 (division by zero).
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0));
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ // This guard also prevents a host-side integer division fault.
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue);
+ Operand minus1 = is32Bits ? Const(-1) : Const(-1L);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ SetAluDOrZR(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand d = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide (n, m);
+
+ SetAluDOrZR(context, d);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ // Division by zero path: destination is zero.
+ SetAluDOrZR(context, Const(op.GetOperandType(), 0));
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 00000000..6f7b6fd5
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Brk(ArmEmitterContext context)
+ {
+ EmitExceptionCall(context, NativeInterface.Break);
+ }
+
+ public static void Svc(ArmEmitterContext context)
+ {
+ EmitExceptionCall(context, NativeInterface.SupervisorCall);
+ }
+
+ private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ context.StoreToContext();
+
+ context.Call(func, Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(op.Address + 4));
+ }
+ }
+
+ public static void Und(ArmEmitterContext context)
+ {
+ OpCode op = context.CurrOp;
+
+ Delegate dlg = new _Void_U64_S32(NativeInterface.Undefined);
+
+ context.StoreToContext();
+
+ context.Call(dlg, Const(op.Address), Const(op.RawOpCode));
+
+ context.LoadFromContext();
+
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(op.Address + 4));
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 00000000..93d36e1b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,159 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ if (context.CurrBlock.Branch != null)
+ {
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+ else
+ {
+ context.Return(Const(op.Immediate));
+ }
+ }
+
+ public static void B_Cond(ArmEmitterContext context)
+ {
+ OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp;
+
+ EmitBranch(context, op.Cond);
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blr(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ Operand n = context.Copy(GetIntOrZR(context, op.Rn));
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitVirtualCall(context, n);
+ }
+
+ public static void Br(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ EmitVirtualJump(context, GetIntOrZR(context, op.Rn));
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp;
+
+ EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero);
+ }
+
+ public static void Ret(ArmEmitterContext context)
+ {
+ context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag)));
+ }
+
+ public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
+ public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false);
+
+ private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp;
+
+ Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit));
+
+ EmitBranch(context, value, onNotZero);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Condition cond)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ if (context.CurrBlock.Branch != null)
+ {
+ EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
+
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(op.Address + 4));
+ }
+ }
+ else
+ {
+ Operand lblTaken = Label();
+
+ EmitCondBranch(context, lblTaken, cond);
+
+ context.Return(Const(op.Address + 4));
+
+ context.MarkLabel(lblTaken);
+
+ context.Return(Const(op.Immediate));
+ }
+ }
+
+ // Emits a compare-and-branch on value (CBZ/CBNZ and TBZ/TBNZ forms).
+ // When the branch target block is known, branch directly to its label;
+ // otherwise return the chosen target address to the dispatcher.
+ private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ if (context.CurrBlock.Branch != null)
+ {
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+
+ // No fall-through block: return the next sequential address.
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(op.Address + 4));
+ }
+ }
+ else
+ {
+ Operand lblTaken = Label();
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTaken, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTaken, value);
+ }
+
+ // Not taken: continue at the next sequential instruction.
+ context.Return(Const(op.Address + 4));
+
+ context.MarkLabel(lblTaken);
+
+ // Taken: continue at the branch target.
+ context.Return(Const(op.Immediate));
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 00000000..27addc78
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,71 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // AArch32 branch instruction emitters (B, BL, BLX, BX).
+ static partial class InstEmit32
+ {
+ // B: unconditional branch to an immediate target.
+ public static void B(ArmEmitterContext context)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ if (context.CurrBlock.Branch != null)
+ {
+ // Target block is part of this function: branch to its label.
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+ else
+ {
+ // Target not translated here: flush register state and return
+ // the target address to the dispatcher.
+ context.StoreToContext();
+
+ context.Return(Const(op.Immediate));
+ }
+ }
+
+ // BL: branch with link (no instruction-set exchange).
+ public static void Bl(ArmEmitterContext context)
+ {
+ Blx(context, x: false);
+ }
+
+ // BLX (immediate): branch with link and Arm <-> Thumb exchange.
+ public static void Blx(ArmEmitterContext context)
+ {
+ Blx(context, x: true);
+ }
+
+ // BX: branch to a register value, switching instruction set based on
+ // bit 0 of the target address (see EmitBxWritePc).
+ public static void Bx(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ context.StoreToContext();
+
+ EmitBxWritePc(context, GetIntA32(context, op.Rm));
+ }
+
+ // Shared BL/BLX emitter: writes the return address into LR, optionally
+ // toggles the Thumb flag, then emits the call.
+ private static void Blx(ArmEmitterContext context, bool x)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ // NOTE(review): this local is unused; the ternary below re-reads
+ // op.GetPc() instead.
+ uint pc = op.GetPc();
+
+ bool isThumb = IsThumb(context.CurrOp);
+
+ // Link register value: for Thumb the return address has bit 0 set
+ // (Thumb marker); for Arm the PC read is 8 ahead, so subtract 4 to
+ // get the address of the following instruction.
+ uint currentPc = isThumb
+ ? op.GetPc() | 1
+ : op.GetPc() - 4;
+
+ SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ // If x is true, then this is a branch with link and exchange.
+ // In this case we need to swap the mode between Arm <-> Thumb.
+ if (x)
+ {
+ SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+ }
+
+ InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 00000000..a8eb21d3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,192 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // Helpers shared by the branch/call emitters: condition evaluation,
+ // conditional branch emission, and call/return plumbing to the dispatcher.
+ static class InstEmitFlowHelper
+ {
+ // Tag OR'ed into a returned target address to tell the dispatcher that
+ // the return is a call (presumably safe because instruction addresses
+ // keep bit 0 free — TODO confirm against the dispatcher).
+ public const ulong CallFlag = 1;
+
+ // Branches to "target" when "cond" holds; an Al (always) condition
+ // becomes an unconditional branch.
+ public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+ {
+ if (cond != Condition.Al)
+ {
+ context.BranchIfTrue(target, GetCondTrue(context, cond));
+ }
+ else
+ {
+ context.Branch(target);
+ }
+ }
+
+ // Materializes an ARM condition code as a 0/1 operand, reading the
+ // NZCV flag registers. If the context still has the comparison that
+ // produced the flags, a direct comparison result is used instead.
+ public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+ {
+ Operand cmpResult = context.TryGetComparisonResult(condition);
+
+ if (cmpResult != null)
+ {
+ return cmpResult;
+ }
+
+ Operand value = Const(1);
+
+ // Flags are 0 or 1, so XOR with 1 inverts them.
+ Operand Inverse(Operand val)
+ {
+ return context.BitwiseExclusiveOr(val, Const(1));
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq:
+ value = GetFlag(PState.ZFlag);
+ break;
+
+ case Condition.Ne:
+ value = Inverse(GetFlag(PState.ZFlag));
+ break;
+
+ case Condition.GeUn:
+ value = GetFlag(PState.CFlag);
+ break;
+
+ case Condition.LtUn:
+ value = Inverse(GetFlag(PState.CFlag));
+ break;
+
+ case Condition.Mi:
+ value = GetFlag(PState.NFlag);
+ break;
+
+ case Condition.Pl:
+ value = Inverse(GetFlag(PState.NFlag));
+ break;
+
+ case Condition.Vs:
+ value = GetFlag(PState.VFlag);
+ break;
+
+ case Condition.Vc:
+ value = Inverse(GetFlag(PState.VFlag));
+ break;
+
+ // Unsigned higher: C == 1 && Z == 0.
+ case Condition.GtUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseAnd(c, Inverse(z));
+
+ break;
+ }
+
+ // Unsigned lower or same: C == 0 || Z == 1.
+ case Condition.LeUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseOr(Inverse(c), z);
+
+ break;
+ }
+
+ // Signed greater than or equal: N == V.
+ case Condition.Ge:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareEqual(n, v);
+
+ break;
+ }
+
+ // Signed less than: N != V.
+ case Condition.Lt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareNotEqual(n, v);
+
+ break;
+ }
+
+ // Signed greater than: Z == 0 && N == V.
+ case Condition.Gt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v));
+
+ break;
+ }
+
+ // Signed less than or equal: Z == 1 || N != V.
+ case Condition.Le:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseOr(z, context.ICompareNotEqual(n, v));
+
+ break;
+ }
+ }
+
+ return value;
+ }
+
+ // Emits a call to a known immediate address: return the tagged address
+ // so the dispatcher performs the call.
+ public static void EmitCall(ArmEmitterContext context, ulong immediate)
+ {
+ context.Return(Const(immediate | CallFlag));
+ }
+
+ public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
+ {
+ EmitVirtualCallOrJump(context, target, isJump: false);
+ }
+
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target)
+ {
+ EmitVirtualCallOrJump(context, target, isJump: true);
+ }
+
+ // Emits a call/jump to a runtime-computed target by returning the
+ // tagged target address to the dispatcher.
+ // NOTE(review): isJump is currently unused — calls and jumps are
+ // handled identically here.
+ private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump)
+ {
+ context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag)));
+ }
+
+ // After a call returns, either continue with the next block (when the
+ // callee returned to the expected address) or keep unwinding to the
+ // dispatcher. Not referenced in this chunk — verify callers elsewhere.
+ private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal)
+ {
+ // Note: The return value of the called method will be placed
+ // at the Stack, the return value is always a Int64 with the
+ // return address of the function. We check if the address is
+ // correct, if it isn't we keep returning until we reach the dispatcher.
+ ulong nextAddr = GetNextOpAddress(context.CurrOp);
+
+ if (context.CurrBlock.Next != null)
+ {
+ Operand lblContinue = Label();
+
+ context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr)));
+
+ context.Return(Const(nextAddr));
+
+ context.MarkLabel(lblContinue);
+ }
+ else
+ {
+ context.Return(Const(nextAddr));
+ }
+ }
+
+ // Address of the instruction following the given opcode.
+ private static ulong GetNextOpAddress(OpCode op)
+ {
+ return op.Address + (ulong)op.OpCodeSizeInBytes;
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 00000000..0be8458e
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // CRC32 / CRC32C instruction emitters. All variants are implemented via
+ // managed software-fallback calls (SoftFallback), differing only in the
+ // width of the data operand (byte/half/word/doubleword).
+ static partial class InstEmit
+ {
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b));
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h));
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w));
+ }
+
+ public static void Crc32x(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x));
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb));
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch));
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw));
+ }
+
+ public static void Crc32cx(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx));
+ }
+
+ // Shared emitter: Rd = dlg(Rn, Rm), where Rn is the CRC accumulator
+ // and Rm is the data operand.
+ private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Call(dlg, n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 00000000..02e104a4
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,218 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // Shared register/flag access helpers used by all instruction emitters:
+ // AArch64 ZR/SP-aware register reads and writes, AArch32 banked register
+ // aliasing, and PSTATE flag access.
+ static class InstEmitHelper
+ {
+ // True when the current opcode was decoded as 16-bit Thumb.
+ public static bool IsThumb(OpCode op)
+ {
+ return op is OpCodeT16;
+ }
+
+ // Reads register Rm and applies the extend (zero/sign, 8/16/32 bits)
+ // used by extended-register addressing/operand forms.
+ public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+ {
+ Operand value = GetIntOrZR(context, rm);
+
+ switch (type)
+ {
+ case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break;
+ case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break;
+ case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break;
+
+ case IntType.Int8: value = context.SignExtend8 (value.Type, value); break;
+ case IntType.Int16: value = context.SignExtend16(value.Type, value); break;
+ case IntType.Int32: value = context.SignExtend32(value.Type, value); break;
+ }
+
+ return value;
+ }
+
+ // Reads an AArch32 register; reading PC yields the constant current PC
+ // value (which already accounts for the architectural read-ahead).
+ public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)op.GetPc());
+ }
+ else
+ {
+ return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex));
+ }
+ }
+
+ // Writes an AArch32 register; writing PC behaves like BX (interworking
+ // branch), flushing state and leaving the translated function.
+ public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ context.StoreToContext();
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value);
+ }
+ }
+
+ // Maps an AArch32 register index to its backing register for the given
+ // processor mode, applying register banking where it exists.
+ public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ // Only registers >= 8 are banked,
+ // with registers in the range [8, 12] being
+ // banked for the FIQ mode, and registers
+ // 13 and 14 being banked for all modes.
+ if ((uint)regIndex < 8)
+ {
+ return regIndex;
+ }
+
+ return GetBankedRegisterAlias(mode, regIndex);
+ }
+
+ // Resolves a banked AArch32 register (R8-R14) for a processor mode.
+ // NOTE(review): ArgumentException(nameof(mode)) passes the parameter
+ // name as the *message*; the (message, paramName) overload was likely
+ // intended.
+ public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ switch (regIndex)
+ {
+ case 8: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R8Fiq
+ : RegisterAlias.R8Usr;
+
+ case 9: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R9Fiq
+ : RegisterAlias.R9Usr;
+
+ case 10: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R10Fiq
+ : RegisterAlias.R10Usr;
+
+ case 11: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R11Fiq
+ : RegisterAlias.R11Usr;
+
+ case 12: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R12Fiq
+ : RegisterAlias.R12Usr;
+
+ case 13:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.System: return RegisterAlias.SpUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.SpFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.SpIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.SpAbt;
+ case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp;
+ case Aarch32Mode.Undefined: return RegisterAlias.SpUnd;
+
+ default: throw new ArgumentException(nameof(mode));
+ }
+
+ case 14:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.Hypervisor:
+ case Aarch32Mode.System: return RegisterAlias.LrUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.LrFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.LrIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.LrAbt;
+ case Aarch32Mode.Undefined: return RegisterAlias.LrUnd;
+
+ default: throw new ArgumentException(nameof(mode));
+ }
+
+ default: throw new ArgumentOutOfRangeException(nameof(regIndex));
+ }
+ }
+
+ // BXWritePc: bit 0 of the target selects the instruction set (1 = Thumb);
+ // the T flag is updated and the aligned target address is returned to
+ // the dispatcher.
+ // NOTE(review): the mask/label pairing looks inverted relative to the
+ // architecture — the taken path (bit 0 set, i.e. Thumb) masks with ~3
+ // while the fall-through (Arm) masks with ~1. Per BXWritePc pseudocode,
+ // Thumb should clear only bit 0 and Arm bits 1:0. Verify.
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc)
+ {
+ Operand mode = context.BitwiseAnd(pc, Const(1));
+
+ SetFlag(context, PState.TFlag, mode);
+
+ Operand lblArmMode = Label();
+
+ context.BranchIfTrue(lblArmMode, mode);
+
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1))));
+
+ context.MarkLabel(lblArmMode);
+
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3))));
+ }
+
+ // Reads an AArch64 integer register, where index 31 is the zero register.
+ public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ OperandType type = context.CurrOp.GetOperandType();
+
+ return type == OperandType.I32 ? Const(0) : Const(0L);
+ }
+ else
+ {
+ return GetIntOrSP(context, regIndex);
+ }
+ }
+
+ // Writes an AArch64 integer register; writes to the zero register are
+ // discarded.
+ public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, regIndex, value);
+ }
+
+ // Reads an integer register (index 31 = SP), truncating to 32 bits when
+ // the current opcode operates on W registers.
+ public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+ {
+ Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ return value;
+ }
+
+ // Writes an integer register (index 31 = SP); 32-bit values are
+ // zero-extended, matching W-register write semantics.
+ public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (value.Type == OperandType.I32)
+ {
+ value = context.ZeroExtend32(OperandType.I64, value);
+ }
+
+ context.Copy(reg, value);
+ }
+
+ // Returns the 128-bit SIMD & FP register with the given index.
+ public static Operand GetVec(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ // Returns the operand backing a single PSTATE flag (0 or 1).
+ public static Operand GetFlag(PState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+ }
+
+ // Writes a PSTATE flag and records the assignment so later emitters can
+ // reuse the comparison that produced it.
+ public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+ {
+ context.Copy(GetFlag(stateFlag), value);
+
+ context.MarkFlagSet(stateFlag);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..1d5953fb
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,177 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // AArch64 ordinary load/store emitters: ADR/ADRP, LDR/LDP (plus literal
+ // forms), STR/STP, with pre/post-index write-back and extended-register
+ // addressing handled by GetAddress / EmitWBackIfNeeded.
+ static partial class InstEmit
+ {
+ // ADR: Rd = PC-relative address (byte granularity).
+ public static void Adr(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate));
+ }
+
+ // ADRP: Rd = page-aligned PC-relative address (4KB page granularity).
+ public static void Adrp(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12);
+
+ SetIntOrZR(context, op.Rd, Const(address));
+ }
+
+ public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false);
+ public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true);
+
+ // Shared LDR emitter: picks zero-extend, sign-extend-to-32 or
+ // sign-extend-to-64 based on the signed flag and Extend64 bit.
+ private static void EmitLdr(ArmEmitterContext context, bool signed)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ if (signed && op.Extend64)
+ {
+ EmitLoadSx64(context, address, op.Rt, op.Size);
+ }
+ else if (signed)
+ {
+ EmitLoadSx32(context, address, op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, address, op.Rt, op.Size);
+ }
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ // LDR (literal): loads from a PC-relative address known at translation
+ // time; prefetch-hint encodings are no-ops.
+ public static void Ldr_Literal(ArmEmitterContext context)
+ {
+ IOpCodeLit op = (IOpCodeLit)context.CurrOp;
+
+ if (op.Prefetch)
+ {
+ return;
+ }
+
+ if (op.Signed)
+ {
+ EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ }
+
+ // LDP: loads a pair of registers from consecutive addresses.
+ public static void Ldp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ void EmitLoad(int rt, Operand ldAddr)
+ {
+ if (op.Extend64)
+ {
+ EmitLoadSx64(context, ldAddr, rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, ldAddr, rt, op.Size);
+ }
+ }
+
+ Operand address = GetAddress(context);
+
+ // Second element sits one access size (1 << Size bytes) later.
+ Operand address2 = context.Add(address, Const(1L << op.Size));
+
+ EmitLoad(op.Rt, address);
+ EmitLoad(op.Rt2, address2);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ // STR: stores a single register.
+ public static void Str(ArmEmitterContext context)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ // STP: stores a pair of registers to consecutive addresses.
+ public static void Stp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ Operand address2 = context.Add(address, Const(1L << op.Size));
+
+ InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size);
+ InstEmitMemoryHelper.EmitStore(context, address2, op.Rt2, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ // Computes the effective address for the current load/store opcode:
+ // immediate (pre/post-indexed) or register-offset with optional
+ // extension and shift-by-access-size.
+ private static Operand GetAddress(ArmEmitterContext context)
+ {
+ Operand address = null;
+
+ switch (context.CurrOp)
+ {
+ case OpCodeMemImm op:
+ {
+ address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ // Pre-indexing.
+ if (!op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate));
+ }
+
+ break;
+ }
+
+ case OpCodeMemReg op:
+ {
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ Operand m = GetExtendedM(context, op.Rm, op.IntType);
+
+ if (op.Shift)
+ {
+ m = context.ShiftLeft(m, Const(op.Size));
+ }
+
+ address = context.Add(n, m);
+
+ break;
+ }
+ }
+
+ return address;
+ }
+
+ // Writes the (possibly post-index-adjusted) address back to Rn when
+ // the opcode requires write-back.
+ private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+ {
+ // Check whenever the current OpCode has post-indexed write back, if so write it.
+ if (context.CurrOp is OpCodeMemImm op && op.WBack)
+ {
+ if (op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate));
+ }
+
+ SetIntOrSP(context, op.Rn, address);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 00000000..002d2c5c
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,256 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // AArch32 load/store emitters: LDM/STM multiple-register transfers and
+ // the LDR/STR single/double register family, including big-endian (E flag)
+ // handling for the doubleword forms.
+ static partial class InstEmit32
+ {
+ // log2 of the access sizes used throughout this file.
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ [Flags]
+ enum AccessType
+ {
+ Store = 0,
+ Signed = 1,
+ Load = 2,
+
+ LoadZx = Load,
+ LoadSx = Load | Signed,
+ }
+
+ // LDM: loads each register in the mask from successive words.
+ public static void Ldm(ArmEmitterContext context)
+ {
+ OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+ // Write-back is suppressed when the list loads PC from an Rn-based
+ // list combination that would make the result unpredictable.
+ bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc);
+
+ // Write back before the loads so a load into Rn wins over the
+ // write-back value.
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ // Iterate register list from lowest set bit upwards.
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitLoadZx(context, address, register, WordSizeLog2);
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Ldr(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrsb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Ldrsh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+ }
+
+ // STM: stores each register in the mask to successive words.
+ public static void Stm(ArmEmitterContext context)
+ {
+ OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStore(context, address, register, WordSizeLog2);
+
+ // Note: If Rn is also specified on the register list,
+ // and Rn is the first register on this list, then the
+ // value that is written to memory is the unmodified value,
+ // before the write back. If it is on the list, but it's
+ // not the first one, then the value written to memory
+ // varies between CPUs.
+ if (offset == 0 && op.PostOffset != 0)
+ {
+ // Emit write back after the first write.
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+ }
+
+ public static void Strd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+ }
+
+ // Shared LDR*/STR* emitter: computes the effective address (with
+ // optional pre/post indexing and write-back), then performs the access.
+ // Doubleword accesses are split into two word accesses whose order
+ // depends on the E (endianness) flag.
+ private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ OpCode32Mem op = (OpCode32Mem)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand temp = null;
+
+ if (op.Index || op.WBack)
+ {
+ temp = op.Add
+ ? context.Add (n, Const(op.Immediate))
+ : context.Subtract(n, Const(op.Immediate));
+ }
+
+ if (op.WBack)
+ {
+ SetIntA32(context, op.Rn, temp);
+ }
+
+ Operand address;
+
+ // Index == true: pre-indexed (use offset address); otherwise the
+ // unmodified base is used (post-indexed or no offset).
+ if (op.Index)
+ {
+ address = temp;
+ }
+ else
+ {
+ address = n;
+ }
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ void Load(int rt, int offs, int loadSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ if ((accType & AccessType.Signed) != 0)
+ {
+ EmitLoadSx32(context, addr, rt, loadSize);
+ }
+ else
+ {
+ EmitLoadZx(context, addr, rt, loadSize);
+ }
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ // LDRD: two word loads; big-endian (E flag set) swaps which
+ // register gets the low word. Rt | 1 is the odd register of
+ // the pair (Rt is presumably even-aligned — TODO confirm).
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Load(op.Rt, 0, WordSizeLog2);
+ Load(op.Rt | 1, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Load(op.Rt | 1, 0, WordSizeLog2);
+ Load(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Load(op.Rt, 0, size);
+ }
+ }
+ else
+ {
+ void Store(int rt, int offs, int storeSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ EmitStore(context, addr, rt, storeSize);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ // STRD: mirror of the LDRD endianness handling above.
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Store(op.Rt, 0, WordSizeLog2);
+ Store(op.Rt | 1, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Store(op.Rt | 1, 0, WordSizeLog2);
+ Store(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Store(op.Rt, 0, size);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 00000000..bcca7619
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,261 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // AArch64 exclusive / ordered load-store emitters (LDXR/STXR families,
+ // acquire/release variants, CLREX) and memory barriers. The actual memory
+ // accesses go through NativeInterface software fallbacks.
+ static partial class InstEmit
+ {
+ [Flags]
+ private enum AccessType
+ {
+ None = 0,
+ Ordered = 1,
+ Exclusive = 2,
+ OrderedEx = Ordered | Exclusive
+ }
+
+ // CLREX: drops any exclusive monitor held by this context.
+ public static void Clrex(ArmEmitterContext context)
+ {
+ context.Call(new _Void(NativeInterface.ClearExclusive));
+ }
+
+ // Barriers are currently no-ops (see EmitBarrier).
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered);
+ public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx);
+ public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive);
+ public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive);
+ public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx);
+
+ private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: false);
+ }
+
+ private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: true);
+ }
+
+ // Shared emitter for exclusive/ordered loads, single or pairwise.
+ private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ if (pair)
+ {
+ // Exclusive loads should be atomic. For pairwise loads, we need to
+ // read all the data at once. For a 32-bits pairwise load, we do a
+ // simple 64-bits load, for a 128-bits load, we need to call a special
+ // method to read 128-bits atomically.
+ if (op.Size == 2)
+ {
+ Operand value = EmitLoad(context, address, exclusive, 3);
+
+ // Split the 64-bit value into low/high words for Rt/Rt2.
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else if (op.Size == 3)
+ {
+ Operand value = EmitLoad(context, address, exclusive, 4);
+
+ // 128-bit load: extract the two 64-bit halves of the vector.
+ Operand valueLow = context.VectorExtract(OperandType.I64, value, 0);
+ Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1);
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ // 8, 16, 32 or 64-bits (non-pairwise) load.
+ Operand value = EmitLoad(context, address, exclusive, op.Size);
+
+ SetIntOrZR(context, op.Rt, value);
+ }
+ }
+
+ // Emits a (possibly exclusive-monitored) load of 2^size bytes via the
+ // appropriate NativeInterface fallback; size 4 returns a V128.
+ private static Operand EmitLoad(
+ ArmEmitterContext context,
+ Operand address,
+ bool exclusive,
+ int size)
+ {
+ Delegate fallbackMethodDlg = null;
+
+ if (exclusive)
+ {
+ switch (size)
+ {
+ case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break;
+ case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break;
+ case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break;
+ case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break;
+ case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break;
+ case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break;
+ case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break;
+ case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break;
+ case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;
+ }
+ }
+
+ return context.Call(fallbackMethodDlg, address);
+ }
+
+ // NOTE(review): name is likely a typo of "Prfm" (PRFM, prefetch memory);
+ // verify against the opcode table entry that references it.
+ public static void Pfrm(ArmEmitterContext context)
+ {
+ // Memory Prefetch, execute as no-op.
+ }
+
+ public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered);
+ public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx);
+ public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive);
+ public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive);
+ public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx);
+
+ private static void EmitStr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: false);
+ }
+
+ private static void EmitStp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: true);
+ }
+
+ // Shared emitter for exclusive/ordered stores, single or pairwise.
+ // Pairwise stores pack Rt/Rt2 into one value so the store is atomic.
+ private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ Operand s = null;
+
+ if (pair)
+ {
+ Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
+
+ Operand t2 = GetIntOrZR(context, op.Rt2);
+
+ Operand value;
+
+ if (op.Size == 2)
+ {
+ // Two words packed into one 64-bit store.
+ value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
+ }
+ else /* if (op.Size == 3) */
+ {
+ // Two doublewords packed into one 128-bit vector store.
+ value = context.VectorInsert(context.VectorZero(), t, 0);
+ value = context.VectorInsert(value, t2, 1);
+ }
+
+ s = EmitStore(context, address, value, exclusive, op.Size + 1);
+ }
+ else
+ {
+ s = EmitStore(context, address, t, exclusive, op.Size);
+ }
+
+ if (s != null)
+ {
+ // This is only needed for exclusive stores. The function returns 0
+ // when the store is successful, and 1 otherwise.
+ SetIntOrZR(context, op.Rs, s);
+ }
+ }
+
+ // Emits a (possibly exclusive) store of 2^size bytes via the matching
+ // NativeInterface fallback. Returns the exclusive-store status operand,
+ // or null for ordinary stores.
+ private static Operand EmitStore(
+ ArmEmitterContext context,
+ Operand address,
+ Operand value,
+ bool exclusive,
+ int size)
+ {
+ // Narrow sub-64-bit values to match the fallback signatures.
+ if (size < 3)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ Delegate fallbackMethodDlg = null;
+
+ if (exclusive)
+ {
+ switch (size)
+ {
+ case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break;
+ case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break;
+ case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break;
+ case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break;
+ case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break;
+ }
+
+ return context.Call(fallbackMethodDlg, address, value);
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break;
+ case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break;
+ case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break;
+ case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break;
+ case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;
+ }
+
+ context.Call(fallbackMethodDlg, address, value);
+
+ return null;
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ // Note: This barrier is most likely not necessary, and probably
+ // doesn't make any difference since we need to do a ton of stuff
+ // (software MMU emulation) to read or write anything anyway.
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 00000000..0ae5e3f2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,512 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryHelper
+ {
        // How a loaded integer value is extended into the destination register.
        private enum Extension
        {
            Zx,   // Zero extension.
            Sx32, // Sign extension to a 32-bit destination.
            Sx64  // Sign extension to a 64-bit destination.
        }
+
        // Emits a zero-extending load of 1 << size bytes into register rt.
        public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
        {
            EmitLoad(context, address, Extension.Zx, rt, size);
        }
+
        // Emits a load sign-extended to a 32-bit destination register.
        public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
        {
            EmitLoad(context, address, Extension.Sx32, rt, size);
        }
+
        // Emits a load sign-extended to a 64-bit destination register.
        public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
        {
            EmitLoad(context, address, Extension.Sx64, rt, size);
        }
+
        // Emits a load of 1 << size bytes into rt, dispatching to the vector or
        // integer path based on the current opcode, then applies the requested
        // extension on the integer path.
        private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
        {
            bool isSimd = IsSimd(context);

            // Integer accesses top out at 8 bytes (size 3); SIMD at 16 (size 4).
            if ((uint)size > (isSimd ? 4 : 3))
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if (isSimd)
            {
                EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
            }
            else
            {
                EmitReadInt(context, address, rt, size);
            }

            if (!isSimd)
            {
                // The raw loaded value was already written to rt; re-read it and
                // sign extend in place when the instruction requires it.
                Operand value = GetIntOrZR(context, rt);

                if (ext == Extension.Sx32 || ext == Extension.Sx64)
                {
                    OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;

                    switch (size)
                    {
                        case 0: value = context.SignExtend8 (destType, value); break;
                        case 1: value = context.SignExtend16(destType, value); break;
                        case 2: value = context.SignExtend32(destType, value); break;
                    }
                }

                SetIntOrZR(context, rt, value);
            }
        }
+
        // Emits a SIMD element load: reads 1 << size bytes at address into
        // element elem of vector register rt, merging with the given vector.
        public static void EmitLoadSimd(
            ArmEmitterContext context,
            Operand address,
            Operand vector,
            int rt,
            int elem,
            int size)
        {
            EmitReadVector(context, address, vector, rt, elem, size);
        }
+
        // Emits a store of 1 << size bytes from register rt, dispatching to the
        // vector or integer path based on the current opcode.
        public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
        {
            bool isSimd = IsSimd(context);

            // Integer accesses top out at 8 bytes (size 3); SIMD at 16 (size 4).
            if ((uint)size > (isSimd ? 4 : 3))
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if (isSimd)
            {
                EmitWriteVector(context, address, rt, 0, size);
            }
            else
            {
                EmitWriteInt(context, address, rt, size);
            }
        }
+
        // Emits a SIMD element store: writes element elem (1 << size bytes) of
        // vector register rt to address.
        public static void EmitStoreSimd(
            ArmEmitterContext context,
            Operand address,
            int rt,
            int elem,
            int size)
        {
            EmitWriteVector(context, address, rt, elem, size);
        }
+
+ private static bool IsSimd(ArmEmitterContext context)
+ {
+ return context.CurrOp is IOpCodeSimd &&
+ !(context.CurrOp is OpCodeSimdMemMs ||
+ context.CurrOp is OpCodeSimdMemSs);
+ }
+
+ private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+ Operand lblFastPath = Label();
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+ context.MarkLabel(lblSlowPath);
+
+ EmitReadIntFallback(context, address, rt, size);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblFastPath);
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+
+ Operand value = null;
+
+ switch (size)
+ {
+ case 0:
+ value = context.Load8(physAddr);
+ break;
+
+ case 1:
+ value = context.Load16(physAddr);
+ break;
+
+ case 2:
+ value = context.Load(OperandType.I32, physAddr);
+ break;
+
+ case 3:
+ value = context.Load(OperandType.I64, physAddr);
+ break;
+ }
+
+ SetInt(context, rt, value);
+
+ context.MarkLabel(lblEnd);
+ }
+
        // Emits a vector element load with fast (direct) and slow (fallback)
        // paths. For size < 4 the loaded value is inserted into element elem of
        // the given vector; size 4 loads a full 128-bit value.
        private static void EmitReadVector(
            ArmEmitterContext context,
            Operand address,
            Operand vector,
            int rt,
            int elem,
            int size)
        {
            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

            Operand lblFastPath = Label();
            Operand lblSlowPath = Label();
            Operand lblEnd = Label();

            context.BranchIfFalse(lblFastPath, isUnalignedAddr);

            context.MarkLabel(lblSlowPath);

            EmitReadVectorFallback(context, address, vector, rt, elem, size);

            context.Branch(lblEnd);

            context.MarkLabel(lblFastPath);

            // May branch back to the slow path if the PTE has flags set.
            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

            Operand value = null;

            switch (size)
            {
                case 0:
                    value = context.VectorInsert8(vector, context.Load8(physAddr), elem);
                    break;

                case 1:
                    value = context.VectorInsert16(vector, context.Load16(physAddr), elem);
                    break;

                case 2:
                    value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem);
                    break;

                case 3:
                    value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem);
                    break;

                case 4:
                    value = context.Load(OperandType.V128, physAddr);
                    break;
            }

            context.Copy(GetVec(rt), value);

            context.MarkLabel(lblEnd);
        }
+
+ private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+ {
+ return context.VectorInsert(context.VectorZero(), value, 0);
+ }
+
        // Emits an integer store with fast (direct host store) and slow
        // (NativeInterface fallback) paths, mirroring EmitReadInt.
        private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
        {
            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

            Operand lblFastPath = Label();
            Operand lblSlowPath = Label();
            Operand lblEnd = Label();

            context.BranchIfFalse(lblFastPath, isUnalignedAddr);

            context.MarkLabel(lblSlowPath);

            EmitWriteIntFallback(context, address, rt, size);

            context.Branch(lblEnd);

            context.MarkLabel(lblFastPath);

            // May branch back to the slow path if the PTE has flags set
            // (e.g. for write tracking).
            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

            Operand value = GetInt(context, rt);

            // Sub-64-bit stores operate on a 32-bit value.
            if (size < 3 && value.Type == OperandType.I64)
            {
                value = context.ConvertI64ToI32(value);
            }

            switch (size)
            {
                case 0: context.Store8 (physAddr, value); break;
                case 1: context.Store16(physAddr, value); break;
                case 2: context.Store (physAddr, value); break;
                case 3: context.Store (physAddr, value); break;
            }

            context.MarkLabel(lblEnd);
        }
+
        // Emits a vector element store with fast and slow paths. For size < 4 a
        // single element is extracted and stored; size 4 stores 128 bits.
        private static void EmitWriteVector(
            ArmEmitterContext context,
            Operand address,
            int rt,
            int elem,
            int size)
        {
            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

            Operand lblFastPath = Label();
            Operand lblSlowPath = Label();
            Operand lblEnd = Label();

            context.BranchIfFalse(lblFastPath, isUnalignedAddr);

            context.MarkLabel(lblSlowPath);

            EmitWriteVectorFallback(context, address, rt, elem, size);

            context.Branch(lblEnd);

            context.MarkLabel(lblFastPath);

            // May branch back to the slow path if the PTE has flags set.
            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

            Operand value = GetVec(rt);

            switch (size)
            {
                case 0:
                    context.Store8(physAddr, context.VectorExtract8(value, elem));
                    break;

                case 1:
                    context.Store16(physAddr, context.VectorExtract16(value, elem));
                    break;

                case 2:
                    context.Store(physAddr, context.VectorExtract(OperandType.FP32, value, elem));
                    break;

                case 3:
                    context.Store(physAddr, context.VectorExtract(OperandType.FP64, value, elem));
                    break;

                case 4:
                    context.Store(physAddr, value);
                    break;
            }

            context.MarkLabel(lblEnd);
        }
+
+ private static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
+ {
+ long addressCheckMask = ~(context.Memory.AddressSpaceSize - 1);
+
+ addressCheckMask |= (1u << size) - 1;
+
+ return context.BitwiseAnd(address, Const(address.Type, addressCheckMask));
+ }
+
        // Emits a software page-table walk that translates the guest address to
        // a host pointer. Branches to lblFallbackPath when the final PTE has
        // flag bits set (unless write-watch support makes that unnecessary).
        private static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblFallbackPath)
        {
            // Start at the page table root; each loop level dereferences one
            // table and leaves the next-level pointer (or final PTE) in pte.
            Operand pte = Const(context.Memory.PageTable.ToInt64());

            int bit = MemoryManager.PageBits;

            do
            {
                Operand addrPart = context.ShiftRightUI(address, Const(bit));

                bit += context.Memory.PtLevelBits;

                // Mask the index on all but the last level, where the remaining
                // high bits are already bounded by the address space.
                if (bit < context.Memory.AddressSpaceBits)
                {
                    addrPart = context.BitwiseAnd(addrPart, Const(addrPart.Type, context.Memory.PtLevelMask));
                }

                // Each entry is 8 bytes, hence the << 3.
                Operand pteOffset = context.ShiftLeft(addrPart, Const(3));

                if (pteOffset.Type == OperandType.I32)
                {
                    pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
                }

                Operand pteAddress = context.Add(pte, pteOffset);

                pte = context.Load(OperandType.I64, pteAddress);
            }
            while (bit < context.Memory.AddressSpaceBits);

            if (!context.Memory.HasWriteWatchSupport)
            {
                // Flagged pages (e.g. tracked memory) must take the fallback.
                Operand hasFlagSet = context.BitwiseAnd(pte, Const((long)MemoryManager.PteFlagsMask));

                context.BranchIfTrue(lblFallbackPath, hasFlagSet);
            }

            Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, MemoryManager.PageMask));

            if (pageOffset.Type == OperandType.I32)
            {
                pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
            }

            // Host address = page base (from PTE) + offset within the page.
            Operand physAddr = context.Add(pte, pageOffset);

            return physAddr;
        }
+
        // Slow-path integer load: calls the size-appropriate NativeInterface
        // read method and writes the result to rt. Callers validate size (0-3).
        private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
        {
            Delegate fallbackMethodDlg = null;

            switch (size)
            {
                case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break;
                case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break;
                case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break;
                case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break;
            }

            SetInt(context, rt, context.Call(fallbackMethodDlg, address));
        }
+
        // Slow-path vector load: calls the NativeInterface read method, then
        // inserts the value into element elem of the given vector (size < 4) or
        // uses the full 128-bit result directly (size 4).
        private static void EmitReadVectorFallback(
            ArmEmitterContext context,
            Operand address,
            Operand vector,
            int rt,
            int elem,
            int size)
        {
            Delegate fallbackMethodDlg = null;

            switch (size)
            {
                case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break;
                case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break;
                case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break;
                case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break;
                case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;
            }

            Operand value = context.Call(fallbackMethodDlg, address);

            // For size 4 the call already returned a V128; no insert needed.
            switch (size)
            {
                case 0: value = context.VectorInsert8 (vector, value, elem); break;
                case 1: value = context.VectorInsert16(vector, value, elem); break;
                case 2: value = context.VectorInsert (vector, value, elem); break;
                case 3: value = context.VectorInsert (vector, value, elem); break;
            }

            context.Copy(GetVec(rt), value);
        }
+
        // Slow-path integer store: calls the size-appropriate NativeInterface
        // write method with the value from rt. Callers validate size (0-3).
        private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
        {
            Delegate fallbackMethodDlg = null;

            switch (size)
            {
                case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break;
                case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break;
                case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break;
                case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break;
            }

            Operand value = GetInt(context, rt);

            // Sub-64-bit writes take a 32-bit argument; truncate if needed.
            if (size < 3 && value.Type == OperandType.I64)
            {
                value = context.ConvertI64ToI32(value);
            }

            context.Call(fallbackMethodDlg, address, value);
        }
+
        // Slow-path vector store: extracts element elem of rt (size < 4) or the
        // whole 128-bit register (size 4) and calls the NativeInterface writer.
        private static void EmitWriteVectorFallback(
            ArmEmitterContext context,
            Operand address,
            int rt,
            int elem,
            int size)
        {
            Delegate fallbackMethodDlg = null;

            switch (size)
            {
                case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break;
                case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break;
                case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break;
                case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break;
                case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;
            }

            Operand value = null;

            if (size < 4)
            {
                switch (size)
                {
                    case 0:
                        value = context.VectorExtract8(GetVec(rt), elem);
                        break;

                    case 1:
                        value = context.VectorExtract16(GetVec(rt), elem);
                        break;

                    case 2:
                        value = context.VectorExtract(OperandType.I32, GetVec(rt), elem);
                        break;

                    case 3:
                        value = context.VectorExtract(OperandType.I64, GetVec(rt), elem);
                        break;
                }
            }
            else
            {
                // Full 128-bit store takes the vector register as-is.
                value = GetVec(rt);
            }

            context.Call(fallbackMethodDlg, address, value);
        }
+
+ private static Operand GetInt(ArmEmitterContext context, int rt)
+ {
+ return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
+ }
+
+ private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+ {
+ if (context.CurrOp is OpCode32)
+ {
+ SetIntA32(context, rt, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rt, value);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMove.cs b/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 00000000..bf051f32
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Movk(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ OperandType type = op.GetOperandType();
+
+ Operand res = GetIntOrZR(context, op.Rd);
+
+ res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit)));
+
+ res = context.BitwiseOr(res, Const(type, op.Immediate));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Movn(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate));
+ }
+
+ public static void Movz(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate));
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMul.cs b/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 00000000..65d11b30
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,100 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
        // MADD: Rd = Ra + Rn * Rm. MSUB: Rd = Ra - Rn * Rm.
        public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
        public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+ private static void EmitMul(ArmEmitterContext context, bool isAdd)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand a = GetIntOrZR(context, op.Ra);
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = isAdd ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
        // Widening 32x32->64 multiply-accumulate variants: S* sign extend the
        // 32-bit sources, U* zero extend; *addl add to Ra, *subl subtract.
        public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
        public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
        public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
        public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
        // Behavior selectors for the widening multiply emitter.
        [Flags]
        private enum MullFlags
        {
            Subtract = 0,      // Subtract the product from Ra.
            Add = 1 << 0,      // Add the product to Ra.
            Signed = 1 << 1,   // Sign extend the 32-bit sources.

            SignedAdd = Signed | Add,
            SignedSubtract = Signed | Subtract
        }
+
        // Shared emitter for SMADDL/SMSUBL/UMADDL/UMSUBL:
        // Rd = Ra +/- (extend32(Rn) * extend32(Rm)).
        private static void EmitMull(ArmEmitterContext context, MullFlags flags)
        {
            OpCodeMul op = (OpCodeMul)context.CurrOp;

            // Extends the low 32 bits of a source register per the flags.
            Operand GetExtendedRegister32(int index)
            {
                Operand value = GetIntOrZR(context, index);

                if ((flags & MullFlags.Signed) != 0)
                {
                    return context.SignExtend32(value.Type, value);
                }
                else
                {
                    return context.ZeroExtend32(value.Type, value);
                }
            }

            Operand a = GetIntOrZR(context, op.Ra);

            Operand n = GetExtendedRegister32(op.Rn);
            Operand m = GetExtendedRegister32(op.Rm);

            Operand res = context.Multiply(n, m);

            res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res);

            SetIntOrZR(context, op.Rd, res);
        }
+
+ public static void Smulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighSI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+
+ public static void Umulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighUI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 00000000..44659e80
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,3159 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
        // ABS (scalar): signed integer absolute value.
        public static void Abs_S(ArmEmitterContext context)
        {
            EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }
+
        // ABS (vector): element-wise signed integer absolute value.
        public static void Abs_V(ArmEmitterContext context)
        {
            EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }
+
        // ADD (scalar): integer addition.
        public static void Add_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
        }
+
        // ADD (vector): element-wise integer addition; uses the x86 PADD
        // family when SSE2 is available.
        public static void Add_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // Pick PADDB/W/D/Q based on the element size.
                Intrinsic addInst = X86PaddInstruction[op.Size];

                Operand res = context.AddIntrinsic(addInst, n, m);

                // 64-bit vector forms must zero the upper half of the result.
                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }
+
        // ADDHN: add and keep the high half of each element, narrowing.
        public static void Addhn_V(ArmEmitterContext context)
        {
            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
        }
+
+ public static void Addp_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+
+ Operand res = context.Add(ne0, ne1);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+ }
+
        // ADDP (vector): pairwise integer addition across both sources.
        public static void Addp_V(ArmEmitterContext context)
        {
            EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2));
        }
+
        // ADDV: sums all elements of the vector into a single scalar result.
        public static void Addv_V(ArmEmitterContext context)
        {
            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
        }
+
        // CLS (vector): counts leading sign bits per element, via a software
        // fallback call for each element.
        public static void Cls_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            int eSize = 8 << op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                Operand de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingSigns), ne, Const(eSize));

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // CLZ (vector): counts leading zeros per element. 64-bit elements use
        // the native IR instruction; smaller ones go through a fallback call
        // parameterized by element size.
        public static void Clz_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            int eSize = 8 << op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                Operand de;

                if (eSize == 64)
                {
                    de = context.CountLeadingZeros(ne);
                }
                else
                {
                    de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingZeros), ne, Const(eSize));
                }

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // CNT: population count per byte; uses the x86 POPCNT instruction when
        // available, otherwise a software fallback.
        public static void Cnt_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            // CNT always operates on byte elements: 8 or 16 of them.
            int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);

                Operand de;

                if (Optimizations.UsePopCnt)
                {
                    de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
                }
                else
                {
                    de = context.Call(new _U64_U64(SoftFallback.CountSetBits8), ne);
                }

                res = EmitVectorInsert(context, res, de, index, 0);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // FABD (scalar): |Rn - Rm|. SSE2 path computes the subtraction then
        // clears the sign bit with an ANDN against the -0.0 mask.
        public static void Fabd_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));

                    // -0f has only the sign bit set; ANDN clears it.
                    Operand mask = X86GetScalar(context, -0f);

                    res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetScalar(context, -0d);

                    res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);

                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res);
                });
            }
        }
+
        // FABD (vector): element-wise |Rn - Rm|, same ANDN sign-clear trick as
        // the scalar form on the SSE2 path.
        public static void Fabd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetAllElements(context, -0f);

                    res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);

                    // 64-bit vector forms must zero the upper half.
                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetAllElements(context, -0d);

                    res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);

                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res);
                });
            }
        }
+
        // FABS (scalar): clears the sign bit (ANDN with -0.0) on the SSE2 path,
        // otherwise calls the math library Abs.
        public static void Fabs_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                if (op.Size == 0)
                {
                    Operand mask = X86GetScalar(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand mask = X86GetScalar(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1);
                });
            }
        }
+
        // FABS (vector): element-wise sign-bit clear; same approach as the
        // scalar form with packed masks.
        public static void Fabs_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand mask = X86GetAllElements(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));

                    // 64-bit vector forms must zero the upper half.
                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand mask = X86GetAllElements(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1);
                });
            }
        }
+
        // FADD (scalar): SSE2 intrinsic when fast FP is on, raw IR add when
        // only fast FP is on, soft-float call otherwise (full ARM semantics).
        public static void Fadd_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }
+
        // FADD (vector): same three-tier strategy as the scalar form.
        public static void Fadd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }
+
        // FADDP (scalar): adds the two elements of the source pair. SSE3 path
        // uses horizontal add (HADDPS/PD); otherwise elements are extracted and
        // added through the soft-float routine.
        public static void Faddp_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse3)
            {
                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                OperandType type = sizeF != 0 ? OperandType.FP64
                                              : OperandType.FP32;

                Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
                Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);

                Operand res = EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, ne0, ne1);

                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
            }
        }
+
        // FADDP (vector): pairwise FP addition across both sources.
        public static void Faddp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }
+
        // FDIV (scalar): intrinsic / raw IR / soft-float tiers like FADD.
        public static void Fdiv_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2);
                });
            }
        }
+
        // FDIV (vector): intrinsic / raw IR / soft-float tiers like FADD.
        public static void Fdiv_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2);
                });
            }
        }
+
        // FMADD (scalar): Rd = Ra + Rn * Rm. The ARM operation is fused; the
        // SSE2 fast path approximates it with separate multiply and add.
        public static void Fmadd_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand a = GetVec(op.Ra);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.Size == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
                });
            }
        }
+
        // FMAX (scalar): SSE2 MAXSS/SD on the fast path, soft-float otherwise.
        public static void Fmax_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }
+
        // FMAX (vector): SSE2 MAXPS/PD on the fast path, soft-float otherwise.
        public static void Fmax_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }
+
        // FMAXNM (scalar): NaN-propagation-aware max; always soft-float.
        public static void Fmaxnm_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2);
            });
        }
+
        // FMAXNM (vector): NaN-propagation-aware max; always soft-float.
        public static void Fmaxnm_V(ArmEmitterContext context)
        {
            EmitVectorBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2);
            });
        }
+
        // FMAXP: pairwise FP max across both sources.
        public static void Fmaxp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }
+
        // FMIN (scalar): SSE2 MINSS/SD on the fast path, soft-float otherwise.
        public static void Fmin_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }
+
        // FMIN (vector): SSE2 MINPS/PD on the fast path, soft-float otherwise.
        public static void Fmin_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }
+
        // FMINNM (scalar): NaN-propagation-aware min; always soft-float.
        public static void Fminnm_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2);
            });
        }
+
        // FMINNM (vector): NaN-propagation-aware min; always soft-float.
        public static void Fminnm_V(ArmEmitterContext context)
        {
            EmitVectorBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2);
            });
        }
+
        // FMINP: pairwise FP min across both sources.
        public static void Fminp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }
+
        // FMLA (scalar, by element): Rd += Rn * Rm[index]. The ARM operation is
        // fused; this path uses separate multiply and add.
        public static void Fmla_Se(ArmEmitterContext context) // Fused.
        {
            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
            {
                return context.Add(op1, context.Multiply(op2, op3));
            });
        }
+
        // FMLA (vector): Rd += Rn * Rm, element-wise. The ARM operation is
        // fused; the SSE2 fast path uses separate multiply and add.
        public static void Fmla_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);

                    // 64-bit vector forms must zero the upper half.
                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorTernaryOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
                });
            }
        }
+
public static void Fmla_Ve(ArmEmitterContext context) // Fused.
{
    // FMLA (vector, by element): Vd += Vn * Vm[index], broadcasting one element of Vm.
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        // Bit 0 of Size selects the lane width: 0 = single, 1 = double precision.
        int sizeF = op.Size & 1;

        if (sizeF == 0)
        {
            // SHUFPS immediate: replicate element `Index` into all four 2-bit
            // lane selectors, broadcasting m[Index] across the vector.
            int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;

            Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));

            // NOTE: MULPS + ADDPS is not fused; intermediate rounding differs
            // from the ARM fused op (accepted under FastFP).
            res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
            res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                // 64-bit vector form: zero the upper half of the destination.
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
        else /* if (sizeF == 1) */
        {
            // SHUFPD immediate: replicate element `Index` into both 1-bit selectors.
            int shuffleMask = op.Index | op.Index << 1;

            Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));

            res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
            res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else
    {
        // Soft-float path: true fused multiply-add per element.
        EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
        {
            return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
        });
    }
}
+
+ public static void Fmls_Se(ArmEmitterContext context) // Fused.
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Fmls_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
+ });
+ }
+ }
+
public static void Fmsub_S(ArmEmitterContext context) // Fused.
{
    // FMSUB (scalar): Rd = Ra - Rn * Rm.
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

        Operand a = GetVec(op.Ra);
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        if (op.Size == 0)
        {
            // NOTE: separate MULSS + SUBSS is not fused; the product is rounded
            // before the subtract, unlike the ARM fused op (accepted under FastFP).
            Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

            res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);

            // Scalar single result: upper 96 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
        }
        else /* if (op.Size == 1) */
        {
            Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

            res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);

            // Scalar double result: upper 64 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
        }
    }
    else
    {
        // Soft-float path: true fused multiply-subtract.
        EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
        {
            return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
        });
    }
}
+
+ public static void Fmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
+ });
+ }
+ }
+
public static void Fmul_Se(ArmEmitterContext context)
{
    // FMUL (scalar, by element): Rd = Rn * Rm[index].
    //
    // Consistency fix: Fmul_S, Fmul_V and Fmul_Ve all fall back to the
    // soft-float FPMul helpers when FastFP is disabled, but this variant
    // unconditionally used the plain IR multiply. Mirror the siblings so
    // denormal/NaN handling is uniform across every FMUL form; behavior
    // under FastFP (the previous default path) is unchanged.
    if (Optimizations.FastFP)
    {
        EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
    }
    else
    {
        EmitScalarBinaryOpByElemF(context, (op1, op2) =>
        {
            return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
        });
    }
}
+
+ public static void Fmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
+ });
+ }
+ }
+
public static void Fmulx_S(ArmEmitterContext context)
{
    // FMULX (scalar): multiply through the soft-float FPMulX helpers.
    EmitScalarBinaryOpF(context, (valA, valB) =>
        EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, valA, valB));
}
+
+ public static void Fmulx_Se(ArmEmitterContext context)
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
+ });
+ }
+
+ public static void Fmulx_V(ArmEmitterContext context)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
+ });
+ }
+
+ public static void Fmulx_Ve(ArmEmitterContext context)
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
+ });
+ }
+
public static void Fneg_S(ArmEmitterContext context)
{
    // FNEG (scalar): flip the sign bit of the floating-point value.
    if (Optimizations.UseSse2)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        if (op.Size == 0)
        {
            // -0f is a mask with only the sign bit set; XOR toggles the sign.
            Operand mask = X86GetScalar(context, -0f);

            Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));

            // Scalar single result: upper 96 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
        }
        else /* if (op.Size == 1) */
        {
            // -0d likewise has only the (64-bit) sign bit set.
            Operand mask = X86GetScalar(context, -0d);

            Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));

            // Scalar double result: upper 64 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
        }
    }
    else
    {
        EmitScalarUnaryOpF(context, (op1) => context.Negate(op1));
    }
}
+
+ public static void Fneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
public static void Fnmadd_S(ArmEmitterContext context)
{
    // FNMADD (scalar): Rd = -Ra - Rn * Rm,
    // computed here as ((-Rn) * Rm) - Ra. No SSE fast path for this form.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    // Bit 0 of Size selects the precision of the scalar operands.
    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64
                                  : OperandType.FP32;

    // Extract element 0 of each source register as a scalar FP operand.
    Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
    Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
    Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0);

    Operand res = context.Subtract(context.Multiply(context.Negate(ne), me), ae);

    // Insert into a zeroed vector so the upper elements of Rd read as zero.
    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
+
public static void Fnmsub_S(ArmEmitterContext context)
{
    // FNMSUB (scalar): Rd = Rn * Rm - Ra. No SSE fast path for this form.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    // Bit 0 of Size selects the precision of the scalar operands.
    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64
                                  : OperandType.FP32;

    // Extract element 0 of each source register as a scalar FP operand.
    Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
    Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
    Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0);

    Operand res = context.Subtract(context.Multiply(ne, me), ae);

    // Insert into a zeroed vector so the upper elements of Rd read as zero.
    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
+
public static void Fnmul_S(ArmEmitterContext context)
{
    // FNMUL (scalar): Rd = -(Rn * Rm).
    EmitScalarBinaryOpF(context, (valA, valB) =>
    {
        Operand product = context.Multiply(valA, valB);

        return context.Negate(product);
    });
}
+
+ public static void Frecpe_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1);
+ });
+ }
+ }
+
+ public static void Frecpe_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1);
+ });
+ }
+ }
+
public static void Frecps_S(ArmEmitterContext context) // Fused.
{
    // FRECPS (scalar): Newton-Raphson reciprocal step, Rd = 2.0 - Rn * Rm.
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

        // Bit 0 of Size selects the precision of the scalar operands.
        int sizeF = op.Size & 1;

        if (sizeF == 0)
        {
            // Constant 2.0 for the "2 - n*m" step.
            Operand mask = X86GetScalar(context, 2f);

            // NOTE: mul + sub is not fused, unlike the ARM op (accepted under FastFP).
            Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm));

            res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);

            // Scalar single result: upper 96 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
        }
        else /* if (sizeF == 1) */
        {
            Operand mask = X86GetScalar(context, 2d);

            Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));

            res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);

            // Scalar double result: upper 64 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
        }
    }
    else
    {
        // Soft-float path: fused, architecturally exact step.
        EmitScalarBinaryOpF(context, (op1, op2) =>
        {
            return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2);
        });
    }
}
+
+ public static void Frecps_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, 2f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, 2d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2);
+ });
+ }
+ }
+
+ public static void Frecpx_S(ArmEmitterContext context)
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, op1);
+ });
+ }
+
+ public static void Frinta_S(ArmEmitterContext context)
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+
+ public static void Frinta_V(ArmEmitterContext context)
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+
+ public static void Frinti_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ if (op.Size == 0)
+ {
+ return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
+ }
+ else /* if (op.Size == 1) */
+ {
+ return context.Call(new _F64_F64(SoftFallback.Round), op1);
+ }
+ });
+ }
+
+ public static void Frinti_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ if (sizeF == 0)
+ {
+ return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ return context.Call(new _F64_F64(SoftFallback.Round), op1);
+ }
+ });
+ }
+
+ public static void Frintm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1);
+ });
+ }
+ }
+
+ public static void Frintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1);
+ });
+ }
+ }
+
+ public static void Frintn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1);
+ });
+ }
+ }
+
+ public static void Frintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1);
+ });
+ }
+ }
+
+ public static void Frintx_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ if (op.Size == 0)
+ {
+ return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
+ }
+ else /* if (op.Size == 1) */
+ {
+ return context.Call(new _F64_F64(SoftFallback.Round), op1);
+ }
+ });
+ }
+
+ public static void Frintx_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ if (sizeF == 0)
+ {
+ return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ return context.Call(new _F64_F64(SoftFallback.Round), op1);
+ }
+ });
+ }
+
+ public static void Frintz_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1);
+ });
+ }
+ }
+
+ public static void Frintz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1);
+ });
+ }
+ }
+
public static void Frsqrts_S(ArmEmitterContext context) // Fused.
{
    // FRSQRTS (scalar): Newton-Raphson reciprocal-sqrt step,
    // Rd = (3.0 - Rn * Rm) / 2.0, computed as 0.5 * (3 - n*m).
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

        // Bit 0 of Size selects the precision of the scalar operands.
        int sizeF = op.Size & 1;

        if (sizeF == 0)
        {
            Operand maskHalf  = X86GetScalar(context, 0.5f);
            Operand maskThree = X86GetScalar(context, 3f);

            // NOTE: the mul/sub/mul sequence is not fused, unlike the ARM op
            // (accepted under FastFP).
            Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm));

            res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
            res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf,  res);

            // Scalar single result: upper 96 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
        }
        else /* if (sizeF == 1) */
        {
            Operand maskHalf  = X86GetScalar(context, 0.5d);
            Operand maskThree = X86GetScalar(context, 3d);

            Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));

            res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
            res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf,  res);

            // Scalar double result: upper 64 bits of the destination read as zero.
            context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
        }
    }
    else
    {
        // Soft-float path: fused, architecturally exact step.
        EmitScalarBinaryOpF(context, (op1, op2) =>
        {
            return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2);
        });
    }
}
+
+ public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2);
+ });
+ }
+ }
+
+ public static void Fsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1);
+ });
+ }
+ }
+
+ public static void Fsqrt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1);
+ });
+ }
+ }
+
+ public static void Fsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);
+ });
+ }
+ }
+
+ public static void Fsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);
+ });
+ }
+ }
+
+ public static void Mla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Mul_AddSub(context, AddSub.Add);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mla_Ve(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Mls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Mul_AddSub(context, AddSub.Subtract);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_Ve(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Mul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Mul_AddSub(context, AddSub.None);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Mul_Ve(ArmEmitterContext context)
+ {
+ EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+
+ public static void Neg_S(ArmEmitterContext context)
+ {
+ EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+
public static void Neg_V(ArmEmitterContext context)
{
    // NEG (vector): element-wise two's complement integer negation.
    if (Optimizations.UseSse2)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        // Negate as 0 - x with the packed subtract matching the element size.
        Intrinsic subInst = X86PsubInstruction[op.Size];

        Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            // 64-bit vector form: the upper 64 bits of the destination read as zero.
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1));
    }
}
+
+ public static void Raddhn_V(ArmEmitterContext context)
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+ }
+
+ public static void Rsubhn_V(ArmEmitterContext context)
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+ }
+
+ public static void Saba_V(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+
+ public static void Sabal_V(ArmEmitterContext context)
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+
public static void Sabd_V(ArmEmitterContext context)
{
    // SABD (vector): signed absolute difference, element-wise |Rn - Rm|.
    if (Optimizations.UseSse2)
    {
        // NOTE(review): this path is gated on UseSse2 but delegates to a helper
        // named EmitSse41Sabd, and the sibling Sabdl_V gates the same helper on
        // UseSse41 — confirm the non-long path only emits SSE2-safe intrinsics,
        // or tighten this gate to UseSse41.
        OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        EmitSse41Sabd(context, op, n, m, isLong: false);
    }
    else
    {
        EmitVectorBinaryOpSx(context, (op1, op2) =>
        {
            return EmitAbs(context, context.Subtract(op1, op2));
        });
    }
}
+
+ public static void Sabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovsxbw
+ : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41Sabd(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sadalp_V(ArmEmitterContext context)
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: true);
+ }
+
+ public static void Saddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddlp_V(ArmEmitterContext context)
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: false);
+ }
+
+ public static void Saddlv_V(ArmEmitterContext context)
+ {
+ EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+
+ public static void Saddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
public static void Shadd_V(ArmEmitterContext context)
{
    // SHADD (vector): signed halving add, element-wise (Rn + Rm) >> 1.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    // Size > 0 because the arithmetic shifts used below only exist for
    // words/dwords (x86 has no packed arithmetic byte shift).
    if (Optimizations.UseSse2 && op.Size > 0)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        // Overflow-free average: (n & m) + ((n ^ m) >> 1) == (n + m) >> 1,
        // splitting the sum into common bits plus halved differing bits.
        Operand res  = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
        Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

        // Arithmetic shift keeps the signed semantics of the halving.
        Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;

        res2 = context.AddIntrinsic(shiftInst, res2, Const(1));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, res2);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            // 64-bit vector form: the upper 64 bits of the destination read as zero.
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Fallback: IR operands are wide enough that the add cannot overflow
        // before the signed shift.
        EmitVectorBinaryOpSx(context, (op1, op2) =>
        {
            return context.ShiftRightSI(context.Add(op1, op2), Const(1));
        });
    }
}
+
public static void Shsub_V(ArmEmitterContext context)
{
    // SHSUB (vector): signed halving subtract, element-wise (Rn - Rm) >> 1.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    // Size < 2 because PAVGB/PAVGW only exist for bytes and words.
    if (Optimizations.UseSse2 && op.Size < 2)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        // Bias each signed element by 0x80 (bytes) / 0x8000 (words) so it can
        // be treated as unsigned; the bias cancels in the final difference.
        Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        Operand nPlusMask = context.AddIntrinsic(addInst, n, mask);
        Operand mPlusMask = context.AddIntrinsic(addInst, m, mask);

        // PAVG computes ceil((n' + m') / 2); then n' - avg == floor((n - m) / 2),
        // which is exactly the signed halving subtract.
        Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

        Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask);

        Intrinsic subInst = X86PsubInstruction[op.Size];

        res = context.AddIntrinsic(subInst, nPlusMask, res);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            // 64-bit vector form: the upper 64 bits of the destination read as zero.
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Fallback: IR operands are wide enough that the subtract cannot
        // overflow before the signed shift.
        EmitVectorBinaryOpSx(context, (op1, op2) =>
        {
            return context.ShiftRightSI(context.Subtract(op1, op2), Const(1));
        });
    }
}
+
+ public static void Smax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Delegate dlg = new _S64_S64_S64(Math.Max);
+
+ EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+ }
+
+ public static void Smaxp_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _S64_S64_S64(Math.Max);
+
+ EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
+ public static void Smaxv_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _S64_S64_S64(Math.Max);
+
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
        // SMIN: signed minimum, element-wise.
        public static void Smin_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PMINSB/PMINSW/PMINSD selected by element size.
                Intrinsic minInst = X86PminsInstruction[op.Size];

                Operand res = context.AddIntrinsic(minInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: per-element managed call to Math.Min on sign-extended values.
                Delegate dlg = new _S64_S64_S64(Math.Min);

                EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }
+
+ public static void Sminp_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _S64_S64_S64(Math.Min);
+
+ EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
+ public static void Sminv_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _S64_S64_S64(Math.Min);
+
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
        // SMLAL{2}: signed multiply-add long; widens n and m, multiplies, accumulates into d.
        public static void Smlal_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // SMLAL2 (second-half variant): shift the upper 64 bits of the sources down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Sign-extend each element to the doubled width.
                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                // Accumulate with the destination at the widened element size (op.Size + 1).
                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(addInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
                {
                    return context.Add(op1, context.Multiply(op2, op3));
                });
            }
        }
+
+ public static void Smlal_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Smlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovsxbw
+ : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Smull_V(ArmEmitterContext context)
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+
+ public static void Smull_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+
+ public static void Sqabs_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+
+ public static void Sqabs_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+
+ public static void Sqadd_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+ }
+
+ public static void Sqadd_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+ }
+
+ public static void Sqdmulh_S(ArmEmitterContext context)
+ {
+ EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.ScalarSx);
+ }
+
+ public static void Sqdmulh_V(ArmEmitterContext context)
+ {
+ EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.VectorSx);
+ }
+
+ public static void Sqneg_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+
+ public static void Sqneg_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+
+ public static void Sqrdmulh_S(ArmEmitterContext context)
+ {
+ EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.ScalarSx);
+ }
+
+ public static void Sqrdmulh_V(ArmEmitterContext context)
+ {
+ EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.VectorSx);
+ }
+
+ public static void Sqsub_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+ }
+
+ public static void Sqsub_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+ }
+
+ public static void Sqxtn_S(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+ }
+
+ public static void Sqxtn_V(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Sqxtun_S(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+ }
+
+ public static void Sqxtun_V(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+ }
+
        // SRHADD: signed rounding halving add, per element: (n + m + 1) >> 1.
        public static void Srhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // Bias both inputs by the sign bit so PAVG (unsigned) can be used on
                // signed values; PAVG already performs the +1 rounding.
                Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));

                Intrinsic subInst = X86PsubInstruction[op.Size];

                Operand nMinusMask = context.AddIntrinsic(subInst, n, mask);
                Operand mMinusMask = context.AddIntrinsic(subInst, m, mask);

                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask);

                // Undo the bias on the averaged result.
                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, mask, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: widen, add, round up by one, then arithmetic shift right.
                EmitVectorBinaryOpSx(context, (op1, op2) =>
                {
                    Operand res = context.Add(op1, op2);

                    res = context.Add(res, Const(1L));

                    return context.ShiftRightSI(res, Const(1));
                });
            }
        }
+
        // SSUBL{2}: signed subtract long; widens both sources then subtracts.
        public static void Ssubl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // SSUBL2 (second-half variant): shift the upper 64 bits of the sources down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Sign-extend each element to the doubled width.
                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                // Subtract at the widened element size (op.Size + 1).
                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
+
        // SSUBW{2}: signed subtract wide; n is already wide, only m is widened.
        public static void Ssubw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // SSUBW2 (second-half variant): shift the upper 64 bits of m down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Sign-extend m's elements to the doubled width.
                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
+
+ public static void Sub_S(ArmEmitterContext context)
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+
        // SUB (vector): integer subtract, element-wise.
        public static void Sub_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PSUBB/PSUBW/PSUBD/PSUBQ selected by element size.
                Intrinsic subInst = X86PsubInstruction[op.Size];

                Operand res = context.AddIntrinsic(subInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
+
+ public static void Subhn_V(ArmEmitterContext context)
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+ }
+
+ public static void Suqadd_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+ }
+
+ public static void Suqadd_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+ }
+
+ public static void Uaba_V(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+
+ public static void Uabal_V(ArmEmitterContext context)
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+
        // UABD: unsigned absolute difference, element-wise.
        public static void Uabd_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // Shared SSE4.1 helper; isLong: false keeps the original element size.
                EmitSse41Uabd(context, op, n, m, isLong: false);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return EmitAbs(context, context.Subtract(op1, op2));
                });
            }
        }
+
+ public static void Uabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovzxbw
+ : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41Uabd(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uadalp_V(ArmEmitterContext context)
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: true);
+ }
+
        // UADDL{2}: unsigned add long; widens both sources then adds.
        public static void Uaddl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // UADDL2 (second-half variant): shift the upper 64 bits of the sources down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Zero-extend each element to the doubled width.
                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                // Add at the widened element size (op.Size + 1).
                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }
+
+ public static void Uaddlp_V(ArmEmitterContext context)
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: false);
+ }
+
+ public static void Uaddlv_V(ArmEmitterContext context)
+ {
+ EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+
        // UADDW{2}: unsigned add wide; n is already wide, only m is widened.
        public static void Uaddw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // UADDW2 (second-half variant): shift the upper 64 bits of m down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Zero-extend m's elements to the doubled width.
                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }
+
        // UHADD: unsigned halving add, per element: (n + m) >> 1 (truncating).
        public static void Uhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            // Size > 0 because the trick needs a per-element logical shift, and SSE
            // has no 8-bit shift instruction.
            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // (n + m) >> 1 == (n & m) + ((n ^ m) >> 1), which cannot overflow.
                Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;

                res2 = context.AddIntrinsic(shiftInst, res2, Const(1));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, res2);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: widen to 64-bit, add, then logical shift right by one.
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.ShiftRightUI(context.Add(op1, op2), Const(1));
                });
            }
        }
+
        // UHSUB: unsigned halving subtract, per element: (n - m) >> 1.
        public static void Uhsub_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PAVG computes (n + m + 1) >> 1 (rounds up); n - pavg(n, m) then
                // equals the floor of (n - m) / 2 in modular element arithmetic.
                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, n, m);

                Intrinsic subInst = X86PsubInstruction[op.Size];

                res = context.AddIntrinsic(subInst, n, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: widen to 64-bit, subtract, then logical shift right by one.
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.ShiftRightUI(context.Subtract(op1, op2), Const(1));
                });
            }
        }
+
        // UMAX: unsigned maximum, element-wise.
        public static void Umax_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PMAXUB/PMAXUW/PMAXUD selected by element size.
                Intrinsic maxInst = X86PmaxuInstruction[op.Size];

                Operand res = context.AddIntrinsic(maxInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: per-element managed call to Math.Max on zero-extended values.
                Delegate dlg = new _U64_U64_U64(Math.Max);

                EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }
+
+ public static void Umaxp_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _U64_U64_U64(Math.Max);
+
+ EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
+ public static void Umaxv_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _U64_U64_U64(Math.Max);
+
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
        // UMIN: unsigned minimum, element-wise.
        public static void Umin_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PMINUB/PMINUW/PMINUD selected by element size.
                Intrinsic minInst = X86PminuInstruction[op.Size];

                Operand res = context.AddIntrinsic(minInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: per-element managed call to Math.Min on zero-extended values.
                Delegate dlg = new _U64_U64_U64(Math.Min);

                EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }
+
+ public static void Uminp_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _U64_U64_U64(Math.Min);
+
+ EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
+ public static void Uminv_V(ArmEmitterContext context)
+ {
+ Delegate dlg = new _U64_U64_U64(Math.Min);
+
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
+ }
+
        // UMLAL{2}: unsigned multiply-add long; widens n and m, multiplies, accumulates into d.
        public static void Umlal_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // UMLAL2 (second-half variant): shift the upper 64 bits of the sources down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Zero-extend each element to the doubled width.
                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                // Accumulate with the destination at the widened element size (op.Size + 1).
                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(addInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
                {
                    return context.Add(op1, context.Multiply(op2, op3));
                });
            }
        }
+
+ public static void Umlal_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Umlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovzxbw
+ : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+
+ public static void Umull_V(ArmEmitterContext context)
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+
+ public static void Umull_Ve(ArmEmitterContext context)
+ {
+ EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+
+ public static void Uqadd_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+
+ public static void Uqadd_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+
+ public static void Uqsub_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+
+ public static void Uqsub_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+
+ public static void Uqxtn_S(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+ }
+
+ public static void Uqxtn_V(ArmEmitterContext context)
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+ }
+
        // URHADD: unsigned rounding halving add, per element: (n + m + 1) >> 1.
        public static void Urhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // PAVGB/PAVGW compute exactly (a + b + 1) >> 1 on unsigned elements.
                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Fallback: widen, add, round up by one, then logical shift right.
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    Operand res = context.Add(op1, op2);

                    res = context.Add(res, Const(1L));

                    return context.ShiftRightUI(res, Const(1));
                });
            }
        }
+
+ public static void Usqadd_S(ArmEmitterContext context)
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+
+ public static void Usqadd_V(ArmEmitterContext context)
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+
        // USUBL{2}: unsigned subtract long; widens both sources then subtracts.
        public static void Usubl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // USUBL2 (second-half variant): shift the upper 64 bits of the sources down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Zero-extend each element to the doubled width.
                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                // Subtract at the widened element size (op.Size + 1).
                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
+
        // USUBW{2}: unsigned subtract wide; n is already wide, only m is widened.
        public static void Usubw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // USUBW2 (second-half variant): shift the upper 64 bits of m down.
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                // Zero-extend m's elements to the doubled width.
                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
        // Shared emitter for [US]ADDLP / [US]ADALP: adds adjacent element pairs of Rn into
        // elements of doubled size; when accumulate is set, also adds the existing Rd element.
        private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int pairs = op.GetPairsCount() >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                // Extract the two source elements of the pair, sign- or zero-extended.
                Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
                Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);

                Operand e = context.Add(ne0, ne1);

                if (accumulate)
                {
                    // Accumulate with the destination element at the widened size.
                    Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);

                    e = context.Add(e, de);
                }

                res = EmitVectorInsert(context, res, e, index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Shared emitter for SQDMULH / SQRDMULH: computes the high half of 2 * n * m,
        // with optional rounding. Inputs are sign-extended elements of eSize bits.
        private static Operand EmitDoublingMultiplyHighHalf(
            ArmEmitterContext context,
            Operand n,
            Operand m,
            bool round)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            int eSize = 8 << op.Size;

            Operand res = context.Multiply(n, m);

            if (!round)
            {
                // (2 * n * m) >> eSize == (n * m) >> (eSize - 1).
                res = context.ShiftRightSI(res, Const(eSize - 1));
            }
            else
            {
                // Rounding variant: ((2 * n * m) + (1 << (eSize - 1))) >> eSize.
                long roundConst = 1L << (eSize - 1);

                res = context.ShiftLeft(res, Const(1));

                res = context.Add(res, Const(roundConst));

                res = context.ShiftRightSI(res, Const(eSize));

                // NOTE(review): the int.MinValue check appears to undo overflow from the
                // rounding add for the saturating caller — confirm against the ARM
                // pseudocode for SQRDMULH.
                Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));

                res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
            }

            return res;
        }
+
        // Shared emitter for ADDHN/SUBHN{2} style ops: applies emit to widened n/m elements,
        // optionally rounds, then keeps the high half (>> eSize) narrowed into Rd.
        private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            int elems = 8 >> op.Size;
            int eSize = 8 << op.Size;

            // The "2" variants (Simd128) write the upper half of Rd and preserve the lower.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

            long roundConst = 1L << (eSize - 1);

            for (int index = 0; index < elems; index++)
            {
                // Source elements are read at the doubled size (op.Size + 1).
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);

                Operand de = emit(ne, me);

                if (round)
                {
                    de = context.Add(de, Const(roundConst));
                }

                de = context.ShiftRightUI(de, Const(eSize));

                res = EmitVectorInsert(context, res, de, part + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Emits an SSE4.1 ROUNDSS/ROUNDSD for a scalar FP rounding op with the given mode.
        // op.Size bit 0 selects double (1) vs single (0) precision.
        public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;

            Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));

            // Zero the unused upper lanes: above 64 bits for double, above 32 for single.
            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Emits an SSE4.1 ROUNDPS/ROUNDPD for a vector FP rounding op with the given mode.
        // op.Size bit 0 selects double (1) vs single (0) precision.
        public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;

            Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Selects how EmitSse41Mul_AddSub combines the product with the destination.
        private enum AddSub
        {
            None,
            Add,
            Subtract
        }
+
+ private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = null;
+
+ if (op.Size == 0)
+ {
+ Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
+ Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+
+ res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+
+ Operand mask = X86GetAllElements(context, 0x00FF00FF);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
+ }
+ else if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+ }
+
+ Operand d = GetVec(op.Rd);
+
+ if (addSub == AddSub.Add)
+ {
+ switch (op.Size)
+ {
+ case 0: res = context.AddIntrinsic(Intrinsic.X86Paddb, d, res); break;
+ case 1: res = context.AddIntrinsic(Intrinsic.X86Paddw, d, res); break;
+ case 2: res = context.AddIntrinsic(Intrinsic.X86Paddd, d, res); break;
+ case 3: res = context.AddIntrinsic(Intrinsic.X86Paddq, d, res); break;
+ }
+ }
+ else if (addSub == AddSub.Subtract)
+ {
+ switch (op.Size)
+ {
+ case 0: res = context.AddIntrinsic(Intrinsic.X86Psubb, d, res); break;
+ case 1: res = context.AddIntrinsic(Intrinsic.X86Psubw, d, res); break;
+ case 2: res = context.AddIntrinsic(Intrinsic.X86Psubd, d, res); break;
+ case 3: res = context.AddIntrinsic(Intrinsic.X86Psubq, d, res); break;
+ }
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+
        // Shared SSE4.1 emitter for SABD/SABDL: signed absolute difference of n and m.
        // When isLong is set, elements have already been widened, so work at size + 1.
        private static void EmitSse41Sabd(
            ArmEmitterContext context,
            OpCodeSimdReg op,
            Operand n,
            Operand m,
            bool isLong)
        {
            int size = isLong ? op.Size + 1 : op.Size;

            // cmpMask = (n > m) ? all-ones : zero, per element.
            Intrinsic cmpgtInst = X86PcmpgtInstruction[size];

            Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m);

            Intrinsic subInst = X86PsubInstruction[size];

            // Select n - m where n > m, and m - n elsewhere, then merge.
            Operand res = context.AddIntrinsic(subInst, n, m);

            res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);

            Operand res2 = context.AddIntrinsic(subInst, m, n);

            res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);

            res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);

            // Long variants always produce a full 128-bit result; only the non-long
            // 64-bit form needs its upper half cleared.
            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Shared SSE4.1 emitter for UABD/UABDL: unsigned absolute difference of n and m.
        // When isLong is set, elements have already been widened, so work at size + 1.
        private static void EmitSse41Uabd(
            ArmEmitterContext context,
            OpCodeSimdReg op,
            Operand n,
            Operand m,
            bool isLong)
        {
            int size = isLong ? op.Size + 1 : op.Size;

            // There is no unsigned compare-greater in SSE, so derive n > m from
            // max(m, n): if max == m then n <= m.
            Intrinsic maxInst = X86PmaxuInstruction[size];

            Operand max = context.AddIntrinsic(maxInst, m, n);

            Intrinsic cmpeqInst = X86PcmpeqInstruction[size];

            Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m);

            Operand onesMask = X86GetAllElements(context, -1L);

            // Invert: cmpMask now holds all-ones where n > m.
            cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask);

            Intrinsic subInst = X86PsubInstruction[size];

            // Select n - m where n > m, and m - n elsewhere, then merge.
            Operand res = context.AddIntrinsic(subInst, n, m);
            Operand res2 = context.AddIntrinsic(subInst, m, n);

            res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
            res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);

            res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);

            // Long variants always produce a full 128-bit result; only the non-long
            // 64-bit form needs its upper half cleared.
            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 00000000..f27121bb
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,712 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
// CMEQ (scalar): signed compare-equal producing an all-ones/all-zeros mask.
public static void Cmeq_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true);
}

// CMEQ (vector): maps directly onto PCMPEQB/W/D/Q when SSE4.1 is enabled.
// The register form compares against Rm; the zero form against zero.
public static void Cmeq_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);
        Operand m;

        if (op is OpCodeSimdReg binOp)
        {
            m = GetVec(binOp.Rm);
        }
        else
        {
            // Compare-against-zero variant (no Rm in the encoding).
            m = context.VectorZero();
        }

        Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];

        Operand res = context.AddIntrinsic(cmpInst, n, m);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false);
    }
}
+
// CMGE (scalar): signed compare greater-or-equal, mask result.
public static void Cmge_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true);
}

// CMGE (vector): computed as NOT(m > n) using PCMPGT + PANDN.
// Gated on SSE4.2 because the 64-bit element size needs PCMPGTQ.
public static void Cmge_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse42)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);
        Operand m;

        if (op is OpCodeSimdReg binOp)
        {
            m = GetVec(binOp.Rm);
        }
        else
        {
            // Compare-against-zero variant.
            m = context.VectorZero();
        }

        Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];

        Operand res = context.AddIntrinsic(cmpInst, m, n);

        Operand mask = X86GetAllElements(context, -1L);

        // PANDN: (~res) & mask -- inverts the m > n mask to get n >= m.
        res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false);
    }
}
+
// CMGT (scalar): signed compare greater-than, mask result.
public static void Cmgt_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true);
}

// CMGT (vector): maps directly onto PCMPGT (SSE4.2 for the PCMPGTQ case).
public static void Cmgt_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse42)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);
        Operand m;

        if (op is OpCodeSimdReg binOp)
        {
            m = GetVec(binOp.Rm);
        }
        else
        {
            // Compare-against-zero variant.
            m = context.VectorZero();
        }

        Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];

        Operand res = context.AddIntrinsic(cmpInst, n, m);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false);
    }
}
+
// CMHI (scalar): unsigned compare higher, mask result.
public static void Cmhi_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true);
}

// CMHI (vector): x86 has no unsigned PCMPGT, so n > m (unsigned) is derived
// as NOT(max(m, n) == m). Guarded with op.Size < 3 because there is no
// SSE4.1 PMAXUQ for 64-bit elements.
public static void Cmhi_V(ArmEmitterContext context)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSse41 && op.Size < 3)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        Intrinsic maxInst = X86PmaxuInstruction[op.Size];

        Operand res = context.AddIntrinsic(maxInst, m, n);

        Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];

        // res == m iff m >= n (unsigned).
        res = context.AddIntrinsic(cmpInst, res, m);

        Operand mask = X86GetAllElements(context, -1L);

        // Invert: set where n > m.
        res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false);
    }
}
+
// CMHS (scalar): unsigned compare higher-or-same, mask result.
public static void Cmhs_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true);
}

// CMHS (vector): n >= m (unsigned) computed as max(n, m) == n.
// op.Size < 3 because SSE4.1 has no 64-bit PMAXUQ.
public static void Cmhs_V(ArmEmitterContext context)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSse41 && op.Size < 3)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        Intrinsic maxInst = X86PmaxuInstruction[op.Size];

        Operand res = context.AddIntrinsic(maxInst, n, m);

        Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];

        // max(n, m) == n iff n >= m (unsigned); no inversion needed here.
        res = context.AddIntrinsic(cmpInst, res, n);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false);
    }
}
+
// CMLE (scalar): signed compare less-or-equal against zero, mask result.
public static void Cmle_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true);
}

// CMLE (vector): zero-comparison only. n <= 0 computed as NOT(n > 0)
// via PCMPGT + PANDN (SSE4.2 for the 64-bit PCMPGTQ case).
public static void Cmle_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse42)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);

        Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];

        Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero());

        Operand mask = X86GetAllElements(context, -1L);

        // Invert the n > 0 mask to get n <= 0.
        res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false);
    }
}
+
// CMLT (scalar): signed compare less-than against zero, mask result.
public static void Cmlt_S(ArmEmitterContext context)
{
    EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true);
}

// CMLT (vector): zero-comparison only. n < 0 emitted as PCMPGT(0, n),
// i.e. the operands are swapped instead of inverting the mask.
public static void Cmlt_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse42)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);

        Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];

        Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false);
    }
}
+
// CMTST (scalar): mask result of (n & m) != 0.
public static void Cmtst_S(ArmEmitterContext context)
{
    EmitCmtstOp(context, scalar: true);
}

// CMTST (vector): per-element (n & m) != 0.
public static void Cmtst_V(ArmEmitterContext context)
{
    EmitCmtstOp(context, scalar: false);
}

// FCCMP: conditional FP compare; quiet NaNs do not signal.
public static void Fccmp_S(ArmEmitterContext context)
{
    EmitFccmpOrFccmpe(context, signalNaNs: false);
}

// FCCMPE: conditional FP compare; signals on any NaN operand.
public static void Fccmpe_S(ArmEmitterContext context)
{
    EmitFccmpOrFccmpe(context, signalNaNs: true);
}
+
// FCMEQ/FCMGE/FCMGT/FCMLE/FCMLT emitters. Each picks the SSE/SSE2 CMPSS/
// CMPPS/CMPSD/CMPPD fast path when fast-FP is allowed, otherwise falls back
// to the IEEE-accurate SoftFloat helpers. LE/LT have no direct x86 predicate
// here; they reuse GE/GT with isLeOrLt, which swaps the operands in
// EmitCmpSseOrSse2OpF.

public static void Fcmeq_S(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: true);
    }
}

public static void Fcmeq_V(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: false);
    }
}

public static void Fcmge_S(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true);
    }
}

public static void Fcmge_V(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false);
    }
}

public static void Fcmgt_S(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true);
    }
}

public static void Fcmgt_V(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false);
    }
}

// LE = GE with swapped operands (isLeOrLt: true).
public static void Fcmle_S(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: true);
    }
}

public static void Fcmle_V(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: false);
    }
}

// LT = GT with swapped operands (isLeOrLt: true).
public static void Fcmlt_S(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: true);
    }
}

public static void Fcmlt_V(ArmEmitterContext context)
{
    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
    }
    else
    {
        EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: false);
    }
}
+
// FCMP: FP compare setting NZCV; quiet NaNs do not signal.
public static void Fcmp_S(ArmEmitterContext context)
{
    EmitFcmpOrFcmpe(context, signalNaNs: false);
}

// FCMPE: FP compare setting NZCV; signals on any NaN operand.
public static void Fcmpe_S(ArmEmitterContext context)
{
    EmitFcmpOrFcmpe(context, signalNaNs: true);
}
+
// Shared body of FCCMP/FCCMPE: if the condition holds, perform the real FP
// compare; otherwise load NZCV from the instruction's immediate.
// NOTE(review): declared public while the sibling helper EmitFcmpOrFcmpe is
// private -- consider making this private too for consistency.
public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
{
    OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;

    Operand lblTrue = Label();
    Operand lblEnd = Label();

    context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));

    // Condition failed: NZCV comes straight from the encoded immediate.
    EmitSetNzcv(context, Const(op.Nzcv));

    context.Branch(lblEnd);

    context.MarkLabel(lblTrue);

    EmitFcmpOrFcmpe(context, signalNaNs);

    context.MarkLabel(lblEnd);
}
+
// Shared body of FCMP/FCMPE: compares Rn against Rm (or zero) and sets NZCV.
// Fast path uses COMISS/COMISD-style intrinsics plus an orderedness check;
// the fallback calls the SoftFloat FPCompare helpers, which return NZCV
// packed into an integer.
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    // x86 CMPSS/CMPSD immediate predicate 7 = ordered (true iff no NaN).
    const int cmpOrdered = 7;

    // The conditional-compare form (OpCodeSimdFcond) always compares two
    // registers; otherwise Bit3 selects the compare-with-zero encoding.
    bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;

    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        Operand n = GetVec(op.Rn);
        Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);

        Operand lblNaN = Label();
        Operand lblEnd = Label();

        if (op.Size == 0)
        {
            // Single precision.
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));

            Operand isOrdered = context.VectorExtract16(ordMask, 0);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }
        else /* if (op.Size == 1) */
        {
            // Double precision.
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));

            Operand isOrdered = context.VectorExtract16(ordMask, 0);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblNaN);

        // Unordered (NaN) result: NZCV = 0011.
        SetFlag(context, PState.VFlag, Const(1));
        SetFlag(context, PState.CFlag, Const(1));
        SetFlag(context, PState.ZFlag, Const(0));
        SetFlag(context, PState.NFlag, Const(0));

        context.MarkLabel(lblEnd);
    }
    else
    {
        OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;

        Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
        Operand me;

        if (cmpWithZero)
        {
            me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
        }
        else
        {
            me = context.VectorExtract(type, GetVec(op.Rm), 0);
        }

        Delegate dlg = op.Size != 0
            ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare)
            : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare);

        Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs));

        EmitSetNzcv(context, nzcv);
    }
}
+
// Unpacks an NZCV value (bit 0 = V, bit 1 = C, bit 2 = Z, bit 3 = N)
// into the individual PState flags.
private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
{
    // Extracts a single bit of nzcv as a 0/1 value.
    Operand Extract(Operand value, int bit)
    {
        if (bit != 0)
        {
            value = context.ShiftRightUI(value, Const(bit));
        }

        value = context.BitwiseAnd(value, Const(1));

        return value;
    }

    SetFlag(context, PState.VFlag, Extract(nzcv, 0));
    SetFlag(context, PState.CFlag, Extract(nzcv, 1));
    SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
    SetFlag(context, PState.NFlag, Extract(nzcv, 3));
}
+
// Generic per-element integer compare: writes an all-ones element where
// emitCmp holds and zero otherwise. Handles both the two-register form and
// the compare-with-zero form; scalar processes only element 0.
private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    // All-ones mask of the element width (8 << op.Size bits).
    ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
        Operand me;

        if (op is OpCodeSimdReg binOp)
        {
            me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size);
        }
        else
        {
            // Compare-against-zero variant.
            me = Const(0L);
        }

        Operand isTrue = emitCmp(ne, me);

        Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));

        res = EmitVectorInsert(context, res, mask, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
+
// CMTST body: per element, result is all-ones when (n & m) != 0, else zero.
private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand res = context.VectorZero();

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    // All-ones mask of the element width.
    ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
        Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

        Operand test = context.BitwiseAnd(ne, me);

        Operand isTrue = context.ICompareNotEqual(test, Const(0L));

        Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));

        res = EmitVectorInsert(context, res, mask, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
+
// Generic FP compare fallback: calls the SoftFloat comparison helper per
// element (FP32 or FP64 chosen from the low size bit) and inserts the
// results. Handles both the register and compare-with-zero forms.
private static void EmitCmpOpF(
    ArmEmitterContext context,
    _F32_F32_F32 f32,
    _F64_F64_F64 f64,
    bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    // Note: + binds tighter than >>, so this is bytes >> (sizeF + 2),
    // i.e. the FP element count.
    int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
        Operand me;

        if (op is OpCodeSimdReg binOp)
        {
            me = context.VectorExtract(type, GetVec(binOp.Rm), index);
        }
        else
        {
            // Compare-against-zero variant.
            me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
        }

        Operand e = EmitSoftFloatCall(context, f32, f64, ne, me);

        res = context.VectorInsert(res, e, index);
    }

    context.Copy(GetVec(op.Rd), res);
}
+
// Values are x86 CMPPS/CMPPD/CMPSS/CMPSD immediate predicates
// (0 = EQ, 5 = NLT i.e. GE, 6 = NLE i.e. GT).
private enum CmpCondition
{
    Equal = 0,
    GreaterThanOrEqual = 5,
    GreaterThan = 6
}
+
// SSE/SSE2 fast path for the FP compare instructions: emits CMPSS/CMPPS
// (single) or CMPSD/CMPPD (double) with the given predicate immediate.
// isLeOrLt swaps the operand order, turning the GE/GT predicates into
// LE/LT without needing extra predicates.
private static void EmitCmpSseOrSse2OpF(
    ArmEmitterContext context,
    CmpCondition cond,
    bool scalar,
    bool isLeOrLt = false)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand n = GetVec(op.Rn);
    Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();

    int sizeF = op.Size & 1;

    if (sizeF == 0)
    {
        Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;

        Operand res = isLeOrLt
            ? context.AddIntrinsic(inst, m, n, Const((int)cond))
            : context.AddIntrinsic(inst, n, m, Const((int)cond));

        if (scalar)
        {
            // Scalar single: only the low 32 bits of Rd are significant.
            res = context.VectorZeroUpper96(res);
        }
        else if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else /* if (sizeF == 1) */
    {
        Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;

        Operand res = isLeOrLt
            ? context.AddIntrinsic(inst, m, n, Const((int)cond))
            : context.AddIntrinsic(inst, n, m, Const((int)cond));

        if (scalar)
        {
            // Scalar double: clear the upper 64 bits of Rd.
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 00000000..2b61fada
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
// AES instruction emitters. All four delegate to managed SoftFallback
// implementations via IR calls (no hardware AES-NI path here).
static partial class InstEmit
{
    // AESD: one AES decryption round (AddRoundKey with Rd, then inverse
    // shift/substitute) -- semantics per SoftFallback.Decrypt.
    public static void Aesd_V(ArmEmitterContext context)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n));
    }

    // AESE: one AES encryption round -- semantics per SoftFallback.Encrypt.
    public static void Aese_V(ArmEmitterContext context)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n));
    }

    // AESIMC: AES inverse mix columns.
    public static void Aesimc_V(ArmEmitterContext context)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);

        context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n));
    }

    // AESMC: AES mix columns.
    public static void Aesmc_V(ArmEmitterContext context)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);

        context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n));
    }
}
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 00000000..012bfcce
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1166 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+
+ static partial class InstEmit
+ {
// FCVT (scalar): converts between FP precisions. The (Size, Opc) pair
// selects the source/destination precision; half<->double is not yet
// implemented, and any other combination is an invalid encoding.
public static void Fcvt_S(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    if (op.Size == 0 && op.Opc == 1) // Single -> Double.
    {
        if (Optimizations.UseSse2)
        {
            Operand n = GetVec(op.Rn);

            Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);

            context.Copy(GetVec(op.Rd), res);
        }
        else
        {
            Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

            Operand res = context.ConvertToFP(OperandType.FP64, ne);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
        }
    }
    else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
    {
        if (Optimizations.UseSse2)
        {
            Operand n = GetVec(op.Rn);

            Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);

            context.Copy(GetVec(op.Rd), res);
        }
        else
        {
            Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);

            Operand res = context.ConvertToFP(OperandType.FP32, ne);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
        }
    }
    else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
    {
        Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

        Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);

        Operand res = context.Call(dlg, ne);

        res = context.ZeroExtend16(OperandType.I64, res);

        context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
    }
    else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
    {
        Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);

        Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);

        Operand res = context.Call(dlg, ne);

        context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
    }
    else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
    {
        throw new NotImplementedException("Double-precision to half-precision.");
    }
    else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
    {
        throw new NotImplementedException("Half-precision to double-precision.");
    }
    else // Invalid encoding.
    {
        Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
    }
}
+
// FCVTAS (to general register): FP -> signed int, rounding ties away from zero.
public static void Fcvtas_Gp(ArmEmitterContext context)
{
    EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
}

// FCVTAU (to general register): FP -> unsigned int, rounding ties away from zero.
public static void Fcvtau_Gp(ArmEmitterContext context)
{
    EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
}
+
// FCVTL/FCVTL2 (vector): widens the lower (Simd64) or upper (Simd128) half
// of Rn -- half -> single, or single -> double. SSE2 fast path covers the
// single -> double case via CVTPS2PD.
public static void Fcvtl_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 1)
    {
        Operand n = GetVec(op.Rn);
        Operand res;

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // Second-half variant: move the upper 64 bits down first.
            res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n);
        }
        else
        {
            res = n;
        }

        res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        Operand res = context.VectorZero();

        int elems = 4 >> sizeF;

        // part selects the upper source half for the "2" (second-half) form.
        int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

        for (int index = 0; index < elems; index++)
        {
            if (sizeF == 0)
            {
                // Half -> single via the managed converter.
                Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);

                Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);

                Operand e = context.Call(dlg, ne);

                res = context.VectorInsert(res, e, index);
            }
            else /* if (sizeF == 1) */
            {
                Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);

                Operand e = context.ConvertToFP(OperandType.FP64, ne);

                res = context.VectorInsert(res, e, index);
            }
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
+
// FCVTMS (to general register): FP -> signed int, rounding towards minus infinity.
public static void Fcvtms_Gp(ArmEmitterContext context)
{
    EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1));
}

// FCVTMU (to general register): FP -> unsigned int, rounding towards minus infinity.
public static void Fcvtmu_Gp(ArmEmitterContext context)
{
    EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1));
}
+
// FCVTN/FCVTN2 (vector): narrows each element of Rn (single -> half, or
// double -> single) and writes the results into the lower (Simd64) or upper
// (Simd128, the "2" form) half of Rd, preserving the other half in the
// FCVTN2 case. SSE2 fast path covers the double -> single case.
public static void Fcvtn_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 1)
    {
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Keep the low half of Rd (relevant for the FCVTN2 form), zeroing
        // the rest before merging in the narrowed elements.
        Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

        Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n);

        nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);

        // FCVTN2 (Simd128) targets the upper half, FCVTN the lower half.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, res, nInt);

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

        int elems = 4 >> sizeF;

        int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

        // FCVTN2 must preserve the existing low half of Rd.
        Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

        for (int index = 0; index < elems; index++)
        {
            // Bug fix: the source element must be indexed by the loop
            // counter; the original extracted element 0 on every
            // iteration, so all narrowed lanes received lane 0's value.
            Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

            if (sizeF == 0)
            {
                // Single -> half via the managed converter.
                Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);

                Operand e = context.Call(dlg, ne);

                e = context.ZeroExtend16(OperandType.I64, e);

                res = EmitVectorInsert(context, res, e, part + index, 1);
            }
            else /* if (sizeF == 1) */
            {
                // Double -> single.
                Operand e = context.ConvertToFP(OperandType.FP32, ne);

                res = context.VectorInsert(res, e, part + index);
            }
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
+
// FCVTNS/FCVTNU: FP -> int with round-to-nearest (ties to even).
// SSE4.1 path emits the conversion directly; otherwise falls back to the
// managed rounding + saturating conversion helpers.

public static void Fcvtns_S(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true);
    }
    else
    {
        EmitFcvtn(context, signed: true, scalar: true);
    }
}

public static void Fcvtns_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false);
    }
    else
    {
        EmitFcvtn(context, signed: true, scalar: false);
    }
}

public static void Fcvtnu_S(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true);
    }
    else
    {
        EmitFcvtn(context, signed: false, scalar: true);
    }
}

public static void Fcvtnu_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false);
    }
    else
    {
        EmitFcvtn(context, signed: false, scalar: false);
    }
}
+
// FCVTPS (to general register): FP -> signed int, rounding towards plus infinity.
public static void Fcvtps_Gp(ArmEmitterContext context)
{
    EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1));
}

// FCVTPU (to general register): FP -> unsigned int, rounding towards plus infinity.
public static void Fcvtpu_Gp(ArmEmitterContext context)
{
    EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1));
}

// FCVTZS (to general register): FP -> signed int, truncating (the identity
// lambda leaves the value to be truncated by the conversion itself).
public static void Fcvtzs_Gp(ArmEmitterContext context)
{
    EmitFcvt_s_Gp(context, (op1) => op1);
}

// FCVTZS (to general register, fixed-point): truncating conversion with fbits scaling.
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
{
    EmitFcvtzs_Gp_Fixed(context);
}
+
// FCVTZS/FCVTZU (SIMD forms): FP -> int, rounding towards zero, with
// saturation. SSE4.1 fast path, otherwise the managed EmitFcvtz fallback.
// The _Fixed variants share the same path; the fbits scaling is read from
// the opcode inside the helpers.

public static void Fcvtzs_S(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true);
    }
    else
    {
        EmitFcvtz(context, signed: true, scalar: true);
    }
}

public static void Fcvtzs_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
    }
    else
    {
        EmitFcvtz(context, signed: true, scalar: false);
    }
}

public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
    }
    else
    {
        EmitFcvtz(context, signed: true, scalar: false);
    }
}

public static void Fcvtzu_Gp(ArmEmitterContext context)
{
    EmitFcvt_u_Gp(context, (op1) => op1);
}

public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
{
    EmitFcvtzu_Gp_Fixed(context);
}

public static void Fcvtzu_S(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true);
    }
    else
    {
        EmitFcvtz(context, signed: false, scalar: true);
    }
}

public static void Fcvtzu_V(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
    }
    else
    {
        EmitFcvtz(context, signed: false, scalar: false);
    }
}

public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
{
    if (Optimizations.UseSse41)
    {
        EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
    }
    else
    {
        EmitFcvtz(context, signed: false, scalar: false);
    }
}
+
// SCVTF family: signed integer -> FP conversions.

// SCVTF (general register source): sign-extends 32-bit sources before
// converting, so the value is treated as a signed 64-bit integer.
public static void Scvtf_Gp(ArmEmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand res = GetIntOrZR(context, op.Rn);

    if (op.RegisterSize == RegisterSize.Int32)
    {
        res = context.SignExtend32(OperandType.I64, res);
    }

    res = EmitFPConvert(context, res, op.Size, signed: true);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}

// SCVTF (general register, fixed-point): same as above plus division by
// 2^fbits (applied by EmitI2fFBitsMul).
public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand res = GetIntOrZR(context, op.Rn);

    if (op.RegisterSize == RegisterSize.Int32)
    {
        res = context.SignExtend32(OperandType.I64, res);
    }

    res = EmitFPConvert(context, res, op.Size, signed: true);

    res = EmitI2fFBitsMul(context, res, op.FBits);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}

// SCVTF (scalar SIMD source): SSE2 fast path for the single-precision case.
public static void Scvtf_S(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Scvtf(context, scalar: true);
    }
    else
    {
        Operand res = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);

        res = EmitFPConvert(context, res, op.Size, signed: true);

        context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
    }
}

// SCVTF (vector).
public static void Scvtf_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Scvtf(context, scalar: false);
    }
    else
    {
        EmitVectorCvtf(context, signed: true);
    }
}

// SCVTF (vector, fixed-point).
public static void Scvtf_V_Fixed(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    // sizeF == ((OpCodeSimdShImm64)op).Size - 2
    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Scvtf(context, scalar: false);
    }
    else
    {
        EmitVectorCvtf(context, signed: true);
    }
}
+
// UCVTF family: unsigned integer -> FP conversions. Unlike SCVTF, no
// sign-extension is applied to 32-bit general register sources.

public static void Ucvtf_Gp(ArmEmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand res = GetIntOrZR(context, op.Rn);

    res = EmitFPConvert(context, res, op.Size, signed: false);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}

// UCVTF (general register, fixed-point): adds the 2^fbits scaling step.
public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand res = GetIntOrZR(context, op.Rn);

    res = EmitFPConvert(context, res, op.Size, signed: false);

    res = EmitI2fFBitsMul(context, res, op.FBits);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}

// UCVTF (scalar SIMD source).
// NOTE(review): passes sizeF to EmitFPConvert where Scvtf_S passes op.Size;
// equivalent while scalar FP sizes are 0/1, but consider unifying.
public static void Ucvtf_S(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Ucvtf(context, scalar: true);
    }
    else
    {
        Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);

        Operand res = EmitFPConvert(context, ne, sizeF, signed: false);

        context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
    }
}

// UCVTF (vector).
public static void Ucvtf_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Ucvtf(context, scalar: false);
    }
    else
    {
        EmitVectorCvtf(context, signed: false);
    }
}

// UCVTF (vector, fixed-point).
public static void Ucvtf_V_Fixed(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    // sizeF == ((OpCodeSimdShImm)op).Size - 2
    int sizeF = op.Size & 1;

    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Ucvtf(context, scalar: false);
    }
    else
    {
        EmitVectorCvtf(context, signed: false);
    }
}
+
// Managed fallback for FCVTNS/FCVTNU: per element, rounds to nearest
// (ties to even) and converts to a saturated signed/unsigned integer of
// the same width via the SoftFallback Sat* helpers.
private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    Operand n = GetVec(op.Rn);

    int sizeF = op.Size & 1;
    int sizeI = sizeF + 2;

    OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

    int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = context.VectorExtract(type, n, index);

        // Round to nearest even before the saturating conversion.
        Operand e = EmitRoundMathCall(context, MidpointRounding.ToEven, ne);

        if (sizeF == 0)
        {
            Delegate dlg = signed
                ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
                : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);

            e = context.Call(dlg, e);

            e = context.ZeroExtend32(OperandType.I64, e);
        }
        else /* if (sizeF == 1) */
        {
            Delegate dlg = signed
                ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
                : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);

            e = context.Call(dlg, e);
        }

        res = EmitVectorInsert(context, res, e, index, sizeI);
    }

    context.Copy(GetVec(op.Rd), res);
}
+
+ // Float-to-integer conversion toward zero (FCVTZ*): scales each FP
+ // element by 2^fBits (for the fixed-point forms), then converts it with
+ // the saturating SoftFallback helpers. No explicit rounding call is
+ // emitted here; truncation is performed by the conversion helpers.
+ private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ // Zero when the opcode carries no fixed-point immediate.
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = EmitF2iFBitsMul(context, ne, fBits);
+
+ if (sizeF == 0)
+ {
+ Delegate dlg = signed
+ ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+ : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);
+
+ e = context.Call(dlg, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Delegate dlg = signed
+ ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
+ : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+ e = context.Call(dlg, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Signed variant of the FP -> GP register conversion helper.
+ private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: true);
+ }
+
+ // Unsigned variant of the FP -> GP register conversion helper.
+ private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: false);
+ }
+
+ // Common FP -> GP register conversion: extracts the scalar FP source,
+ // applies the caller-supplied rounding step (emit), then performs a
+ // saturating conversion sized by the destination register width.
+ private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ // fBits is 0 here: the non-fixed-point forms have no fraction bits.
+ Operand res = signed
+ ? EmitScalarFcvts(context, emit(ne), 0)
+ : EmitScalarFcvtu(context, emit(ne), 0);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ // FCVTZS (fixed-point, to GP register): signed variant.
+ private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: true);
+ }
+
+ // FCVTZU (fixed-point, to GP register): unsigned variant.
+ private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: false);
+ }
+
+ // Common fixed-point FP -> GP register conversion: extracts the scalar
+ // FP source and converts it, scaling by 2^FBits inside the scalar
+ // conversion helpers.
+ private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, ne, op.FBits)
+ : EmitScalarFcvtu(context, ne, op.FBits);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ // Generic integer -> FP vector conversion (SCVTF/UCVTF fallback path):
+ // converts each element and applies the fixed-point 1 / 2^fBits scaling
+ // when the opcode carries a fraction-bits immediate.
+ private static void EmitVectorCvtf(ArmEmitterContext context, bool signed)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ int fBits = GetFBits(context);
+
+ int elems = op.GetBytesCount() >> sizeI;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
+
+ Operand e = EmitFPConvert(context, ne, sizeF, signed);
+
+ e = EmitI2fFBitsMul(context, e, fBits);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Returns the number of fixed-point fraction bits encoded in the current
+ // opcode, or zero when the instruction carries no shift immediate.
+ private static int GetFBits(ArmEmitterContext context)
+ {
+ return context.CurrOp is OpCodeSimdShImm shImmOp ? GetImmShr(shImmOp) : 0;
+ }
+
+ // Converts an integer operand (I32 or I64) to floating-point.
+ // size selects the destination: 0 = FP32, 1 = FP64.
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+ Debug.Assert((uint)size < 2);
+
+ OperandType fpType = size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ return signed
+ ? context.ConvertToFP(fpType, value)
+ : context.ConvertToFPUI(fpType, value);
+ }
+
+ // Scalar FP -> signed integer conversion with saturation. The result
+ // width (32 or 64 bits) follows the destination register size; the value
+ // is first scaled by 2^fBits for the fixed-point forms.
+ private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ Delegate dlg = value.Type == OperandType.FP32
+ ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+ : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32);
+
+ return context.Call(dlg, value);
+ }
+ else
+ {
+ Delegate dlg = value.Type == OperandType.FP32
+ ? (Delegate)new _S64_F32(SoftFallback.SatF32ToS64)
+ : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64);
+
+ return context.Call(dlg, value);
+ }
+ }
+
+ // Scalar FP -> unsigned integer conversion with saturation; unsigned
+ // counterpart of EmitScalarFcvts.
+ private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ Delegate dlg = value.Type == OperandType.FP32
+ ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32)
+ : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32);
+
+ return context.Call(dlg, value);
+ }
+ else
+ {
+ Delegate dlg = value.Type == OperandType.FP32
+ ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64)
+ : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+ return context.Call(dlg, value);
+ }
+ }
+
+ // Scales a floating-point value by 2^fBits prior to a float-to-fixed
+ // conversion; a no-op when fBits is zero.
+ private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ Operand scale = value.Type == OperandType.FP32
+ ? ConstF(MathF.Pow(2f, fBits))
+ : ConstF(Math.Pow(2d, fBits));
+
+ return context.Multiply(value, scale);
+ }
+
+ // Scales a floating-point value by 1 / 2^fBits after a fixed-to-float
+ // conversion; a no-op when fBits is zero.
+ private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ Operand invScale = value.Type == OperandType.FP32
+ ? ConstF(1f / MathF.Pow(2f, fBits))
+ : ConstF(1d / Math.Pow(2d, fBits));
+
+ return context.Multiply(value, invScale);
+ }
+
+ // SSE4.1 fast path for FP -> signed integer vector conversion.
+ // NaNs are squashed to zero via the ordered-compare mask, the optional
+ // fixed-point scale is applied, the value is rounded with ROUND*, and
+ // values >= 2^31 (or 2^63) are flipped with PXOR so that the x86
+ // "integer indefinite" result (0x80000000 / 0x8000...) becomes the
+ // ARM saturated maximum.
+ private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ const int cmpGreaterThanOrEqual = 5;
+ const int cmpOrdered = 7;
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ // Ordered compare with itself: lanes holding NaN produce 0.
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+
+ Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
+ }
+
+ Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRnd);
+
+ Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+ // Lanes at or above 2^31 overflowed: XOR turns CVT's 0x80000000
+ // into the saturated 0x7FFFFFFF.
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+
+ Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
+ }
+
+ Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
+
+ // No packed double -> qword conversion exists; convert each half
+ // through CVTSD2SI and rebuild the vector.
+ Operand high;
+
+ if (!scalar)
+ {
+ high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRnd, nRnd);
+ high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+ }
+ else
+ {
+ high = Const(0L);
+ }
+
+ Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRnd);
+
+ Operand nInt = EmitVectorLongCreate(context, low, high);
+
+ Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // SSE4.1 fast path for FP -> unsigned integer vector conversion.
+ // x86 only provides signed conversions, so the unsigned result is built
+ // from two signed conversions: one of the (non-negative-clamped) input,
+ // and one of the input minus 2^31 (or 2^63), recombined with PXOR/PADD
+ // so values in the upper half of the unsigned range convert correctly.
+ private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ const int cmpGreaterThanOrEqual = 5;
+ const int cmpGreaterThan = 6;
+ const int cmpOrdered = 7;
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ // Squash NaN lanes to zero.
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+
+ Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
+ }
+
+ Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
+
+ // Clamp negative inputs to zero (unsigned result saturates at 0).
+ Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+
+ Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRndMasked);
+
+ Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+ // Convert (value - 2^31) for lanes above the signed range.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan));
+
+ Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
+
+ // Saturate lanes still >= 2^31 after the subtraction.
+ Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
+ res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+
+ Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
+ }
+
+ Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
+
+ Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+
+ Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
+
+ // Per-lane CVTSD2SI, as no packed double -> qword conversion exists.
+ Operand high;
+
+ if (!scalar)
+ {
+ high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRndMasked, nRndMasked);
+ high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+ }
+ else
+ {
+ high = Const(0L);
+ }
+
+ Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRndMasked);
+
+ Operand nInt = EmitVectorLongCreate(context, low, high);
+
+ Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan));
+
+ Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
+
+ if (!scalar)
+ {
+ high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, resMasked, resMasked);
+ high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+ }
+
+ low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, resMasked);
+
+ res = EmitVectorLongCreate(context, low, high);
+
+ Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
+ res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // SSE2 fast path for signed integer -> single-precision conversion
+ // (SCVTF): CVTDQ2PS does the conversion; the fixed-point forms then
+ // multiply by 1 / 2^fBits built directly as an IEEE-754 bit pattern.
+ private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SSE2 fast path for unsigned integer -> single-precision conversion
+ // (UCVTF). CVTDQ2PS is signed-only, so each 32-bit lane is split into
+ // its high and low 16-bit halves (both non-negative as signed ints),
+ // converted separately, and recombined as high * 65536 + low.
+ private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // High 16 bits of each lane.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+
+ Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16)
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ // Low 16 bits of each lane (shift left then logical right clears the top).
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand scale = X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Extracts one vector element as an integer operand; a size of 3
+ // (log2 of 8 bytes) yields an I64, anything smaller an I32.
+ private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return context.VectorExtract(size == 3 ? OperandType.I64 : OperandType.I32, GetVec(reg), index);
+ }
+
+ // Builds a 128-bit vector from two 64-bit halves: low in element 0,
+ // high in element 1.
+ private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high)
+ {
+ Operand res = context.VectorCreateScalar(low);
+
+ return context.VectorInsert(res, high, 1);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 00000000..4ed96061
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // Emitters for the A64 SHA1 and SHA256 crypto instructions. Each one
+ // forwards its register operands to the corresponding managed
+ // SoftFallback routine and copies the result back to Rd.
+ static partial class InstEmit
+ {
+#region "Sha1"
+ // SHA1C: hash update, choose function.
+ public static void Sha1c_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ // Only element 0 of Rn (the rotated hash value) is used.
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashChoose), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA1H: fixed rotate of element 0.
+ public static void Sha1h_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand res = context.Call(new _U32_U32(SoftFallback.FixedRotate), ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res));
+ }
+
+ // SHA1M: hash update, majority function.
+ public static void Sha1m_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA1P: hash update, parity function.
+ public static void Sha1p_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashParity), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA1SU0: schedule update 0 (takes all three vector operands whole).
+ public static void Sha1su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha1SchedulePart1), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA1SU1: schedule update 1.
+ public static void Sha1su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha1SchedulePart2), d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+
+#region "Sha256"
+ // SHA256H: hash update, lower half.
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashLower), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA256H2: hash update, upper half.
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashUpper), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA256SU0: schedule update 0.
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha256SchedulePart1), d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SHA256SU1: schedule update 1.
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha256SchedulePart2), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 00000000..a3da80fb
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,1477 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper
+ {
+#region "X86 SSE Intrinsics"
+ // Lookup tables mapping the ARM element size (op.Size: 0 = byte,
+ // 1 = halfword, 2 = word, 3 = doubleword) to the matching x86 packed
+ // intrinsic. Tables with fewer than four entries have no variant for
+ // the missing sizes; the 0 placeholders mark sizes with no encoding
+ // (e.g. there is no packed byte shift on x86).
+ public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Paddb,
+ Intrinsic.X86Paddw,
+ Intrinsic.X86Paddd,
+ Intrinsic.X86Paddq
+ };
+
+ public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpeqb,
+ Intrinsic.X86Pcmpeqw,
+ Intrinsic.X86Pcmpeqd,
+ Intrinsic.X86Pcmpeqq
+ };
+
+ public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpgtb,
+ Intrinsic.X86Pcmpgtw,
+ Intrinsic.X86Pcmpgtd,
+ Intrinsic.X86Pcmpgtq
+ };
+
+ public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxsb,
+ Intrinsic.X86Pmaxsw,
+ Intrinsic.X86Pmaxsd
+ };
+
+ public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxub,
+ Intrinsic.X86Pmaxuw,
+ Intrinsic.X86Pmaxud
+ };
+
+ public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminsb,
+ Intrinsic.X86Pminsw,
+ Intrinsic.X86Pminsd
+ };
+
+ public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminub,
+ Intrinsic.X86Pminuw,
+ Intrinsic.X86Pminud
+ };
+
+ public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovsxbw,
+ Intrinsic.X86Pmovsxwd,
+ Intrinsic.X86Pmovsxdq
+ };
+
+ public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovzxbw,
+ Intrinsic.X86Pmovzxwd,
+ Intrinsic.X86Pmovzxdq
+ };
+
+ // No byte-sized packed shifts exist on x86, hence the 0 at index 0.
+ public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psllw,
+ Intrinsic.X86Pslld,
+ Intrinsic.X86Psllq
+ };
+
+ // Arithmetic right shift also lacks a qword form (no Psraq).
+ public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psraw,
+ Intrinsic.X86Psrad
+ };
+
+ public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psrlw,
+ Intrinsic.X86Psrld,
+ Intrinsic.X86Psrlq
+ };
+
+ public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Psubb,
+ Intrinsic.X86Psubw,
+ Intrinsic.X86Psubd,
+ Intrinsic.X86Psubq
+ };
+
+ public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpckhbw,
+ Intrinsic.X86Punpckhwd,
+ Intrinsic.X86Punpckhdq,
+ Intrinsic.X86Punpckhqdq
+ };
+
+ public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpcklbw,
+ Intrinsic.X86Punpcklwd,
+ Intrinsic.X86Punpckldq,
+ Intrinsic.X86Punpcklqdq
+ };
+#endregion
+
+ // Decodes the left-shift amount from a shift immediate:
+ // the raw immediate minus the element size in bits.
+ public static int GetImmShl(OpCodeSimdShImm op)
+ {
+ int eSizeBits = 8 << op.Size;
+
+ return op.Imm - eSizeBits;
+ }
+
+ // Decodes the right-shift amount from a shift immediate:
+ // twice the element size in bits, minus the raw immediate.
+ public static int GetImmShr(OpCodeSimdShImm op)
+ {
+ int eSizeBits = 8 << op.Size;
+
+ return eSizeBits * 2 - op.Imm;
+ }
+
+ // Materializes a float constant into element 0 of a vector (bit pattern).
+ public static Operand X86GetScalar(ArmEmitterContext context, float value)
+ {
+ return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ // Materializes a double constant into element 0 of a vector (bit pattern).
+ public static Operand X86GetScalar(ArmEmitterContext context, double value)
+ {
+ return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ // Materializes a 32-bit constant into element 0 of a zeroed vector.
+ public static Operand X86GetScalar(ArmEmitterContext context, int value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ // Materializes a 64-bit constant into element 0 of a zeroed vector.
+ public static Operand X86GetScalar(ArmEmitterContext context, long value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ // Broadcasts a float constant (by bit pattern) to all vector lanes.
+ public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+ {
+ return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ // Broadcasts a double constant (by bit pattern) to both vector lanes.
+ public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+ {
+ return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ // Broadcasts a 32-bit constant to all four vector lanes.
+ public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+ {
+ Operand res = context.VectorCreateScalar(Const(value));
+
+ // SHUFPS with control 0 replicates lane 0 across the vector.
+ return context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+
+ // Broadcasts a 64-bit constant to both vector lanes.
+ public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+ {
+ Operand res = context.VectorCreateScalar(Const(value));
+
+ // MOVLHPS duplicates the low quadword into the high half.
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+
+ // Maps an ARM FP rounding mode to the x86 ROUND* immediate encoding.
+ // Bit 3 (value 8) is always set; the low two bits select the mode.
+ public static int X86GetRoundControl(FPRoundingMode roundMode)
+ {
+ if (roundMode == FPRoundingMode.ToNearest)
+ {
+ return 8 | 0;
+ }
+ else if (roundMode == FPRoundingMode.TowardsMinusInfinity)
+ {
+ return 8 | 1;
+ }
+ else if (roundMode == FPRoundingMode.TowardsPlusInfinity)
+ {
+ return 8 | 2;
+ }
+ else if (roundMode == FPRoundingMode.TowardsZero)
+ {
+ return 8 | 3;
+ }
+
+ throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
+ }
+
+ // Emits a scalar unary FP operation with the SSE intrinsic matching the
+ // opcode element size, then clears the unused upper destination bits.
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ bool isDouble = (op.Size & 1) != 0;
+
+ Operand res = context.AddIntrinsic(isDouble ? inst64 : inst32, GetVec(op.Rn));
+
+ res = isDouble
+ ? context.VectorZeroUpper64(res)
+ : context.VectorZeroUpper96(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Emits a scalar binary FP operation with the SSE intrinsic matching the
+ // opcode element size, then clears the unused upper destination bits.
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ bool isDouble = (op.Size & 1) != 0;
+
+ Operand res = context.AddIntrinsic(isDouble ? inst64 : inst32, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = isDouble
+ ? context.VectorZeroUpper64(res)
+ : context.VectorZeroUpper96(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Emits a vector unary FP operation with the SSE intrinsic matching the
+ // element size; only 64-bit vectors get their upper half cleared.
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) == 0 ? inst32 : inst64;
+
+ Operand res = context.AddIntrinsic(inst, GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Emits a vector binary FP operation with the SSE intrinsic matching the
+ // element size; only 64-bit vectors get their upper half cleared.
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) == 0 ? inst32 : inst64;
+
+ Operand res = context.AddIntrinsic(inst, GetVec(op.Rn), GetVec(op.Rm));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Calls the single- or double-precision math helper, selected by the
+ // opcode element size.
+ public static Operand EmitUnaryMathCall(ArmEmitterContext context, _F32_F32 f32, _F64_F64 f64, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ return context.Call(f32, n);
+ }
+
+ return context.Call(f64, n);
+ }
+
+ // Emits a call to MathF.Round or Math.Round (per the opcode element
+ // size) with the given midpoint rounding mode.
+ public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ Delegate dlg = (op.Size & 1) == 0
+ ? (Delegate)new _F32_F32_MidpointRounding(MathF.Round)
+ : (Delegate)new _F64_F64_MidpointRounding(Math.Round);
+
+ return context.Call(dlg, n, Const((int)roundMode));
+ }
+
+ // Dispatches to the single- or double-precision soft-float helper
+ // (unary form), selected by the opcode element size.
+ public static Operand EmitSoftFloatCall(
+ ArmEmitterContext context,
+ _F32_F32 f32,
+ _F64_F64 f64,
+ params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ return context.Call(f32, callArgs);
+ }
+
+ return context.Call(f64, callArgs);
+ }
+
+ // Dispatches to the single- or double-precision soft-float helper
+ // (binary form), selected by the opcode element size.
+ public static Operand EmitSoftFloatCall(
+ ArmEmitterContext context,
+ _F32_F32_F32 f32,
+ _F64_F64_F64 f64,
+ params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ return context.Call(f32, callArgs);
+ }
+
+ return context.Call(f64, callArgs);
+ }
+
+ // Dispatches to the single- or double-precision soft-float helper
+ // (ternary form), selected by the opcode element size.
+ public static Operand EmitSoftFloatCall(
+ ArmEmitterContext context,
+ _F32_F32_F32_F32 f32,
+ _F64_F64_F64_F64 f64,
+ params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ return context.Call(f32, callArgs);
+ }
+
+ return context.Call(f64, callArgs);
+ }
+
+ // Scalar by-element FP binary op: the second operand is the indexed
+ // element of Rm rather than element 0.
+ public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(n, m), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar by-element FP ternary op (fused multiply style): Rd and Rn use
+ // element 0, Rm contributes its indexed element.
+ public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(d, n, m), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar integer unary op with sign extension of the source element.
+ public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+
+ Operand res = EmitVectorInsert(context, context.VectorZero(), emit(ne), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar integer binary op with sign extension of both source elements.
+ public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
+
+ Operand res = EmitVectorInsert(context, context.VectorZero(), emit(ne, me), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar integer unary op with zero extension of the source element.
+ public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+
+ Operand res = EmitVectorInsert(context, context.VectorZero(), emit(ne), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar integer binary op with zero extension of both source elements.
+ public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ Operand res = EmitVectorInsert(context, context.VectorZero(), emit(ne, me), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar integer ternary op (accumulating forms): Rd is both a source
+ // and the destination; all elements are zero extended.
+ public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ Operand res = EmitVectorInsert(context, context.VectorZero(), emit(de, ne, me), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
        // Scalar FP unary op: extracts element 0 of Rn as FP32/FP64 (chosen by
        // bit 0 of op.Size), applies emit, and stores to element 0 of Rd.
        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
        }
+
        // Scalar FP binary op: extracts element 0 of Rn and Rm as FP32/FP64,
        // applies emit, and stores to element 0 of Rd.
        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
        }
+
        // Scalar FP ternary op with Ra as accumulator (e.g. FMADD-style ops):
        // extracts element 0 of Ra, Rn and Rm, applies emit(a, n, m), stores to Rd.
        public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
        }
+
        // Vector FP unary op: applies emit to every FP32/FP64 element of Rn.
        // Note: ">> sizeF + 2" parses as ">> (sizeF + 2)", i.e. divide by the
        // element byte size (4 or 8).
        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

                res = context.VectorInsert(res, emit(ne), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector FP binary op: applies emit element-wise to Rn and Rm.
        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // ">> (sizeF + 2)": element count from byte count and element byte size.
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector FP ternary op: applies emit element-wise, with the destination
        // register Rd also read as the third input (accumulator forms).
        public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector FP binary op "by element": the second operand is a single
        // element of Rm (op.Index) broadcast against every element of Rn.
        public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector FP ternary op "by element": Rd is read as accumulator, and the
        // third operand is the single Rm element at op.Index broadcast per lane.
        public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector signed integer unary op: applies emit to each sign-extended
        // element of Rn and writes results to Rd.
        public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector signed integer binary op: applies emit element-wise to Rn and Rm.
        public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector signed integer ternary op: Rd is read as accumulator input
        // alongside Rn and Rm, element-wise.
        public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector unsigned integer unary op: applies emit to each zero-extended
        // element of Rn and writes results to Rd.
        public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector unsigned integer binary op: applies emit element-wise to Rn and Rm.
        public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector unsigned integer ternary op: Rd is read as accumulator input
        // alongside Rn and Rm, element-wise.
        public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector signed binary op "by element": the Rm element at op.Index is
        // extracted once (loop-invariant) and combined with each Rn element.
        public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector unsigned binary op "by element": the Rm element at op.Index is
        // extracted once (loop-invariant) and combined with each Rn element.
        public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector unsigned ternary op "by element": Rd read as accumulator, with
        // the single Rm element at op.Index broadcast across all lanes.
        public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector immediate unary op: applies emit to the decoded immediate and
        // replicates the result across all elements of Rd.
        public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Vector immediate binary op: combines each (zero-extended) element of
        // Rd with the decoded immediate via emit, writing back to Rd.
        public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Signed variant of the widening (narrow Rm, wide Rn) binary op helper.
        public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: true);
        }
+
        // Unsigned variant of the widening (narrow Rm, wide Rn) binary op helper.
        public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: false);
        }
+
        // Widening binary op where Rn is already wide (op.Size + 1) and Rm is
        // narrow (op.Size). "part" selects Rm's upper half for the "2" variants
        // (Simd128 register size); results are written at the wide element size.
        private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Signed variant of the widening (narrow Rn and Rm) binary op helper.
        public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
        }
+
        // Unsigned variant of the widening (narrow Rn and Rm) binary op helper.
        public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
        }
+
        // Widening binary op where both Rn and Rm are narrow (op.Size); results
        // are written at double width (op.Size + 1). "part" selects the upper
        // halves of the sources for the "2" (Simd128) instruction variants.
        private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Signed variant of the widening ternary (accumulating) op helper.
        public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
        }
+
        // Unsigned variant of the widening ternary (accumulating) op helper.
        public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
        }
+
        // Widening ternary op: Rd is read at the wide size (op.Size + 1) as the
        // accumulator; Rn and Rm supply narrow elements (upper halves when
        // RegisterSize is Simd128). Results are written at the wide size.
        private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Signed variant of the widening by-element binary op helper.
        public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
        }
+
        // Unsigned variant of the widening by-element binary op helper.
        public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
        }
+
+ private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);;
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
        // Signed variant of the widening by-element ternary op helper.
        public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
        }
+
        // Unsigned variant of the widening by-element ternary op helper.
        public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
        }
+
+ private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);;
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
        // Signed variant of the integer pairwise op helper.
        public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorPairwiseOp(context, emit, signed: true);
        }
+
        // Unsigned variant of the integer pairwise op helper.
        public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorPairwiseOp(context, emit, signed: false);
        }
+
        // Integer pairwise op: combines adjacent element pairs of Rn into the
        // lower half of the result and pairs of Rm into the upper half,
        // matching the A64 pairwise (ADDP/MAXP/...) lane layout.
        private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int pairs = op.GetPairsCount() >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
                Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);

                Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
                Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
                res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Signed, non-widening across-vector reduction (e.g. SMAXV-style ops).
        public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
        }
+
        // Unsigned, non-widening across-vector reduction.
        public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
        }
+
        // Signed, widening across-vector reduction (result stored at size + 1).
        public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
        }
+
        // Unsigned, widening across-vector reduction (result stored at size + 1).
        public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
        }
+
        // Across-vector reduction: folds all elements of Rn with emit (left to
        // right, seeded with element 0) and stores the scalar result in element
        // 0 of Rd. isLong widens the destination element size by one step.
        private static void EmitVectorAcrossVectorOp(
            ArmEmitterContext context,
            Func2I emit,
            bool signed,
            bool isLong)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int elems = op.GetBytesCount() >> op.Size;

            Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);

            for (int index = 1; index < elems; index++)
            {
                Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

                res = emit(res, n);
            }

            int size = isLong ? op.Size + 1 : op.Size;

            Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);

            context.Copy(GetVec(op.Rd), d);
        }
+
        // FP pairwise op (portable path): adjacent pairs of Rn fill the lower
        // half of the result, adjacent pairs of Rm fill the upper half.
        public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // ">> (sizeF + 2)": pair count scaled by the element byte size.
            int pairs = op.GetPairsCount() >> sizeF + 2;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
                Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);

                Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
                Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);

                res = context.VectorInsert(res, emit(n0, n1), index);
                res = context.VectorInsert(res, emit(m0, m1), pairs + index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // FP pairwise op (SSE path): shuffles the even-indexed lanes of n:m into
        // part0 and the odd-indexed lanes into part1, then applies the given
        // 32/64-bit intrinsic so each output lane combines one adjacent pair.
        public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            int sizeF = op.Size & 1;

            if (sizeF == 0)
            {
                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // 64-bit vectors: interleave n/m, then split even/odd lanes
                    // with MOVLHPS/MOVHLPS against a zero register.
                    Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);

                    Operand zero = context.VectorZero();

                    Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
                    Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);

                    context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
                }
                else /* if (op.RegisterSize == RegisterSize.Simd128) */
                {
                    // SHUFPS immediates: sm0 gathers lanes {0,2} of n and m
                    // (even pairs), sm1 gathers lanes {1,3} (odd pairs).
                    const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
                    const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;

                    Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
                    Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));

                    context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
                }
            }
            else /* if (sizeF == 1) */
            {
                // 64-bit elements: low halves in part0, high halves in part1.
                Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
                Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1));
            }
        }
+
+
        // Shape of a saturating operation: Scalar restricts it to element 0,
        // Signed selects signed saturation, and Add/Sub/Accumulate pick the
        // arithmetic performed by EmitSaturatingBinaryOp.
        [Flags]
        public enum SaturatingFlags
        {
            Scalar = 1 << 0,
            Signed = 1 << 1,

            Add = 1 << 2,
            Sub = 1 << 3,

            Accumulate = 1 << 4,

            // Common combinations used by the public wrapper methods.
            ScalarSx = Scalar | Signed,
            ScalarZx = Scalar,

            VectorSx = Signed,
            VectorZx = 0
        }
+
        // Scalar form of the signed saturating unary op (element 0 only).
        public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);
        }
+
        // Vector form of the signed saturating unary op (all elements).
        public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
        }
+
        // Signed saturating unary op (e.g. SQABS/SQNEG shapes): applies emit to
        // each sign-extended element, then saturates. Sizes <= 2 saturate via
        // the generic EmitSatQ; size 3 (64-bit) needs the dedicated soft
        // fallback since the intermediate cannot be widened further.
        private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            bool scalar = (flags & SaturatingFlags.Scalar) != 0;

            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand de;

                if (op.Size <= 2)
                {
                    de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true);
                }
                else /* if (op.Size == 3) */
                {
                    de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
                }

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Scalar signed saturating binary op; the arithmetic comes from flags
        // (Add/Sub/Accumulate), so no emit callback is passed.
        public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags);
        }
+
        // Scalar unsigned saturating binary op; arithmetic selected via flags.
        public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags);
        }
+
        // Vector signed saturating binary op; arithmetic selected via flags.
        public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags);
        }
+
        // Vector unsigned saturating binary op; arithmetic selected via flags.
        public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags);
        }
+
        // Core saturating binary op dispatcher. Three shapes:
        //  - Add/Sub:      Rn (+|-) Rm with saturation (SQADD/UQSUB-style).
        //  - Accumulate:   Rn (opposite signedness) added into Rd
        //                  (SUQADD/USQADD-style).
        //  - otherwise:    caller-provided emit, result saturated.
        // "emit" may be null for the Add/Sub/Accumulate shapes (the public
        // wrappers pass null); it is only invoked on the fallthrough path.
        // Sizes <= 2 compute in 64-bit then saturate via EmitSatQ; size 3 uses
        // dedicated 64-bit soft-fallback helpers.
        public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            bool scalar = (flags & SaturatingFlags.Scalar) != 0;
            bool signed = (flags & SaturatingFlags.Signed) != 0;

            bool add = (flags & SaturatingFlags.Add) != 0;
            bool sub = (flags & SaturatingFlags.Sub) != 0;

            bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;

            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

            if (add || sub)
            {
                OpCodeSimdReg opReg = (OpCodeSimdReg)op;

                for (int index = 0; index < elems; index++)
                {
                    Operand de;
                    Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
                    Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);

                    if (op.Size <= 2)
                    {
                        // The 64-bit intermediate cannot overflow for sizes <= 2,
                        // so compute then saturate to the destination size.
                        Operand temp = add ? context.Add (ne, me)
                                           : context.Subtract(ne, me);

                        de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
                    }
                    else if (add) /* if (op.Size == 3) */
                    {
                        de = EmitBinarySatQAdd(context, ne, me, signed);
                    }
                    else /* if (sub) */
                    {
                        de = EmitBinarySatQSub(context, ne, me, signed);
                    }

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }
            else if (accumulate)
            {
                for (int index = 0; index < elems; index++)
                {
                    Operand de;
                    // Note: ne is extracted with the OPPOSITE signedness — the
                    // accumulate forms add an unsigned value into a signed
                    // accumulator (or vice versa).
                    Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
                    Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

                    if (op.Size <= 2)
                    {
                        Operand temp = context.Add(ne, me);

                        de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
                    }
                    else /* if (op.Size == 3) */
                    {
                        de = EmitBinarySatQAccumulate(context, ne, me, signed);
                    }

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }
            else
            {
                OpCodeSimdReg opReg = (OpCodeSimdReg)op;

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
                    Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);

                    Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed);

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Shape of a saturating narrowing operation: Scalar restricts it to
        // element 0; SignedSrc/SignedDst select the signedness of the source
        // and destination element types independently (e.g. SQXTUN is
        // signed-source, unsigned-destination).
        [Flags]
        public enum SaturatingNarrowFlags
        {
            Scalar = 1 << 0,
            SignedSrc = 1 << 1,
            SignedDst = 1 << 2,

            // Common combinations used by the instruction emitters.
            ScalarSxSx = Scalar | SignedSrc | SignedDst,
            ScalarSxZx = Scalar | SignedSrc,
            ScalarZxZx = Scalar,

            VectorSxSx = SignedSrc | SignedDst,
            VectorSxZx = SignedSrc,
            VectorZxZx = 0
        }
+
        // Saturating narrowing op (SQXTN/UQXTN/SQXTUN shapes): reads wide
        // (op.Size + 1) elements from Rn, saturates them to op.Size, and writes
        // them to either the lower half of Rd, or — for the "2" (Simd128)
        // variants — the upper half, preserving Rd's existing lower half.
        public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
            bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
            bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;

            int elems = !scalar ? 8 >> op.Size : 1;

            int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

            // Writing the upper half must keep Rd's current contents.
            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

                Operand temp = EmitSatQ(context, ne, op.Size, signedSrc, signedDst);

                res = EmitVectorInsert(context, res, temp, part + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
+
        // Saturates a 64-bit value down to the destination element size via a
        // managed soft-fallback call, selecting the delegate by the signedness
        // of the source and destination.
        // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
        public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst)
        {
            // Only 8/16/32-bit destinations are narrowable here; 64-bit
            // destinations use the dedicated SatQ helpers below.
            if ((uint)sizeDst > 2u)
            {
                throw new ArgumentOutOfRangeException(nameof(sizeDst));
            }

            Delegate dlg;

            if (signedSrc)
            {
                dlg = signedDst
                    ? (Delegate)new _S64_S64_S32(SoftFallback.SignedSrcSignedDstSatQ)
                    : (Delegate)new _U64_S64_S32(SoftFallback.SignedSrcUnsignedDstSatQ);
            }
            else
            {
                dlg = signedDst
                    ? (Delegate)new _S64_U64_S32(SoftFallback.UnsignedSrcSignedDstSatQ)
                    : (Delegate)new _U64_U64_S32(SoftFallback.UnsignedSrcUnsignedDstSatQ);
            }

            return context.Call(dlg, op, Const(sizeDst));
        }
+
        // Saturating 64-bit signed ABS/NEG via soft fallback (the only input
        // that saturates is long.MinValue).
        // TSrc (64bit) == TDst (64bit); signed.
        public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

            return context.Call(new _S64_S64(SoftFallback.UnarySignedSatQAbsOrNeg), op);
        }
+
        // Saturating 64-bit add via soft fallback (signed or unsigned).
        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
        {
            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

            Delegate dlg = signed
                ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQAdd)
                : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQAdd);

            return context.Call(dlg, op1, op2);
        }
+
        // Saturating 64-bit subtract via soft fallback (signed or unsigned).
        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
        {
            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

            Delegate dlg = signed
                ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQSub)
                : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQSub);

            return context.Call(dlg, op1, op2);
        }
+
        // Saturating 64-bit accumulate via soft fallback; note the mixed
        // signedness in the delegate signatures (unsigned value added into a
        // signed accumulator, or vice versa).
        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
        {
            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

            Delegate dlg = signed
                ? (Delegate)new _S64_U64_S64(SoftFallback.BinarySignedSatQAcc)
                : (Delegate)new _U64_S64_U64(SoftFallback.BinaryUnsignedSatQAcc);

            return context.Call(dlg, op1, op2);
        }
+
        // Extracts a vector element sign-extended to 64 bits.
        public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
        {
            return EmitVectorExtract(context, reg, index, size, true);
        }
+
        // Extracts a vector element zero-extended to 64 bits.
        public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
        {
            return EmitVectorExtract(context, reg, index, size, false);
        }
+
        // Extracts element "index" of size log2 "size" (0 = byte .. 3 = dword)
        // from vector register "reg", then sign- or zero-extends it to I64.
        // Throws if index/size are out of range for a 128-bit register.
        public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
        {
            ThrowIfInvalid(index, size);

            Operand res = null;

            switch (size)
            {
                case 0:
                    res = context.VectorExtract8(GetVec(reg), index);
                    break;

                case 1:
                    res = context.VectorExtract16(GetVec(reg), index);
                    break;

                case 2:
                    res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
                    break;

                case 3:
                    // Already 64 bits wide; no extension needed below.
                    res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
                    break;
            }

            if (signed)
            {
                switch (size)
                {
                    case 0: res = context.SignExtend8 (OperandType.I64, res); break;
                    case 1: res = context.SignExtend16(OperandType.I64, res); break;
                    case 2: res = context.SignExtend32(OperandType.I64, res); break;
                }
            }
            else
            {
                switch (size)
                {
                    case 0: res = context.ZeroExtend8 (OperandType.I64, res); break;
                    case 1: res = context.ZeroExtend16(OperandType.I64, res); break;
                    case 2: res = context.ZeroExtend32(OperandType.I64, res); break;
                }
            }

            return res;
        }
+
        // Inserts "value" into element "index" of "vector" at size log2 "size"
        // (0 = byte .. 3 = dword), returning the updated vector operand.
        // Values below 64 bits are first truncated from I64 to I32.
        public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
        {
            ThrowIfInvalid(index, size);

            if (size < 3)
            {
                value = context.ConvertI64ToI32(value);
            }

            switch (size)
            {
                case 0: vector = context.VectorInsert8 (vector, value, index); break;
                case 1: vector = context.VectorInsert16(vector, value, index); break;
                case 2: vector = context.VectorInsert (vector, value, index); break;
                case 3: vector = context.VectorInsert (vector, value, index); break;
            }

            return vector;
        }
+
        // Validates an element (index, size) pair against a 128-bit register:
        // size must be 0..3 and index must fit within 16 >> size elements.
        private static void ThrowIfInvalid(int index, int size)
        {
            if ((uint)size > 3u)
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if ((uint)index >= 16u >> size)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
        }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 00000000..551752d2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,456 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ // AND (vector): Rd = Rn & Rm. Uses SSE2 PAND when available, otherwise a
+ // per-element scalar fallback.
+ public static void And_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+
+ // 64-bit register form: the upper half of the destination must be zeroed.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ // BIC (vector, register): Rd = Rn & ~Rm.
+ public static void Bic_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ // PANDN computes (~first) & second, so the operands are swapped: (~m) & n.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ // BIC (vector, immediate): Rd = Rd & ~imm.
+ public static void Bic_Vi(ArmEmitterContext context)
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+
+ // BIF (bitwise insert if false): selects bits from Rn where ~Rm is set.
+ public static void Bif_V(ArmEmitterContext context)
+ {
+ EmitBifBit(context, notRm: true);
+ }
+
+ // BIT (bitwise insert if true): selects bits from Rn where Rm is set.
+ public static void Bit_V(ArmEmitterContext context)
+ {
+ EmitBifBit(context, notRm: false);
+ }
+
+ // Shared implementation for BIT/BIF: Rd = Rd ^ ((Rd ^ Rn) & mask), where mask is
+ // Rm (BIT) or ~Rm (BIF). Bits of Rd are replaced by Rn where the mask is set.
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+
+ if (notRm)
+ {
+ // PANDN computes (~m) & res, giving the BIF form of the mask.
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ // Operate on 64-bit lanes: one for the 64-bit form, two for the 128-bit form.
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
+ Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
+ Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);
+
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+
+ Operand e = context.BitwiseExclusiveOr(d, n);
+
+ e = context.BitwiseAnd(e, m);
+ e = context.BitwiseExclusiveOr(e, d);
+
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // BSL (bitwise select): Rd = (Rd & (Rn ^ Rm)) ^ Rm — bits of Rd select between
+ // Rn (where Rd bit is 1) and Rm (where Rd bit is 0).
+ public static void Bsl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ // op1 = Rd (selector), op2 = Rn, op3 = Rm.
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ // EOR (vector): Rd = Rn ^ Rm.
+ public static void Eor_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ // NOT (vector): Rd = ~Rn.
+ public static void Not_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ // PANDN computes (~n) & all-ones == ~n.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ // ORN (vector): Rd = Rn | ~Rm.
+ public static void Orn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ // (~m) & all-ones == ~m, then OR with n.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseOr(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ // ORR (vector, register): Rd = Rn | Rm.
+ public static void Orr_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ // ORR (vector, immediate): Rd = Rd | imm.
+ public static void Orr_Vi(ArmEmitterContext context)
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+
+ // RBIT (vector): reverses the bit order within each byte of Rn, via the
+ // SoftFallback.ReverseBits8 helper — no x86 fast path.
+ public static void Rbit_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ // The fallback takes/returns 32-bit values, so narrow then re-widen.
+ ne = context.ConvertI64ToI32(ne);
+
+ Operand de = context.Call(new _U32_U32(SoftFallback.ReverseBits8), ne);
+
+ de = context.ZeroExtend32(OperandType.I64, de);
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // REV16 (vector): reverses the byte order within each 16-bit halfword.
+ public static void Rev16_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // PSHUFB control bytes: each adjacent byte pair is swapped (1,0, 3,2, ...).
+ const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+ const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 1);
+ }
+ }
+
+ // REV32 (vector): reverses the element order within each 32-bit word.
+ // op.Size selects the element width being reversed (0 = bytes, 1 = halfwords).
+ public static void Rev32_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ // Byte elements: full byte reversal inside each word (3,2,1,0 ...).
+ const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+ const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 1) */
+ {
+ // Halfword elements: swap the two halfwords of each word (2,3,0,1 ...).
+ const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+ const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 2);
+ }
+ }
+
+ // REV64 (vector): reverses the element order within each 64-bit doubleword.
+ // op.Size selects the element width (0 = bytes, 1 = halfwords, 2 = words).
+ public static void Rev64_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ // Byte elements: full byte reversal inside each doubleword.
+ const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+ const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else if (op.Size == 1)
+ {
+ // Halfword elements: reverse the four halfwords of each doubleword.
+ const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+ const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 2) */
+ {
+ // Word elements: swap the two words of each doubleword.
+ const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+ const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 3);
+ }
+ }
+
+ // Scalar fallback for REV16/REV32/REV64: reverses op.Size-sized elements within
+ // each container of 2^containerSize bytes by XOR-ing the index with the
+ // per-container element mask.
+ private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ // Number of elements per container, minus one — XOR-ing with this mirrors
+ // the element position inside its container.
+ int containerMask = (1 << (containerSize - op.Size)) - 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ int revIndex = index ^ containerMask;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 00000000..22e9ef7a
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ // LD1/LD2/LD3/LD4 (multiple structures).
+ public static void Ld__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: true);
+ }
+
+ // LD1/LD2/LD3/LD4 (single structure), including the replicate forms (LD1R etc.).
+ public static void Ld__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: true);
+ }
+
+ // ST1/ST2/ST3/ST4 (multiple structures).
+ public static void St__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: false);
+ }
+
+ // ST1/ST2/ST3/ST4 (single structure).
+ public static void St__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: false);
+ }
+
+ // Emits LDn/STn (multiple structures): iterates reps × elems × sElems, reading or
+ // writing consecutive op.Size-sized elements at increasing offsets from Rn.
+ private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+ for (int rep = 0; rep < op.Reps; rep++)
+ for (int elem = 0; elem < op.Elems; elem++)
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ // Register numbers wrap around at V31.
+ int rtt = (op.Rt + rep + sElem) & 0x1f;
+
+ Operand tt = GetVec(rtt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, tt, rtt, elem, op.Size);
+
+ // On the 64-bit form, zero the upper half once the last element lands.
+ if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
+ {
+ context.Copy(tt, context.VectorZeroUpper64(tt));
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rtt, elem, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ // Emits LDn/STn (single structure): accesses one element index per register, or —
+ // in replicate mode (LD1R etc.) — loads one element and broadcasts it to every
+ // lane of the target register(s).
+ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+ if (op.Replicate)
+ {
+ // Only loads use the replicate mode.
+ Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ // Register numbers wrap around at V31.
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ // Broadcast: load the same memory element into every lane.
+ for (int index = 0; index < elems; index++)
+ {
+ EmitLoadSimd(context, address, t, rt, index, op.Size);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ context.Copy(t, context.VectorZeroUpper64(t));
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+ else
+ {
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, t, rt, op.Index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rt, op.Index, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ // Post-index write-back for SIMD memory ops: Rn += Rm when a register offset is
+ // given, otherwise Rn += the total transfer size.
+ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+ {
+ OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+ Operand m;
+
+ if (op.Rm != RegisterAlias.Zr)
+ {
+ m = GetIntOrZR(context, op.Rm);
+ }
+ else
+ {
+ // Rm == ZR encodes the immediate form: advance by the bytes transferred.
+ m = Const(offset);
+ }
+
+ context.Copy(n, context.Add(n, m));
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 00000000..47359161
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,794 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+#region "Masks"
+ // PSHUFB byte-shuffle control constants, indexed by element size (0 = byte,
+ // 1 = halfword, 2 = word). E0/E1 are the low/high 64-bit halves of the mask.
+
+ // Packs the even-numbered elements into the low half (used by TRN/UZP/XTN).
+ private static readonly long[] _masksE0_TrnUzpXtn = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
+ };
+
+ // Packs the odd-numbered elements into the high half (used by TRN/UZP).
+ private static readonly long[] _masksE1_TrnUzp = new long[]
+ {
+ 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0,
+ 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0
+ };
+
+ // Masks for the 64-bit UZP fast path (sizes 0 and 1 only).
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+#endregion
+
+ // DUP (general): broadcasts a general-purpose register value to every element of Rd.
+ public static void Dup_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if (Optimizations.UseSse2)
+ {
+ // Replicate sub-32-bit values across a 32-bit lane via multiplication,
+ // so a single 32-bit shuffle can then broadcast them.
+ switch (op.Size)
+ {
+ case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
+ case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
+ case 2: n = context.ZeroExtend32(n.Type, n); break;
+ }
+
+ Operand res = context.VectorInsert(context.VectorZero(), n, 0);
+
+ if (op.Size < 3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ // 0xf0 broadcasts lane 0 into the low half and keeps zeros on top.
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ }
+ else
+ {
+ // 64-bit elements: duplicate the low quadword into the high one.
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, n, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // DUP (scalar): copies a single element of Rn into element 0 of Rd, zeroing the rest.
+ public static void Dup_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
+ }
+
+ // DUP (element): broadcasts the element at op.DstIndex of Rn to every element of Rd.
+ public static void Dup_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand res = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ // Shift the source byte into position 0, widen it with unpacks,
+ // then broadcast the resulting 32-bit lane.
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 1)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 2)
+ {
+ // 0b01010101 * index replicates the chosen word in all four shuffle slots.
+ int mask = op.DstIndex * 0b01010101;
+
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
+ }
+ else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+ else if (op.DstIndex == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // EXT (vector): extracts a byte window from the concatenation Rm:Rn starting at
+ // byte Imm4 — equivalent to (Rm:Rn) >> (Imm4 * 8) truncated to the register size.
+ public static void Ext_V(ArmEmitterContext context)
+ {
+ OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand nShifted = GetVec(op.Rn);
+
+ // 64-bit form: clear the upper half so shifted-in bytes are zero.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ nShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, nShifted, context.VectorZero());
+ }
+
+ nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
+
+ Operand mShifted = GetVec(op.Rm);
+
+ // Align Rm's low bytes just above the bytes kept from Rn.
+ mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, mShifted, context.VectorZero());
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int bytes = op.GetBytesCount();
+
+ int position = op.Imm4 & (bytes - 1);
+
+ for (int index = 0; index < bytes; index++)
+ {
+ // Read from Rn until the window crosses into Rm.
+ int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+ Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+ position = (position + 1) & (bytes - 1);
+
+ res = EmitVectorInsert(context, res, e, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // FCSEL: Rd = cond ? Rn : Rm (scalar float/double), emitted as a branch over
+ // two copies rather than a select.
+ public static void Fcsel_S(ArmEmitterContext context)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
+
+ context.BranchIfTrue(lblTrue, isTrue);
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ // False path: copy Rm into Rd (upper lanes zeroed).
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ // True path: copy Rn into Rd.
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ // FMOV (vector element 0 to general register): raw bit move, no conversion.
+ public static void Fmov_Ftoi(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ // FMOV (top half of a 128-bit vector to general register): moves element 1 (D[1]).
+ public static void Fmov_Ftoi1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ // FMOV (general register to vector element 0): zeroes the other lanes.
+ public static void Fmov_Itof(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
+ }
+
+ // FMOV (general register to top half of a 128-bit vector): writes element 1,
+ // preserving the low half of Rd.
+ public static void Fmov_Itof1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, 1, 3));
+ }
+
+ // FMOV (register, scalar): copies element 0 of Rn into Rd, zeroing the upper lanes.
+ public static void Fmov_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+ }
+
+ // FMOV (scalar, immediate): materializes the encoded FP immediate into Rd.
+ public static void Fmov_Si(ArmEmitterContext context)
+ {
+ OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
+
+ // Size 0 is the single-precision form: only the low 32 bits are used.
+ if (op.Size == 0)
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+
+ // FMOV (vector, immediate): broadcasts the encoded FP immediate to each
+ // 32- or 64-bit element of Rd.
+ public static void Fmov_Vi(ArmEmitterContext context)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ // Number of 32-bit lanes in the register; shifting by op.Size yields the
+ // actual element count (op.Size + 2 is the element size passed below).
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2;
+
+ for (int index = 0; index < (elems >> op.Size); index++)
+ {
+ res = EmitVectorInsert(context, res, e, index, op.Size + 2);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // INS (general): inserts a general-purpose register value into one element of Rd,
+ // leaving the other elements untouched.
+ public static void Ins_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
+ }
+
+ // INS (element): copies one element of Rn into one element of Rd.
+ public static void Ins_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+ context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
+ }
+
+ // MOVI (vector, immediate): fills Rd with the expanded immediate.
+ public static void Movi_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitMoviMvni(context, not: false);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => op1);
+ }
+ }
+
+ // MVNI (vector, immediate): fills Rd with the bitwise NOT of the expanded immediate.
+ public static void Mvni_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitMoviMvni(context, not: true);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ // SMOV: moves a sign-extended vector element to a general-purpose register.
+ public static void Smov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+ // 32-bit destination form: the sign extension stops at bit 31.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ ne = context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ // TBL: table lookup — uses Rm's bytes as indices into 1-4 consecutive table
+ // registers starting at Rn (op.Size = table register count); out-of-range
+ // indices produce zero.
+ public static void Tbl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+ // Force indices > 15 to have bit 7 set so PSHUFB zeroes those lanes.
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+
+ // Each extra table register handles the next 16-index window: rebase the
+ // indices by 16 per table and OR the partial lookups together.
+ for (int index = 1; index < op.Size; index++)
+ {
+ Operand ni = GetVec((op.Rn + index) & 0x1f);
+
+ Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+ Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask);
+
+ Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask);
+
+ mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ // Fallback: call the SoftFallback helper matching the table count and
+ // register size. args[0] is the index vector, args[1..] the tables.
+ Operand[] args = new Operand[1 + op.Size];
+
+ args[0] = GetVec(op.Rm);
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ args[1 + index] = GetVec((op.Rn + index) & 0x1f);
+ }
+
+ Delegate dlg = null;
+
+ switch (op.Size)
+ {
+ case 1: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64)
+ : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break;
+
+ case 2: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64)
+ : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break;
+
+ case 3: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64)
+ : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break;
+
+ case 4: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64)
+ : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break;
+ }
+
+ context.Copy(GetVec(op.Rd), context.Call(dlg, args));
+ }
+ }
+
+ // TRN1: interleaves the even-indexed element pairs of Rn and Rm.
+ public static void Trn1_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 0);
+ }
+
+ // TRN2: interleaves the odd-indexed element pairs of Rn and Rm.
+ public static void Trn2_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 1);
+ }
+
+ // UMOV: moves a zero-extended vector element to a general-purpose register.
+ public static void Umov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ // UZP1: concatenates the even-indexed elements of Rn and Rm.
+ public static void Uzp1_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 0);
+ }
+
+ // UZP2: concatenates the odd-indexed elements of Rn and Rm.
+ public static void Uzp2_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 1);
+ }
+
+ // XTN/XTN2: narrows each element of Rn to half its width, writing the results to
+ // the lower (XTN) or upper (XTN2, 128-bit form) half of Rd.
+ public static void Xtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+
+ // Preserve Rd's low half in the high position for the XTN2 merge below.
+ Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+ Operand n = GetVec(op.Rn);
+
+ // Shuffle packs the low bytes of each source element into the low half.
+ Operand mask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ // 128-bit (XTN2): put the narrowed data in the high half above Rd's low half;
+ // 64-bit (XTN): keep only the narrowed data in the low half.
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, res, res2);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ int elems = 8 >> op.Size;
+
+ // XTN2 writes the upper elements and must keep Rd's existing low half.
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ res = EmitVectorInsert(context, res, ne, part + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // ZIP1: interleaves the lower halves of Rn and Rm.
+ public static void Zip1_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 0);
+ }
+
+ // ZIP2: interleaves the upper halves of Rn and Rm.
+ public static void Zip2_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 1);
+ }
+
+ // Shared SSE2 implementation for MOVI/MVNI: expands the immediate to all elements
+ // of Rd, optionally inverted (`not` selects MVNI).
+ private static void EmitMoviMvni(ArmEmitterContext context, bool not)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ long imm = op.Immediate;
+
+ // Replicate sub-32-bit immediates across a 32-bit lane.
+ switch (op.Size)
+ {
+ case 0: imm *= 0x01010101; break;
+ case 1: imm *= 0x00010001; break;
+ }
+
+ if (not)
+ {
+ imm = ~imm;
+ }
+
+ Operand mask;
+
+ if (op.Size < 3)
+ {
+ mask = X86GetAllElements(context, (int)imm);
+ }
+ else
+ {
+ mask = X86GetAllElements(context, imm);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mask = context.VectorZeroUpper64(mask);
+ }
+
+ context.Copy(GetVec(op.Rd), mask);
+ }
+
+ // TRN1/TRN2 implementation: for each pair position, takes the even (part 0) or
+ // odd (part 1) element of Rn and Rm and interleaves them into Rd.
+ private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand mask = null;
+
+ // Pre-shuffle groups even elements in the low half and odd elements in
+ // the high half, so a quad-granular unpack performs the transpose.
+ // 64-bit elements (size 3) need no pre-shuffle.
+ if (op.Size < 3)
+ {
+ long maskE0 = _masksE0_TrnUzpXtn[op.Size];
+ long maskE1 = _masksE1_TrnUzp [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ // Take the part-th element of each pair from both sources.
+ Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // UZP1/UZP2 implementation: concatenates the even (part 0) or odd (part 1)
+ // elements of Rn and Rm into Rd.
+ private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = null;
+
+ // Pre-shuffle each source so even elements land in the low half and
+ // odd elements in the high half; 64-bit elements need no shuffle.
+ if (op.Size < 3)
+ {
+ long maskE0 = _masksE0_TrnUzpXtn[op.Size];
+ long maskE1 = _masksE1_TrnUzp [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ // Quadword unpack then selects the even (low) or odd (high) halves.
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ // 64-bit form: unpack the low halves together, then sort even/odd
+ // elements into the two quadword halves with a byte shuffle.
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int idx = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+ // Rn's selected elements fill the low half, Rm's fill the high half.
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // ZIP1/ZIP2 implementation: interleaves the lower (part 0) or upper (part 1)
+ // halves of Rn and Rm into Rd.
+ private static void EmitVectorZip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ // Punpckl/h interleave low/high halves directly — exactly ZIP1/ZIP2.
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ // 64-bit form: interleave the low halves, then keep the requested
+ // quadword and zero the rest.
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ // ZIP2 reads from the upper half of the sources.
+ int baseIndex = part != 0 ? pairs : 0;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 00000000..1aae491d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1057 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+#region "Masks"
+ // PSHUFB control masks used by Rshrn_V/Shrn_V to gather the low (kept) bytes
+ // of each widened element into the low 64 bits; one mask per source size
+ // (op.Size = 0, 1, 2 for 16-, 32-, 64-bit source elements respectively).
+ private static readonly long[] _masks_RshrnShrn = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
+ };
+#endregion
+
+ // RSHRN{2} (vector): rounding shift right narrow. Writes the low half of Rd,
+ // or the high half for the "2" form (RegisterSize.Simd128), preserving the
+ // other half of the destination.
+ public static void Rshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ // Rounding constant: half a unit in the last kept bit position.
+ long roundConst = 1L << (shift - 1);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ // Keep the current low 64 bits of Rd so the untouched half survives.
+ Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+ Operand mask = null;
+
+ // Broadcast the rounding constant at the source element size (op.Size + 1).
+ switch (op.Size + 1)
+ {
+ case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
+ case 2: mask = X86GetAllElements(context, (int)roundConst); break;
+ case 3: mask = X86GetAllElements(context, roundConst); break;
+ }
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ Operand res = context.AddIntrinsic(addInst, n, mask);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(srlInst, res, Const(shift));
+
+ // Gather the narrowed bytes into the low 64 bits.
+ Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);
+
+ // Place the narrowed result in the high half (RSHRN2) or the low half (RSHRN).
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true);
+ }
+ }
+
+ // SHL (scalar, immediate): shift the element left by the encoded immediate.
+ public static void Shl_S(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+
+ // SHL (vector, immediate): uses PSLL* when SSE2 is available. op.Size > 0 is
+ // required because x86 has no byte-granularity PSLL.
+ public static void Shl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ // SHLL{2} (vector): shift left long by the element size, widening the low
+ // (or, for the "2" form, high) half of Rn to the next larger element size.
+ public static void Shll_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int shift = 8 << op.Size;
+
+ if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ // "2" form: operate on the upper 64 bits of the source.
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ // Sign- vs zero-extension is irrelevant here: the following shift by the
+ // full element size pushes all extended bits out of the wide element.
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ // SHRN{2} (vector): truncating shift right narrow. SSSE3 path shifts at the
+ // wide element size then uses PSHUFB to gather the narrowed bytes.
+ public static void Shrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ // NOTE(review): roundConst is never used below — SHRN does not round.
+ long roundConst = 1L << (shift - 1);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ // Keep the current low 64 bits of Rd so the untouched half survives.
+ Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+ // Place the narrowed result in the high half (SHRN2) or the low half (SHRN).
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false);
+ }
+ }
+
+ // SLI (vector): shift left and insert. Each destination element keeps its
+ // low `shift` bits; the remaining bits come from the Rn element shifted left.
+ public static void Sli_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int shift = GetImmShl(op);
+
+ // Mask of destination bits preserved by the insert: (1 << shift) - 1.
+ ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = context.ShiftLeft(ne, Const(shift));
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SQRSHL (vector): signed saturating rounding shift left (register),
+ // evaluated per element through the SignedShlRegSatQ software helper.
+ public static void Sqrshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ // Const(1) selects the rounding variant of the helper.
+ Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SQRSHRN (scalar): signed saturating rounding shift right narrow.
+ public static void Sqrshrn_S(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+
+ // SQRSHRN{2} (vector): signed saturating rounding shift right narrow.
+ public static void Sqrshrn_V(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ // SQRSHRUN (scalar): signed source, unsigned saturating rounding narrow.
+ public static void Sqrshrun_S(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+
+ // SQRSHRUN{2} (vector): signed source, unsigned saturating rounding narrow.
+ public static void Sqrshrun_V(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ // SQSHL (vector, register): signed saturating shift left, per element via
+ // the SignedShlRegSatQ helper (Const(0) = no rounding).
+ public static void Sqshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SQSHRN (scalar): signed saturating shift right narrow (no rounding).
+ public static void Sqshrn_S(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+
+ // SQSHRN{2} (vector): signed saturating shift right narrow (no rounding).
+ public static void Sqshrn_V(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ // SQSHRUN (scalar): signed source, unsigned saturating shift right narrow.
+ public static void Sqshrun_S(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+
+ // SQSHRUN{2} (vector): signed source, unsigned saturating shift right narrow.
+ public static void Sqshrun_V(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ // SRSHL (vector, register): signed rounding shift left, per element via the
+ // SignedShlReg helper (Const(1) = rounding variant).
+ public static void Srshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(1), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SRSHR (scalar): signed rounding shift right by immediate.
+ public static void Srshr_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+ }
+
+ // SRSHR (vector): signed rounding shift right by immediate. SSE2 path limited
+ // to sizes 1-2 (no byte PSLL/PSRA, and no 64-bit PSRA pre-AVX-512).
+ public static void Srshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
+ // Isolate bit (shift - 1) of each element — the rounding carry — by
+ // shifting left then logical right.
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ // Arithmetic shift of the source, then add the rounding carry.
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ // SRSRA (scalar): signed rounding shift right and accumulate.
+ public static void Srsra_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+
+ // SRSRA (vector): like Srshr_V, plus accumulation into Rd.
+ public static void Srsra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ // Rounding carry: bit (shift - 1) of each element.
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ // Rounded shift result, then accumulate the destination.
+ res = context.AddIntrinsic(addInst, res, nSra);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ // SSHL (vector, register): signed shift left, per element via the
+ // SignedShlReg helper (Const(0) = no rounding).
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(0), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // SSHLL{2}: signed shift left long; sign-extends the low (or high, for the
+ // "2" form) half of Rn to the next element size, then shifts left.
+ public static void Sshll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ // "2" form: operate on the upper 64 bits of the source.
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ // SSHR (scalar): signed shift right by immediate.
+ public static void Sshr_S(ArmEmitterContext context)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+ }
+
+ // SSHR (vector): signed shift right by immediate. SSE2 path limited to
+ // sizes 1-2 (no byte or 64-bit PSRA).
+ public static void Sshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx);
+ }
+ }
+
+ // SSRA (scalar): signed shift right and accumulate.
+ public static void Ssra_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+
+ // SSRA (vector): signed shift right by immediate, accumulated into Rd.
+ public static void Ssra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ // UQRSHL (vector): unsigned saturating rounding shift left (register),
+ // per element via the UnsignedShlRegSatQ helper (Const(1) = rounding).
+ public static void Uqrshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // UQRSHRN (scalar): unsigned saturating rounding shift right narrow.
+ public static void Uqrshrn_S(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+
+ // UQRSHRN{2} (vector): unsigned saturating rounding shift right narrow.
+ public static void Uqrshrn_V(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+
+ // UQSHL (vector, register): unsigned saturating shift left (no rounding).
+ public static void Uqshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // UQSHRN (scalar): unsigned saturating shift right narrow (no rounding).
+ public static void Uqshrn_S(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+
+ // UQSHRN{2} (vector): unsigned saturating shift right narrow (no rounding).
+ public static void Uqshrn_V(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+
+ // URSHL (vector, register): unsigned rounding shift left, per element via
+ // the UnsignedShlReg helper (Const(1) = rounding).
+ public static void Urshl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(1), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // URSHR (scalar): unsigned rounding shift right by immediate.
+ public static void Urshr_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+ }
+
+ // URSHR (vector): unsigned rounding shift right by immediate. SSE2 path
+ // requires op.Size > 0 (no byte-granularity PSLL/PSRL on x86).
+ public static void Urshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
+ // Isolate bit (shift - 1) of each element — the rounding carry — by
+ // shifting left then logical right.
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ // Logical shift of the source, then add the rounding carry.
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ // URSRA (scalar): unsigned rounding shift right and accumulate.
+ public static void Ursra_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+
+ // URSRA (vector): like Urshr_V, plus accumulation into Rd.
+ public static void Ursra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ // Rounding carry: bit (shift - 1) of each element.
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ // Rounded shift result, then accumulate the destination.
+ res = context.AddIntrinsic(addInst, res, nSrl);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ // USHL (vector, register): unsigned shift left, per element via the
+ // UnsignedShlReg helper (Const(0) = no rounding).
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(0), Const(op.Size));
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // USHLL{2}: unsigned shift left long; zero-extends the low (or high, for
+ // the "2" form) half of Rn to the next element size, then shifts left.
+ public static void Ushll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ // "2" form: operate on the upper 64 bits of the source.
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movzxInst = X86PmovzxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movzxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ // USHR (scalar): unsigned shift right by immediate.
+ public static void Ushr_S(ArmEmitterContext context)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+ }
+
+ // USHR (vector): unsigned shift right by immediate via PSRL* when available.
+ public static void Ushr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx);
+ }
+ }
+
+ // USRA (scalar): unsigned shift right and accumulate.
+ public static void Usra_S(ArmEmitterContext context)
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+
+ // USRA (vector): unsigned shift right by immediate, accumulated into Rd.
+ public static void Usra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ // Option bits for the shared shift-right-by-immediate emitter (EmitShrImmOp):
+ // Scalar = single element only; Signed = arithmetic shift / signed extract;
+ // Round = add 2^(shift-1) before shifting; Accumulate = add result into Rd.
+ [Flags]
+ private enum ShrImmFlags
+ {
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+
+ Round = 1 << 2,
+ Accumulate = 1 << 3,
+
+ ScalarSx = Scalar | Signed,
+ ScalarZx = Scalar,
+
+ VectorSx = Signed,
+ VectorZx = 0
+ }
+
+ // Convenience wrappers combining the base flag sets with extra options.
+ private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+ }
+
+ private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+ }
+
+ private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+ }
+
+ private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+ }
+
+ // Shared emitter for all shift-right-by-immediate forms (SSHR/USHR/SRSHR/
+ // URSHR/SSRA/USRA/SRSRA/URSRA and their scalar variants), configured by flags.
+ private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & ShrImmFlags.Scalar) != 0;
+ bool signed = (flags & ShrImmFlags.Signed) != 0;
+ bool round = (flags & ShrImmFlags.Round) != 0;
+ bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;
+
+ int shift = GetImmShr(op);
+
+ // Rounding constant: half a unit in the last kept bit position.
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ // Elements up to 32 bits: the round-add cannot overflow 64 bits,
+ // so round and shift can be done inline.
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signed
+ ? context.ShiftRightSI(e, Const(shift))
+ : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 3) */
+ {
+ // 64-bit elements: delegate to a helper that handles the round-add
+ // overflowing the 64-bit range.
+ e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
+ }
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Software fallback for SHRN/RSHRN: shifts each wide (op.Size + 1) element
+ // right, truncates it to op.Size, and writes the low half of Rd — or the
+ // high half for the "2" (Simd128) form, preserving the existing low half.
+ private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = 8 >> op.Size;
+
+ // Destination offset: elems for the "2" form (write the upper half).
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ // Start from the current Rd when only the upper half is being replaced.
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = context.ShiftRightUI(e, Const(shift));
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Option bits for the saturating shift-right-narrow emitter: SignedSrc
+ // controls the source extraction/shift, SignedDst the saturation range.
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ // Rounding variant wrapper (SQRSHRN/SQRSHRUN/UQRSHRN).
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ // Shared emitter for SQSHRN/SQSHRUN/UQSHRN and their rounding variants:
+ // shift each wide element right (with optional rounding), saturate to the
+ // narrow range, and write the low or high ("2" form) half of Rd.
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ // Destination offset: elems for the vector "2" form (write the upper half).
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ if (op.Size <= 1 || !round)
+ {
+ // Round-add cannot overflow here; shift inline.
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signedSrc
+ ? context.ShiftRightSI(e, Const(shift))
+ : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ // Saturate to the narrow destination range.
+ e = EmitSatQ(context, e, op.Size, signedSrc, signedDst);
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ // Delegates to a SoftFallback helper so the round-add is computed without
+ // being limited to the 64-bit IR integer range.
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ Delegate dlg = signed
+ ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64)
+ : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);
+
+ return context.Call(dlg, value, Const(roundConst), Const(shift));
+ }
+
+ // Signed variant of the widening shift-by-immediate helper below.
+ private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+ }
+
+ // Unsigned variant of the widening shift-by-immediate helper below.
+ private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+ }
+
+ // Widening fallback for SHLL/SSHLL/USHLL: extends each element of the low
+ // (or high, for the "2" form) half of Rn to the next element size, then
+ // applies `emit` with the immediate.
+ private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ // Source offset: elems for the "2" form (read the upper half).
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 00000000..eeb53c1f
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,114 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ // log2 of the DC ZVA block size in words; reported to the guest via DCZID_EL0
+ // and used by the Sys handler below — TODO confirm exact unit against
+ // NativeInterface.GetDczidEl0.
+ private const int DczSizeLog2 = 4;
+
+ // HINT instructions (YIELD, WFE, SEVL, ...) are executed as no-ops.
+ public static void Hint(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ // ISB: no instruction-cache coherency to maintain here; executed as no-op.
+ public static void Isb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ // MRS: read a system register. Dispatches on the packed (op0, op1, CRn, CRm,
+ // op2) id to the matching NativeInterface getter; unhandled registers throw.
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Delegate dlg;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0000_0000_001: dlg = new _U64(NativeInterface.GetCtrEl0); break;
+ case 0b11_011_0000_0000_111: dlg = new _U64(NativeInterface.GetDczidEl0); break;
+ case 0b11_011_0100_0100_000: dlg = new _U64(NativeInterface.GetFpcr); break;
+ case 0b11_011_0100_0100_001: dlg = new _U64(NativeInterface.GetFpsr); break;
+ case 0b11_011_1101_0000_010: dlg = new _U64(NativeInterface.GetTpidrEl0); break;
+ case 0b11_011_1101_0000_011: dlg = new _U64(NativeInterface.GetTpidr); break;
+ case 0b11_011_1110_0000_000: dlg = new _U64(NativeInterface.GetCntfrqEl0); break;
+ case 0b11_011_1110_0000_001: dlg = new _U64(NativeInterface.GetCntpctEl0); break;
+
+ default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+
+ SetIntOrZR(context, op.Rt, context.Call(dlg));
+ }
+
+ // MSR: write a system register. Only FPCR, FPSR and TPIDR_EL0 are writable;
+ // anything else throws.
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Delegate dlg;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0100_0100_000: dlg = new _Void_U64(NativeInterface.SetFpcr); break;
+ case 0b11_011_0100_0100_001: dlg = new _Void_U64(NativeInterface.SetFpsr); break;
+ case 0b11_011_1101_0000_010: dlg = new _Void_U64(NativeInterface.SetTpidrEl0); break;
+
+ default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+
+ context.Call(dlg, GetIntOrZR(context, op.Rt));
+ }
+
+ // NOP: no operation.
+ public static void Nop(ArmEmitterContext context)
+ {
+ // Do nothing.
+ }
+
+ public static void Sys(ArmEmitterContext context)
+ {
+ // This instruction is used to do some operations on the CPU like cache invalidation,
+ // address translation and the like.
+ // We treat it as no-op here since we don't have any cache being emulated anyway.
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0111_0100_001:
+ {
+ // DC ZVA: zero the (4 << DczSizeLog2)-byte block containing the
+ // address in Rt, 8 bytes at a time.
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ for (long offset = 0; offset < (4 << DczSizeLog2); offset += 8)
+ {
+ Operand address = context.Add(t, Const(offset));
+
+ context.Call(new _Void_U64_U64(NativeInterface.WriteUInt64), address, Const(0L));
+ }
+
+ break;
+ }
+
+ // No-op
+ case 0b11_011_0111_1110_001: // DC CIVAC
+ break;
+ }
+ }
+
+ // Packs the system register selector fields (op0:op1:CRn:CRm:op2) into a
+ // single integer, matching the 0bXX_XXX_XXXX_XXXX_XXX case literals above.
+ private static int GetPackedId(OpCodeSystem op)
+ {
+ int id;
+
+ id = op.Op2 << 0;
+ id |= op.CRm << 3;
+ id |= op.CRn << 7;
+ id |= op.Op1 << 11;
+ id |= op.Op0 << 14;
+
+ return id;
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 00000000..e70ca34b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,459 @@
+namespace ARMeilleure.Instructions
+{
+ enum InstName
+ {
+ // Base (AArch64)
+ Adc,
+ Adcs,
+ Add,
+ Adds,
+ Adr,
+ Adrp,
+ And,
+ Ands,
+ Asrv,
+ B,
+ B_Cond,
+ Bfm,
+ Bic,
+ Bics,
+ Bl,
+ Blr,
+ Br,
+ Brk,
+ Cbnz,
+ Cbz,
+ Ccmn,
+ Ccmp,
+ Clrex,
+ Cls,
+ Clz,
+ Crc32b,
+ Crc32h,
+ Crc32w,
+ Crc32x,
+ Crc32cb,
+ Crc32ch,
+ Crc32cw,
+ Crc32cx,
+ Csel,
+ Csinc,
+ Csinv,
+ Csneg,
+ Dmb,
+ Dsb,
+ Eon,
+ Eor,
+ Extr,
+ Hint,
+ Isb,
+ Ldar,
+ Ldaxp,
+ Ldaxr,
+ Ldp,
+ Ldr,
+ Ldr_Literal,
+ Ldrs,
+ Ldxr,
+ Ldxp,
+ Lslv,
+ Lsrv,
+ Madd,
+ Movk,
+ Movn,
+ Movz,
+ Mrs,
+ Msr,
+ Msub,
+ Nop,
+ Orn,
+ Orr,
+ Pfrm,
+ Rbit,
+ Ret,
+ Rev16,
+ Rev32,
+ Rev64,
+ Rorv,
+ Sbc,
+ Sbcs,
+ Sbfm,
+ Sdiv,
+ Smaddl,
+ Smsubl,
+ Smulh,
+ Stlr,
+ Stlxp,
+ Stlxr,
+ Stp,
+ Str,
+ Stxp,
+ Stxr,
+ Sub,
+ Subs,
+ Svc,
+ Sys,
+ Tbnz,
+ Tbz,
+ Ubfm,
+ Udiv,
+ Umaddl,
+ Umsubl,
+ Umulh,
+ Und,
+
+ // FP & SIMD (AArch64)
+ Abs_S,
+ Abs_V,
+ Add_S,
+ Add_V,
+ Addhn_V,
+ Addp_S,
+ Addp_V,
+ Addv_V,
+ Aesd_V,
+ Aese_V,
+ Aesimc_V,
+ Aesmc_V,
+ And_V,
+ Bic_V,
+ Bic_Vi,
+ Bif_V,
+ Bit_V,
+ Bsl_V,
+ Cls_V,
+ Clz_V,
+ Cmeq_S,
+ Cmeq_V,
+ Cmge_S,
+ Cmge_V,
+ Cmgt_S,
+ Cmgt_V,
+ Cmhi_S,
+ Cmhi_V,
+ Cmhs_S,
+ Cmhs_V,
+ Cmle_S,
+ Cmle_V,
+ Cmlt_S,
+ Cmlt_V,
+ Cmtst_S,
+ Cmtst_V,
+ Cnt_V,
+ Dup_Gp,
+ Dup_S,
+ Dup_V,
+ Eor_V,
+ Ext_V,
+ Fabd_S,
+ Fabd_V,
+ Fabs_S,
+ Fabs_V,
+ Fadd_S,
+ Fadd_V,
+ Faddp_S,
+ Faddp_V,
+ Fccmp_S,
+ Fccmpe_S,
+ Fcmeq_S,
+ Fcmeq_V,
+ Fcmge_S,
+ Fcmge_V,
+ Fcmgt_S,
+ Fcmgt_V,
+ Fcmle_S,
+ Fcmle_V,
+ Fcmlt_S,
+ Fcmlt_V,
+ Fcmp_S,
+ Fcmpe_S,
+ Fcsel_S,
+ Fcvt_S,
+ Fcvtas_Gp,
+ Fcvtau_Gp,
+ Fcvtl_V,
+ Fcvtms_Gp,
+ Fcvtmu_Gp,
+ Fcvtn_V,
+ Fcvtns_S,
+ Fcvtns_V,
+ Fcvtnu_S,
+ Fcvtnu_V,
+ Fcvtps_Gp,
+ Fcvtpu_Gp,
+ Fcvtzs_Gp,
+ Fcvtzs_Gp_Fixed,
+ Fcvtzs_S,
+ Fcvtzs_V,
+ Fcvtzs_V_Fixed,
+ Fcvtzu_Gp,
+ Fcvtzu_Gp_Fixed,
+ Fcvtzu_S,
+ Fcvtzu_V,
+ Fcvtzu_V_Fixed,
+ Fdiv_S,
+ Fdiv_V,
+ Fmadd_S,
+ Fmax_S,
+ Fmax_V,
+ Fmaxnm_S,
+ Fmaxnm_V,
+ Fmaxp_V,
+ Fmin_S,
+ Fmin_V,
+ Fminnm_S,
+ Fminnm_V,
+ Fminp_V,
+ Fmla_Se,
+ Fmla_V,
+ Fmla_Ve,
+ Fmls_Se,
+ Fmls_V,
+ Fmls_Ve,
+ Fmov_S,
+ Fmov_Si,
+ Fmov_Vi,
+ Fmov_Ftoi,
+ Fmov_Itof,
+ Fmov_Ftoi1,
+ Fmov_Itof1,
+ Fmsub_S,
+ Fmul_S,
+ Fmul_Se,
+ Fmul_V,
+ Fmul_Ve,
+ Fmulx_S,
+ Fmulx_Se,
+ Fmulx_V,
+ Fmulx_Ve,
+ Fneg_S,
+ Fneg_V,
+ Fnmadd_S,
+ Fnmsub_S,
+ Fnmul_S,
+ Frecpe_S,
+ Frecpe_V,
+ Frecps_S,
+ Frecps_V,
+ Frecpx_S,
+ Frinta_S,
+ Frinta_V,
+ Frinti_S,
+ Frinti_V,
+ Frintm_S,
+ Frintm_V,
+ Frintn_S,
+ Frintn_V,
+ Frintp_S,
+ Frintp_V,
+ Frintx_S,
+ Frintx_V,
+ Frintz_S,
+ Frintz_V,
+ Frsqrte_S,
+ Frsqrte_V,
+ Frsqrts_S,
+ Frsqrts_V,
+ Fsqrt_S,
+ Fsqrt_V,
+ Fsub_S,
+ Fsub_V,
+ Ins_Gp,
+ Ins_V,
+ Ld__Vms,
+ Ld__Vss,
+ Mla_V,
+ Mla_Ve,
+ Mls_V,
+ Mls_Ve,
+ Movi_V,
+ Mul_V,
+ Mul_Ve,
+ Mvni_V,
+ Neg_S,
+ Neg_V,
+ Not_V,
+ Orn_V,
+ Orr_V,
+ Orr_Vi,
+ Raddhn_V,
+ Rbit_V,
+ Rev16_V,
+ Rev32_V,
+ Rev64_V,
+ Rshrn_V,
+ Rsubhn_V,
+ Saba_V,
+ Sabal_V,
+ Sabd_V,
+ Sabdl_V,
+ Sadalp_V,
+ Saddl_V,
+ Saddlp_V,
+ Saddlv_V,
+ Saddw_V,
+ Scvtf_Gp,
+ Scvtf_Gp_Fixed,
+ Scvtf_S,
+ Scvtf_V,
+ Scvtf_V_Fixed,
+ Sha1c_V,
+ Sha1h_V,
+ Sha1m_V,
+ Sha1p_V,
+ Sha1su0_V,
+ Sha1su1_V,
+ Sha256h_V,
+ Sha256h2_V,
+ Sha256su0_V,
+ Sha256su1_V,
+ Shadd_V,
+ Shl_S,
+ Shl_V,
+ Shll_V,
+ Shrn_V,
+ Shsub_V,
+ Sli_V,
+ Smax_V,
+ Smaxp_V,
+ Smaxv_V,
+ Smin_V,
+ Sminp_V,
+ Sminv_V,
+ Smlal_V,
+ Smlal_Ve,
+ Smlsl_V,
+ Smlsl_Ve,
+ Smov_S,
+ Smull_V,
+ Smull_Ve,
+ Sqabs_S,
+ Sqabs_V,
+ Sqadd_S,
+ Sqadd_V,
+ Sqdmulh_S,
+ Sqdmulh_V,
+ Sqneg_S,
+ Sqneg_V,
+ Sqrdmulh_S,
+ Sqrdmulh_V,
+ Sqrshl_V,
+ Sqrshrn_S,
+ Sqrshrn_V,
+ Sqrshrun_S,
+ Sqrshrun_V,
+ Sqshl_V,
+ Sqshrn_S,
+ Sqshrn_V,
+ Sqshrun_S,
+ Sqshrun_V,
+ Sqsub_S,
+ Sqsub_V,
+ Sqxtn_S,
+ Sqxtn_V,
+ Sqxtun_S,
+ Sqxtun_V,
+ Srhadd_V,
+ Srshl_V,
+ Srshr_S,
+ Srshr_V,
+ Srsra_S,
+ Srsra_V,
+ Sshl_V,
+ Sshll_V,
+ Sshr_S,
+ Sshr_V,
+ Ssra_S,
+ Ssra_V,
+ Ssubl_V,
+ Ssubw_V,
+ St__Vms,
+ St__Vss,
+ Sub_S,
+ Sub_V,
+ Subhn_V,
+ Suqadd_S,
+ Suqadd_V,
+ Tbl_V,
+ Trn1_V,
+ Trn2_V,
+ Uaba_V,
+ Uabal_V,
+ Uabd_V,
+ Uabdl_V,
+ Uadalp_V,
+ Uaddl_V,
+ Uaddlp_V,
+ Uaddlv_V,
+ Uaddw_V,
+ Ucvtf_Gp,
+ Ucvtf_Gp_Fixed,
+ Ucvtf_S,
+ Ucvtf_V,
+ Ucvtf_V_Fixed,
+ Uhadd_V,
+ Uhsub_V,
+ Umax_V,
+ Umaxp_V,
+ Umaxv_V,
+ Umin_V,
+ Uminp_V,
+ Uminv_V,
+ Umlal_V,
+ Umlal_Ve,
+ Umlsl_V,
+ Umlsl_Ve,
+ Umov_S,
+ Umull_V,
+ Umull_Ve,
+ Uqadd_S,
+ Uqadd_V,
+ Uqrshl_V,
+ Uqrshrn_S,
+ Uqrshrn_V,
+ Uqshl_V,
+ Uqshrn_S,
+ Uqshrn_V,
+ Uqsub_S,
+ Uqsub_V,
+ Uqxtn_S,
+ Uqxtn_V,
+ Urhadd_V,
+ Urshl_V,
+ Urshr_S,
+ Urshr_V,
+ Ursra_S,
+ Ursra_V,
+ Ushl_V,
+ Ushll_V,
+ Ushr_S,
+ Ushr_V,
+ Usqadd_S,
+ Usqadd_V,
+ Usra_S,
+ Usra_V,
+ Usubl_V,
+ Usubw_V,
+ Uzp1_V,
+ Uzp2_V,
+ Xtn_V,
+ Zip1_V,
+ Zip2_V,
+
+ // Base (AArch32)
+ Blx,
+ Bx,
+ Cmp,
+ Ldm,
+ Ldrb,
+ Ldrd,
+ Ldrh,
+ Ldrsb,
+ Ldrsh,
+ Mov,
+ Stm,
+ Strb,
+ Strd,
+ Strh
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 00000000..3a1e91c8
--- /dev/null
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,367 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class NativeInterface
+ {
+ private const int ErgSizeLog2 = 4;
+
+ // Per-thread state: the guest execution context, its memory manager, and
+ // the state of the thread's exclusive monitor (used by the exclusive
+ // load/store helpers below).
+ private class ThreadContext
+ {
+ public ExecutionContext Context { get; }
+ public MemoryManager Memory { get; }
+
+ // ExclusiveAddress == ulong.MaxValue is the sentinel for "no reservation held".
+ // The Low/High values hold the data observed by the last exclusive load,
+ // compared against memory when the matching exclusive store executes.
+ public ulong ExclusiveAddress { get; set; }
+ public ulong ExclusiveValueLow { get; set; }
+ public ulong ExclusiveValueHigh { get; set; }
+
+ public ThreadContext(ExecutionContext context, MemoryManager memory)
+ {
+ Context = context;
+ Memory = memory;
+
+ // Start with no exclusive reservation.
+ ExclusiveAddress = ulong.MaxValue;
+ }
+ }
+
+ [ThreadStatic]
+ private static ThreadContext _context;
+
+ public static void RegisterThread(ExecutionContext context, MemoryManager memory)
+ {
+ _context = new ThreadContext(context, memory);
+ }
+
+ public static void UnregisterThread()
+ {
+ _context = null;
+ }
+
+ public static void Break(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnBreak(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void SupervisorCall(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnSupervisorCall(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void Undefined(ulong address, int opCode)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnUndefined(address, opCode);
+
+ Statistics.ResumeTimer();
+ }
+
+#region "System registers"
+ public static ulong GetCtrEl0()
+ {
+ return (ulong)GetContext().CtrEl0;
+ }
+
+ public static ulong GetDczidEl0()
+ {
+ return (ulong)GetContext().DczidEl0;
+ }
+
+ public static ulong GetFpcr()
+ {
+ return (ulong)GetContext().Fpcr;
+ }
+
+ public static ulong GetFpsr()
+ {
+ return (ulong)GetContext().Fpsr;
+ }
+
+ public static ulong GetTpidrEl0()
+ {
+ return (ulong)GetContext().TpidrEl0;
+ }
+
+ public static ulong GetTpidr()
+ {
+ return (ulong)GetContext().Tpidr;
+ }
+
+ public static ulong GetCntfrqEl0()
+ {
+ return GetContext().CntfrqEl0;
+ }
+
+ public static ulong GetCntpctEl0()
+ {
+ return GetContext().CntpctEl0;
+ }
+
+ public static void SetFpcr(ulong value)
+ {
+ GetContext().Fpcr = (FPCR)value;
+ }
+
+ public static void SetFpsr(ulong value)
+ {
+ GetContext().Fpsr = (FPSR)value;
+ }
+
+ public static void SetTpidrEl0(ulong value)
+ {
+ GetContext().TpidrEl0 = (long)value;
+ }
+#endregion
+
+#region "Read"
+ public static byte ReadByte(ulong address)
+ {
+ return GetMemoryManager().ReadByte((long)address);
+ }
+
+ public static ushort ReadUInt16(ulong address)
+ {
+ return GetMemoryManager().ReadUInt16((long)address);
+ }
+
+ public static uint ReadUInt32(ulong address)
+ {
+ return GetMemoryManager().ReadUInt32((long)address);
+ }
+
+ public static ulong ReadUInt64(ulong address)
+ {
+ return GetMemoryManager().ReadUInt64((long)address);
+ }
+
+ public static V128 ReadVector128(ulong address)
+ {
+ return GetMemoryManager().ReadVector128((long)address);
+ }
+#endregion
+
+#region "Read exclusive"
+ // Exclusive byte load: reads the value and takes a reservation on the
+ // (granule-aligned) address, remembering the loaded value so a later
+ // exclusive store can compare-exchange against it.
+ public static byte ReadByteExclusive(ulong address)
+ {
+ byte value = _context.Memory.ReadByte((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static ushort ReadUInt16Exclusive(ulong address)
+ {
+ ushort value = _context.Memory.ReadUInt16((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static uint ReadUInt32Exclusive(ulong address)
+ {
+ uint value = _context.Memory.ReadUInt32((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static ulong ReadUInt64Exclusive(ulong address)
+ {
+ ulong value = _context.Memory.ReadUInt64((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static V128 ReadVector128Exclusive(ulong address)
+ {
+ V128 value = _context.Memory.AtomicLoadInt128((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value.GetUInt64(0);
+ _context.ExclusiveValueHigh = value.GetUInt64(1);
+
+ return value;
+ }
+#endregion
+
+#region "Write"
+ public static void WriteByte(ulong address, byte value)
+ {
+ GetMemoryManager().WriteByte((long)address, value);
+ }
+
+ public static void WriteUInt16(ulong address, ushort value)
+ {
+ GetMemoryManager().WriteUInt16((long)address, value);
+ }
+
+ public static void WriteUInt32(ulong address, uint value)
+ {
+ GetMemoryManager().WriteUInt32((long)address, value);
+ }
+
+ public static void WriteUInt64(ulong address, ulong value)
+ {
+ GetMemoryManager().WriteUInt64((long)address, value);
+ }
+
+ public static void WriteVector128(ulong address, V128 value)
+ {
+ GetMemoryManager().WriteVector128((long)address, value);
+ }
+#endregion
+
+#region "Write exclusive"
+ // Exclusive byte store: succeeds only if this thread still holds a
+ // reservation covering the address AND memory still contains the value
+ // observed by the matching exclusive load (atomic compare-exchange).
+ // Returns 0 on success, 1 on failure.
+ public static int WriteByteExclusive(ulong address, byte value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeByte(
+ (long)address,
+ (byte)_context.ExclusiveValueLow,
+ (byte)value);
+
+ if (success)
+ {
+ // The reservation is consumed by a successful store.
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt16Exclusive(ulong address, ushort value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt16(
+ (long)address,
+ (short)_context.ExclusiveValueLow,
+ (short)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt32Exclusive(ulong address, uint value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt32(
+ (long)address,
+ (int)_context.ExclusiveValueLow,
+ (int)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt64Exclusive(ulong address, ulong value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt64(
+ (long)address,
+ (long)_context.ExclusiveValueLow,
+ (long)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteVector128Exclusive(ulong address, V128 value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ V128 expected = new V128(_context.ExclusiveValueLow, _context.ExclusiveValueHigh);
+
+ success = _context.Memory.AtomicCompareExchangeInt128((long)address, expected, value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+#endregion
+
+ // Aligns the address down to the exclusive reservation granule,
+ // (4 << ErgSizeLog2) = 64 bytes, so any access within the same granule
+ // matches the reservation.
+ private static ulong GetMaskedExclusiveAddress(ulong address)
+ {
+ return address & ~((4UL << ErgSizeLog2) - 1);
+ }
+
+ // Drops any exclusive reservation held by the current thread
+ // (ulong.MaxValue is the "no reservation" sentinel).
+ public static void ClearExclusive()
+ {
+ _context.ExclusiveAddress = ulong.MaxValue;
+ }
+
+ // Periodic synchronization point called from translated code: lets the
+ // execution context service pending interrupts. The statistics timer is
+ // paused so host-side work is not attributed to guest execution time.
+ public static void CheckSynchronization()
+ {
+ Statistics.PauseTimer();
+
+ GetContext().CheckInterrupt();
+
+ Statistics.ResumeTimer();
+ }
+
+ public static ExecutionContext GetContext()
+ {
+ return _context.Context;
+ }
+
+ public static MemoryManager GetMemoryManager()
+ {
+ return _context.Memory;
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 00000000..dc030921
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,1307 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFallback
+ {
+#region "ShlReg"
+ // Signed register-controlled shift left; element size is (8 << size) bits.
+ // Only the least significant byte of 'shift' is used, as a signed amount:
+ // negative -> right shift by -shift (with optional rounding),
+ // >= eSize -> 0, zero -> value unchanged.
+ public static long SignedShlReg(long value, long shift, bool round, int size)
+ {
+ int eSize = 8 << size;
+
+ int shiftLsB = (sbyte)shift;
+
+ if (shiftLsB < 0)
+ {
+ return SignedShrReg(value, -shiftLsB, round, eSize);
+ }
+ else if (shiftLsB > 0)
+ {
+ if (shiftLsB >= eSize)
+ {
+ return 0L;
+ }
+
+ return value << shiftLsB;
+ }
+ else /* if (shiftLsB == 0) */
+ {
+ return value;
+ }
+ }
+
+ public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
+ {
+ int eSize = 8 << size;
+
+ int shiftLsB = (sbyte)shift;
+
+ if (shiftLsB < 0)
+ {
+ return UnsignedShrReg(value, -shiftLsB, round, eSize);
+ }
+ else if (shiftLsB > 0)
+ {
+ if (shiftLsB >= eSize)
+ {
+ return 0UL;
+ }
+
+ return value << shiftLsB;
+ }
+ else /* if (shiftLsB == 0) */
+ {
+ return value;
+ }
+ }
+
+ public static long SignedShlRegSatQ(long value, long shift, bool round, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ int shiftLsB = (sbyte)shift;
+
+ if (shiftLsB < 0)
+ {
+ return SignedShrReg(value, -shiftLsB, round, eSize);
+ }
+ else if (shiftLsB > 0)
+ {
+ if (shiftLsB >= eSize)
+ {
+ return SignedSignSatQ(value, eSize, context);
+ }
+
+ if (eSize == 64)
+ {
+ long shl = value << shiftLsB;
+ long shr = shl >> shiftLsB;
+
+ if (shr != value)
+ {
+ return SignedSignSatQ(value, eSize, context);
+ }
+ else /* if (shr == value) */
+ {
+ return shl;
+ }
+ }
+ else /* if (eSize != 64) */
+ {
+ return SignedSrcSignedDstSatQ(value << shiftLsB, size);
+ }
+ }
+ else /* if (shiftLsB == 0) */
+ {
+ return value;
+ }
+ }
+
+ public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ int shiftLsB = (sbyte)shift;
+
+ if (shiftLsB < 0)
+ {
+ return UnsignedShrReg(value, -shiftLsB, round, eSize);
+ }
+ else if (shiftLsB > 0)
+ {
+ if (shiftLsB >= eSize)
+ {
+ return UnsignedSignSatQ(value, eSize, context);
+ }
+
+ if (eSize == 64)
+ {
+ ulong shl = value << shiftLsB;
+ ulong shr = shl >> shiftLsB;
+
+ if (shr != value)
+ {
+ return UnsignedSignSatQ(value, eSize, context);
+ }
+ else /* if (shr == value) */
+ {
+ return shl;
+ }
+ }
+ else /* if (eSize != 64) */
+ {
+ return UnsignedSrcUnsignedDstSatQ(value << shiftLsB, size);
+ }
+ }
+ else /* if (shiftLsB == 0) */
+ {
+ return value;
+ }
+ }
+
+ // Signed right shift by a positive amount, with optional rounding
+ // (add 2^(shift-1) before shifting).
+ private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ {
+ if (round)
+ {
+ if (shift >= eSize)
+ {
+ return 0L;
+ }
+
+ long roundConst = 1L << (shift - 1);
+
+ long add = value + roundConst;
+
+ if (eSize == 64)
+ {
+ // If adding the rounding constant overflowed into the sign bit
+ // (value >= 0 but add < 0), shift as unsigned so the carried-out
+ // bit is not smeared by sign extension.
+ if ((~value & (value ^ add)) < 0L)
+ {
+ return (long)((ulong)add >> shift);
+ }
+ else
+ {
+ return add >> shift;
+ }
+ }
+ else /* if (eSize != 64) */
+ {
+ // Narrower elements cannot overflow the 64-bit accumulator.
+ return add >> shift;
+ }
+ }
+ else /* if (!round) */
+ {
+ if (shift >= eSize)
+ {
+ // Shifting out all bits leaves only the sign.
+ if (value < 0L)
+ {
+ return -1L;
+ }
+ else /* if (value >= 0L) */
+ {
+ return 0L;
+ }
+ }
+
+ return value >> shift;
+ }
+ }
+
+ // Unsigned right shift by a positive amount, with optional rounding.
+ // shift == 64 is special-cased throughout because C# masks the shift
+ // count of a 64-bit shift to the low 6 bits (x >> 64 == x).
+ private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ {
+ if (round)
+ {
+ if (shift > 64)
+ {
+ return 0UL;
+ }
+
+ ulong roundConst = 1UL << (shift - 1);
+
+ ulong add = value + roundConst;
+
+ if (eSize == 64)
+ {
+ // (add < value) && (add < roundConst) detects unsigned overflow
+ // of the rounding addition: the carried-out 65th bit must be
+ // reinserted at the top of the shifted result.
+ if ((add < value) && (add < roundConst))
+ {
+ if (shift == 64)
+ {
+ return 1UL;
+ }
+
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else
+ {
+ if (shift == 64)
+ {
+ return 0UL;
+ }
+
+ return add >> shift;
+ }
+ }
+ else /* if (eSize != 64) */
+ {
+ if (shift == 64)
+ {
+ return 0UL;
+ }
+
+ return add >> shift;
+ }
+ }
+ else /* if (!round) */
+ {
+ if (shift >= eSize)
+ {
+ return 0UL;
+ }
+
+ return value >> shift;
+ }
+ }
+
+ // Saturates to the extreme of the element's signed range matching the
+ // sign of op, raising the FPSR cumulative saturation flag (QC).
+ // op == 0 yields 0 without setting QC.
+ private static long SignedSignSatQ(long op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}.
+ {
+ long tMaxValue = (1L << (eSize - 1)) - 1L;
+ long tMinValue = -(1L << (eSize - 1));
+
+ if (op > 0L)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else if (op < 0L)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMinValue;
+ }
+ else
+ {
+ return 0L;
+ }
+ }
+
+ private static ulong UnsignedSignSatQ(ulong op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}.
+ {
+ ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
+
+ if (op > 0UL)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else
+ {
+ return 0UL;
+ }
+ }
+#endregion
+
+#region "ShrImm64"
+ public static long SignedShrImm64(long value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ if (value < 0L)
+ {
+ return -1L;
+ }
+ else /* if (value >= 0L) */
+ {
+ return 0L;
+ }
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ if (shift <= 63)
+ {
+ long add = value + roundConst;
+
+ if ((~value & (value ^ add)) < 0L)
+ {
+ return (long)((ulong)add >> shift);
+ }
+ else
+ {
+ return add >> shift;
+ }
+ }
+ else /* if (shift == 64) */
+ {
+ return 0L;
+ }
+ }
+ }
+
+ public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ ulong add = value + (ulong)roundConst;
+
+ if ((add < value) && (add < (ulong)roundConst))
+ {
+ if (shift <= 63)
+ {
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else /* if (shift == 64) */
+ {
+ return 1UL;
+ }
+ }
+ else
+ {
+ if (shift <= 63)
+ {
+ return add >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ }
+ }
+#endregion
+
+#region "Rounding"
+ public static double Round(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ FPRoundingMode roundMode = context.Fpcr.GetRoundingMode();
+
+ if (roundMode == FPRoundingMode.ToNearest)
+ {
+ return Math.Round(value); // even
+ }
+ else if (roundMode == FPRoundingMode.TowardsPlusInfinity)
+ {
+ return Math.Ceiling(value);
+ }
+ else if (roundMode == FPRoundingMode.TowardsMinusInfinity)
+ {
+ return Math.Floor(value);
+ }
+ else /* if (roundMode == FPRoundingMode.TowardsZero) */
+ {
+ return Math.Truncate(value);
+ }
+ }
+
+ public static float RoundF(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ FPRoundingMode roundMode = context.Fpcr.GetRoundingMode();
+
+ if (roundMode == FPRoundingMode.ToNearest)
+ {
+ return MathF.Round(value); // even
+ }
+ else if (roundMode == FPRoundingMode.TowardsPlusInfinity)
+ {
+ return MathF.Ceiling(value);
+ }
+ else if (roundMode == FPRoundingMode.TowardsMinusInfinity)
+ {
+ return MathF.Floor(value);
+ }
+ else /* if (roundMode == FPRoundingMode.TowardsZero) */
+ {
+ return MathF.Truncate(value);
+ }
+ }
+#endregion
+
+#region "Saturation"
+ // Saturating float -> int32 conversion: NaN maps to 0, out-of-range
+ // values clamp to int.MinValue/int.MaxValue (the other SatF* helpers
+ // below follow the same pattern for their types).
+ public static int SatF32ToS32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF32ToS64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF32ToU32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF32ToU64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+
+ public static int SatF64ToS32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF64ToS64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF64ToU32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF64ToU64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+#endregion
+
+#region "Saturating"
+ // Clamps a signed value into the signed range of a (8 << size)-bit
+ // element, setting the FPSR cumulative saturation flag (QC) on clamp.
+ public static long SignedSrcSignedDstSatQ(long op, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ long tMaxValue = (1L << (eSize - 1)) - 1L;
+ long tMinValue = -(1L << (eSize - 1));
+
+ if (op > tMaxValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else if (op < tMinValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMinValue;
+ }
+ else
+ {
+ return op;
+ }
+ }
+
+ public static ulong SignedSrcUnsignedDstSatQ(long op, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ ulong tMaxValue = (1UL << eSize) - 1UL;
+ ulong tMinValue = 0UL;
+
+ if (op > (long)tMaxValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else if (op < (long)tMinValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMinValue;
+ }
+ else
+ {
+ return (ulong)op;
+ }
+ }
+
+ public static long UnsignedSrcSignedDstSatQ(ulong op, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ long tMaxValue = (1L << (eSize - 1)) - 1L;
+
+ if (op > (ulong)tMaxValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else
+ {
+ return (long)op;
+ }
+ }
+
+ public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ int eSize = 8 << size;
+
+ ulong tMaxValue = (1UL << eSize) - 1UL;
+
+ if (op > tMaxValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return tMaxValue;
+ }
+ else
+ {
+ return op;
+ }
+ }
+
+ public static long UnarySignedSatQAbsOrNeg(long op)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ if (op == long.MinValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return long.MaxValue;
+ }
+ else
+ {
+ return op;
+ }
+ }
+
+ public static long BinarySignedSatQAdd(long op1, long op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ long add = op1 + op2;
+
+ if ((~(op1 ^ op2) & (op1 ^ add)) < 0L)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ if (op1 < 0L)
+ {
+ return long.MinValue;
+ }
+ else
+ {
+ return long.MaxValue;
+ }
+ }
+ else
+ {
+ return add;
+ }
+ }
+
+ public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ ulong add = op1 + op2;
+
+ if ((add < op1) && (add < op2))
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return ulong.MaxValue;
+ }
+ else
+ {
+ return add;
+ }
+ }
+
+ public static long BinarySignedSatQSub(long op1, long op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ long sub = op1 - op2;
+
+ if (((op1 ^ op2) & (op1 ^ sub)) < 0L)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ if (op1 < 0L)
+ {
+ return long.MinValue;
+ }
+ else
+ {
+ return long.MaxValue;
+ }
+ }
+ else
+ {
+ return sub;
+ }
+ }
+
+ public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ ulong sub = op1 - op2;
+
+ if (op1 < op2)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return ulong.MinValue;
+ }
+ else
+ {
+ return sub;
+ }
+ }
+
+ public static long BinarySignedSatQAcc(ulong op1, long op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ if (op1 <= (ulong)long.MaxValue)
+ {
+ // op1 from ulong.MinValue to (ulong)long.MaxValue
+ // op2 from long.MinValue to long.MaxValue
+
+ long add = (long)op1 + op2;
+
+ if ((~op2 & add) < 0L)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return long.MaxValue;
+ }
+ else
+ {
+ return add;
+ }
+ }
+ else if (op2 >= 0L)
+ {
+ // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+ // op2 from (long)ulong.MinValue to long.MaxValue
+
+ context.Fpsr |= FPSR.Qc;
+
+ return long.MaxValue;
+ }
+ else
+ {
+ // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+ // op2 from long.MinValue to (long)ulong.MinValue - 1L
+
+ ulong add = op1 + (ulong)op2;
+
+ if (add > (ulong)long.MaxValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return long.MaxValue;
+ }
+ else
+ {
+ return (long)add;
+ }
+ }
+ }
+
+ public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ if (op1 >= 0L)
+ {
+ // op1 from (long)ulong.MinValue to long.MaxValue
+ // op2 from ulong.MinValue to ulong.MaxValue
+
+ ulong add = (ulong)op1 + op2;
+
+ if ((add < (ulong)op1) && (add < op2))
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return ulong.MaxValue;
+ }
+ else
+ {
+ return add;
+ }
+ }
+ else if (op2 > (ulong)long.MaxValue)
+ {
+ // op1 from long.MinValue to (long)ulong.MinValue - 1L
+ // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+
+ return (ulong)op1 + op2;
+ }
+ else
+ {
+ // op1 from long.MinValue to (long)ulong.MinValue - 1L
+ // op2 from ulong.MinValue to (ulong)long.MaxValue
+
+ long add = op1 + (long)op2;
+
+ if (add < (long)ulong.MinValue)
+ {
+ context.Fpsr |= FPSR.Qc;
+
+ return ulong.MinValue;
+ }
+ else
+ {
+ return (ulong)add;
+ }
+ }
+ }
+#endregion
+
+#region "Count"
+ // Counts how many bits below the sign bit are copies of it (CLS).
+ // value ^ (value >> 1) sets a bit wherever two adjacent bits differ, so
+ // the highest set bit at or below position size-2 ends the sign run.
+ public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ value ^= value >> 1;
+
+ int highBit = size - 2;
+
+ for (int bit = highBit; bit >= 0; bit--)
+ {
+ if (((int)(value >> bit) & 0b1) != 0)
+ {
+ return (ulong)(highBit - bit);
+ }
+ }
+
+ // All bits equal the sign bit.
+ return (ulong)(size - 1);
+ }
+
+ // Leading-zero count of a nibble (index = nibble value, entry = CLZ).
+ private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ // Counts leading zeros (CLZ) by scanning 4 bits at a time from the top;
+ // the scan stops at the first nibble that contains a set bit.
+ public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ if (value == 0ul)
+ {
+ return (ulong)size;
+ }
+
+ int nibbleIdx = size;
+ int preCount, count = 0;
+
+ do
+ {
+ nibbleIdx -= 4;
+ preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
+ count += preCount;
+ }
+ while (preCount == 4); // an all-zero nibble means keep scanning
+
+ return (ulong)count;
+ }
+
+ // Population count of the low byte (CNT), using pairwise SWAR additions:
+ // sum adjacent 1-bit fields, then 2-bit fields, then the two nibbles.
+ public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.).
+ {
+ value = ((value >> 1) & 0x55ul) + (value & 0x55ul);
+ value = ((value >> 2) & 0x33ul) + (value & 0x33ul);
+
+ return (value >> 4) + (value & 0x0ful);
+ }
+#endregion
+
+#region "Table"
+ public static V128 Tbl1_V64(V128 vector, V128 tb0)
+ {
+ return Tbl(vector, 8, tb0);
+ }
+
+ public static V128 Tbl1_V128(V128 vector, V128 tb0)
+ {
+ return Tbl(vector, 16, tb0);
+ }
+
+ public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1)
+ {
+ return Tbl(vector, 8, tb0, tb1);
+ }
+
+ public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1)
+ {
+ return Tbl(vector, 16, tb0, tb1);
+ }
+
+ public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return Tbl(vector, 8, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbl3_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return Tbl(vector, 16, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return Tbl(vector, 8, tb0, tb1, tb2, tb3);
+ }
+
+ public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return Tbl(vector, 16, tb0, tb1, tb2, tb3);
+ }
+
+ // TBL table lookup: concatenates up to four 16-byte table vectors, then
+ // uses each of the first 'bytes' bytes of 'vector' as an index into the
+ // table. Out-of-range indices leave the result byte as 0.
+ private static V128 Tbl(V128 vector, int bytes, params V128[] tb)
+ {
+ byte[] res = new byte[16];
+ byte[] table = new byte[tb.Length * 16];
+
+ for (byte index = 0; index < tb.Length; index++)
+ {
+ Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16);
+ }
+
+ byte[] v = vector.ToArray();
+
+ for (byte index = 0; index < bytes; index++)
+ {
+ byte tblIndex = v[index];
+
+ if (tblIndex < table.Length)
+ {
+ res[index] = table[tblIndex];
+ }
+ }
+
+ return new V128(res);
+ }
+#endregion
+
+#region "Crc32"
+ private const uint Crc32RevPoly = 0xedb88320;
+ private const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value);
+ public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value);
+ public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value);
+ public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value);
+
+ public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value);
+ public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value);
+ public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value);
+ public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value);
+
+ private static uint Crc32h(uint crc, uint poly, ushort val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+
+ return crc;
+ }
+
+ private static uint Crc32w(uint crc, uint poly, uint val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+
+ return crc;
+ }
+
+ private static uint Crc32x(uint crc, uint poly, ulong val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+ crc = Crc32(crc, poly, (byte)(val >> 32));
+ crc = Crc32(crc, poly, (byte)(val >> 40));
+ crc = Crc32(crc, poly, (byte)(val >> 48));
+ crc = Crc32(crc, poly, (byte)(val >> 56));
+
+ return crc;
+ }
+
+ // Bitwise CRC-32 update for one byte using the given reflected
+ // (bit-reversed) polynomial; 'mask' is all-ones when the low bit of the
+ // working CRC is set, selecting whether the polynomial is XORed in.
+ private static uint Crc32(uint crc, uint poly, byte val)
+ {
+ crc ^= val;
+
+ for (int bit = 7; bit >= 0; bit--)
+ {
+ uint mask = (uint)(-(int)(crc & 1));
+
+ crc = (crc >> 1) ^ (poly & mask);
+ }
+
+ return crc;
+ }
+#endregion
+
+#region "Aes"
+ public static V128 Decrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey));
+ }
+
+ public static V128 Encrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey));
+ }
+
+ public static V128 InverseMixColumns(V128 value)
+ {
+ return CryptoHelper.AesInvMixColumns(value);
+ }
+
+ public static V128 MixColumns(V128 value)
+ {
+ return CryptoHelper.AesMixColumns(value);
+ }
+#endregion
+
+#region "Sha1"
+ // Four SHA-1 rounds using the Choose function; hash_abcd holds working
+ // variables A..D (element 0 = A), hash_e the fifth variable, wk the
+ // pre-added W+K schedule words.
+ public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaChoose(hash_abcd.GetUInt32(1),
+ hash_abcd.GetUInt32(2),
+ hash_abcd.GetUInt32(3));
+
+ hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e);
+
+ t = Rol(hash_abcd.GetUInt32(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ // SHA-1 "fixed rotate": rotate left by 30.
+ public static uint FixedRotate(uint hash_e)
+ {
+ return hash_e.Rol(30);
+ }
+
+ // Four SHA-1 rounds using the Majority function (same loop shape as
+ // HashChoose, only the round function differs).
+ public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaMajority(hash_abcd.GetUInt32(1),
+ hash_abcd.GetUInt32(2),
+ hash_abcd.GetUInt32(3));
+
+ hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e);
+
+ t = Rol(hash_abcd.GetUInt32(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ // Four SHA-1 rounds using the Parity (XOR) function.
+ public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaParity(hash_abcd.GetUInt32(1),
+ hash_abcd.GetUInt32(2),
+ hash_abcd.GetUInt32(3));
+
+ hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e);
+
+ t = Rol(hash_abcd.GetUInt32(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ // First half of the SHA-1 message schedule update: XOR of w0_3, w8_11
+ // and the 64-bit-shuffled combination of w0_3's high half with w4_7's
+ // low half.
+ public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
+ {
+ ulong t2 = w4_7.GetUInt64(0);
+ ulong t1 = w0_3.GetUInt64(1);
+
+ V128 result = new V128(t1, t2);
+
+ return result ^ (w0_3 ^ w8_11);
+ }
+
+ // Second half of the SHA-1 message schedule update: XOR in the shifted
+ // w12_15, then rotate each element left by 1; the last element also
+ // folds in element 0 rotated by 2.
+ public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
+ {
+ V128 t = tw0_3 ^ (w12_15 >> 32);
+
+ uint tE0 = t.GetUInt32(0);
+ uint tE1 = t.GetUInt32(1);
+ uint tE2 = t.GetUInt32(2);
+ uint tE3 = t.GetUInt32(3);
+
+ return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2));
+ }
+
+ // Rotates the 160-bit state (x:y) left by one 32-bit element: x shifts
+ // up taking the old y in element 0, y receives x's old top element.
+ private static void Rol32_160(ref uint y, ref V128 x)
+ {
+ uint xE3 = x.GetUInt32(3);
+
+ x <<= 32;
+ x.Insert(0, y);
+
+ y = xE3;
+ }
+
+ // SHA Choose: selects bits of y or z according to x.
+ private static uint ShaChoose(uint x, uint y, uint z)
+ {
+ return ((y ^ z) & x) ^ z;
+ }
+
+ // SHA Majority: each bit is the majority vote of x, y and z.
+ private static uint ShaMajority(uint x, uint y, uint z)
+ {
+ return (x & y) | ((x | y) & z);
+ }
+
+ // SHA Parity: plain XOR of the three inputs.
+ private static uint ShaParity(uint x, uint y, uint z)
+ {
+ return x ^ y ^ z;
+ }
+
+ // Rotate left by count bits (count assumed in 1..31 by callers here,
+ // so the 32 - count shift is well defined).
+ private static uint Rol(this uint value, int count)
+ {
+ return (value << count) | (value >> (32 - count));
+ }
+#endregion
+
+#region "Sha256"
+ // Four SHA-256 rounds, returning the updated A..D half of the state.
+ public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
+ }
+
+ // Four SHA-256 rounds, returning the updated E..H half of the state.
+ // Note the swapped parameter order relative to HashLower.
+ public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
+ }
+
+ // First half of the SHA-256 message schedule update: applies the small
+ // sigma0 function to the next word and adds it to each element of w0_3.
+ public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
+ {
+ V128 result = new V128();
+
+ for (int e = 0; e <= 3; e++)
+ {
+ // The "next" word is w0_3[e+1] for e <= 2, else w4_7[0].
+ uint elt = (e <= 2 ? w0_3 : w4_7).GetUInt32(e <= 2 ? e + 1 : 0);
+
+ elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3);
+
+ elt += w0_3.GetUInt32(e);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ // Second half of the SHA-256 message schedule update: applies small
+ // sigma1; elements 2 and 3 depend on the freshly computed elements
+ // 0 and 1, hence the two separate loops.
+ public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
+ {
+ V128 result = new V128();
+
+ ulong t1 = w12_15.GetUInt64(1);
+
+ for (int e = 0; e <= 1; e++)
+ {
+ uint elt = t1.ULongPart(e);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.GetUInt32(e) + w8_11.GetUInt32(e + 1);
+
+ result.Insert(e, elt);
+ }
+
+ t1 = result.GetUInt64(0);
+
+ for (int e = 2; e <= 3; e++)
+ {
+ uint elt = t1.ULongPart(e - 2);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.GetUInt32(e) + (e == 2 ? w8_11 : w12_15).GetUInt32(e == 2 ? 3 : 0);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ // Core of four SHA-256 rounds over the x (A..D) and y (E..H) state
+ // halves; part1 selects which updated half is returned.
+ private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint chs = ShaChoose(y.GetUInt32(0),
+ y.GetUInt32(1),
+ y.GetUInt32(2));
+
+ uint maj = ShaMajority(x.GetUInt32(0),
+ x.GetUInt32(1),
+ x.GetUInt32(2));
+
+ uint t1 = y.GetUInt32(3) + ShaHashSigma1(y.GetUInt32(0)) + chs + w.GetUInt32(e);
+
+ uint t2 = t1 + x.GetUInt32(3);
+
+ x.Insert(3, t2);
+
+ t2 = t1 + ShaHashSigma0(x.GetUInt32(0)) + maj;
+
+ y.Insert(3, t2);
+
+ Rol32_256(ref y, ref x);
+ }
+
+ return part1 ? x : y;
+ }
+
+ // Rotates the combined 256-bit state (x:y) left by one 32-bit element,
+ // exchanging the top elements between the two halves.
+ private static void Rol32_256(ref V128 y, ref V128 x)
+ {
+ uint yE3 = y.GetUInt32(3);
+ uint xE3 = x.GetUInt32(3);
+
+ y <<= 32;
+ x <<= 32;
+
+ y.Insert(0, xE3);
+ x.Insert(0, yE3);
+ }
+
+ // SHA-256 big Sigma0 function.
+ private static uint ShaHashSigma0(uint x)
+ {
+ return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22);
+ }
+
+ // SHA-256 big Sigma1 function.
+ private static uint ShaHashSigma1(uint x)
+ {
+ return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
+ }
+
+ // Rotate right by count bits (count in 1..31 for the callers here).
+ private static uint Ror(this uint value, int count)
+ {
+ return (value >> count) | (value << (32 - count));
+ }
+
+ // Logical shift right; named to mirror the Arm pseudocode operation.
+ private static uint Lsr(this uint value, int count)
+ {
+ return value >> count;
+ }
+
+ // Extracts the low (part 0) or high (part 1) 32 bits of a 64-bit value.
+ private static uint ULongPart(this ulong value, int part)
+ {
+ return part == 0
+ ? (uint)(value & 0xFFFFFFFFUL)
+ : (uint)(value >> 32);
+ }
+#endregion
+
+#region "Reverse"
+ // Reverses the low 8 bits of value using the classic mask-and-swap
+ // technique (swap adjacent bits, then pairs, then nibbles).
+ public static uint ReverseBits8(uint value)
+ {
+ value = ((value & 0xaa) >> 1) | ((value & 0x55) << 1);
+ value = ((value & 0xcc) >> 2) | ((value & 0x33) << 2);
+
+ return (value >> 4) | ((value & 0x0f) << 4);
+ }
+
+ // Reverses all 32 bits of value (RBIT on a W register).
+ public static uint ReverseBits32(uint value)
+ {
+ value = ((value & 0xaaaaaaaa) >> 1) | ((value & 0x55555555) << 1);
+ value = ((value & 0xcccccccc) >> 2) | ((value & 0x33333333) << 2);
+ value = ((value & 0xf0f0f0f0) >> 4) | ((value & 0x0f0f0f0f) << 4);
+ value = ((value & 0xff00ff00) >> 8) | ((value & 0x00ff00ff) << 8);
+
+ return (value >> 16) | (value << 16);
+ }
+
+ // Reverses all 64 bits of value (RBIT on an X register).
+ public static ulong ReverseBits64(ulong value)
+ {
+ value = ((value & 0xaaaaaaaaaaaaaaaa) >> 1 ) | ((value & 0x5555555555555555) << 1 );
+ value = ((value & 0xcccccccccccccccc) >> 2 ) | ((value & 0x3333333333333333) << 2 );
+ value = ((value & 0xf0f0f0f0f0f0f0f0) >> 4 ) | ((value & 0x0f0f0f0f0f0f0f0f) << 4 );
+ value = ((value & 0xff00ff00ff00ff00) >> 8 ) | ((value & 0x00ff00ff00ff00ff) << 8 );
+ value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);
+
+ return (value >> 32) | (value << 32);
+ }
+
+ // REV16 on a 32-bit register: swap the bytes within each halfword.
+ public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value);
+
+ // REV16 / REV32 on a 64-bit register: swap bytes within each 16- or
+ // 32-bit element respectively.
+ public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);
+ public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);
+
+ // Element width for the ReverseBytes helper below.
+ private enum RevSize
+ {
+ Rev16,
+ Rev32,
+ Rev64
+ }
+
+ // Byte-reverses value within each 16-, 32- or 64-bit element, as
+ // selected by size. Each stage widens the swap; the method returns as
+ // soon as the requested element width has been reached.
+ private static ulong ReverseBytes(ulong value, RevSize size)
+ {
+ value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);
+
+ if (size == RevSize.Rev16)
+ {
+ return value;
+ }
+
+ value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);
+
+ if (size == RevSize.Rev32)
+ {
+ return value;
+ }
+
+ value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);
+
+ if (size == RevSize.Rev64)
+ {
+ return value;
+ }
+
+ // Fix: the original passed nameof(size) as the exception *message*;
+ // use the (message, paramName) overload so the offending parameter
+ // is reported correctly.
+ throw new ArgumentException($"Invalid size \"{size}\".", nameof(size));
+ }
+#endregion
+ }
+}
diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs
new file mode 100644
index 00000000..7358e6b2
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFloat.cs
@@ -0,0 +1,2757 @@
+using ARMeilleure.State;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Instructions
+{
+ // Holds the 8-bit lookup tables used by the reciprocal and reciprocal
+ // square root estimate instructions (see FPRecipEstimate below; the
+ // sqrt table is presumably consumed by FPRSqrtEstimate — built per the
+ // Arm pseudocode estimate recurrences).
+ static class SoftFloat
+ {
+ static SoftFloat()
+ {
+ RecipEstimateTable = BuildRecipEstimateTable();
+ RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
+ }
+
+ internal static readonly byte[] RecipEstimateTable;
+ internal static readonly byte[] RecipSqrtEstimateTable;
+
+ // Builds the 256-entry reciprocal estimate table: for each 9-bit
+ // mantissa prefix in [256, 512) it stores the low 8 bits of the
+ // rounded 9-bit reciprocal estimate.
+ private static byte[] BuildRecipEstimateTable()
+ {
+ byte[] tbl = new byte[256];
+
+ for (int idx = 0; idx < 256; idx++)
+ {
+ uint src = (uint)idx + 256u;
+
+ Debug.Assert(256u <= src && src < 512u);
+
+ // Evaluate at the interval midpoint (src + 0.5 scaled by 2).
+ src = (src << 1) + 1u;
+
+ uint aux = (1u << 19) / src;
+
+ // Round to nearest.
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+
+ // Builds the 384-entry reciprocal square root estimate table over
+ // mantissa prefixes in [128, 512) (two ranges: input exponent odd
+ // or even).
+ private static byte[] BuildRecipSqrtEstimateTable()
+ {
+ byte[] tbl = new byte[384];
+
+ for (int idx = 0; idx < 384; idx++)
+ {
+ uint src = (uint)idx + 128u;
+
+ Debug.Assert(128u <= src && src < 512u);
+
+ if (src < 256u)
+ {
+ // Range 0.25 <= x < 0.5: midpoint at odd multiple.
+ src = (src << 1) + 1u;
+ }
+ else
+ {
+ // Range 0.5 <= x < 1.0: force even, then take midpoint.
+ src = (src >> 1) << 1;
+ src = (src + 1u) << 1;
+ }
+
+ // Find the largest aux with src * aux^2 < 2^28, by linear search.
+ uint aux = 512u;
+
+ while (src * (aux + 1u) * (aux + 1u) < (1u << 28))
+ {
+ aux = aux + 1u;
+ }
+
+ // Round to nearest.
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+ }
+
+ // Half-precision (IEEE 754 binary16) to single-precision conversion,
+ // following the Arm FPConvert pseudocode: unpack to an exact double,
+ // then round to binary32 honoring FPCR rounding mode and flags.
+ static class SoftFloat16_32
+ {
+ // Converts the raw half-precision bits to a float, raising the
+ // appropriate FPSR/FPCR exceptions on the current context.
+ public static float FPConvert(ushort valueBits)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ // Default NaN mode: ignore the input payload.
+ result = FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN)
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context);
+ }
+
+ return result;
+ }
+
+ // The Arm default NaN: quiet NaN with sign bit clear and zero
+ // payload (the unary minus clears .NET's NaN sign bit).
+ private static float FPDefaultNaN()
+ {
+ return -float.NaN;
+ }
+
+ private static float FPInfinity(bool sign)
+ {
+ return sign ? float.NegativeInfinity : float.PositiveInfinity;
+ }
+
+ private static float FPZero(bool sign)
+ {
+ return sign ? -0f : +0f;
+ }
+
+ // Largest finite single, used when an overflow does not round to
+ // infinity.
+ private static float FPMaxNormal(bool sign)
+ {
+ return sign ? float.MinValue : float.MaxValue;
+ }
+
+ // Decodes half-precision bits into its class (type), sign, and exact
+ // real value as a double (doubles represent all binary16 values
+ // exactly). In AHP mode exponent 0x1F encodes a normal number.
+ private static double FPUnpackCv(
+ this ushort valueBits,
+ out FPType type,
+ out bool sign,
+ ExecutionContext context)
+ {
+ sign = (~(uint)valueBits & 0x8000u) == 0u;
+
+ uint exp16 = ((uint)valueBits & 0x7C00u) >> 10;
+ uint frac16 = (uint)valueBits & 0x03FFu;
+
+ double real;
+
+ if (exp16 == 0u)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Zero;
+ real = 0d;
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10));
+ }
+ }
+ else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Infinity;
+ // Sentinel huge value; the caller branches on type, not real.
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10));
+ }
+
+ return sign ? -real : real;
+ }
+
+ // Rounds an exact real value to single precision per the FPCR
+ // rounding mode, raising Underflow/Overflow/Inexact as appropriate
+ // (Arm FPRound pseudocode with e = 8 exponent / f = 23 fraction bits).
+ private static float FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -126;
+
+ const int e = 8;
+ const int f = 23;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ // Normalize mantissa into [1, 2).
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ // Flush-to-zero: subnormal results become signed zero with UFC set.
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ // Subnormal: denormalize the mantissa accordingly.
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ // Round to nearest, ties to even.
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ // Subnormal rounded up into the normal range.
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ // Mantissa carry-out: renormalize.
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ float result;
+
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+
+ // Force the Inexact exception below.
+ error = 1d;
+ }
+ else
+ {
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
+ }
+
+ if (error != 0d)
+ {
+ FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ // Widens a half-precision NaN payload to single precision, quieting
+ // it (the 0x7FC00000 pattern sets the quiet bit).
+ private static float FPConvertNaN(ushort valueBits)
+ {
+ return BitConverter.Int32BitsToSingle(
+ (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13));
+ }
+
+ // Either traps (if the matching FPCR enable bit, exc + 8, is set) or
+ // records the exception in the FPSR cumulative flags.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ int enable = (int)exc + 8;
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+
+ // Single-precision to half-precision (IEEE 754 binary16) conversion,
+ // following the Arm FPConvert pseudocode, including the Alternative
+ // Half-Precision (AHP) format selected by FPCR.AHP.
+ static class SoftFloat32_16
+ {
+ // Converts a float to raw half-precision bits, raising the
+ // appropriate exceptions on the current context.
+ public static ushort FPConvert(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp)
+ {
+ // AHP has no NaN encoding; produce signed zero and raise
+ // InvalidOp below.
+ resultBits = FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ resultBits = FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp)
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp)
+ {
+ // AHP has no infinity either; saturate to max magnitude.
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = FPZero(sign);
+ }
+ else
+ {
+ resultBits = FPRoundCv(real, context);
+ }
+
+ return resultBits;
+ }
+
+ // Half-precision default quiet NaN (sign clear, zero payload).
+ private static ushort FPDefaultNaN()
+ {
+ return (ushort)0x7E00u;
+ }
+
+ private static ushort FPInfinity(bool sign)
+ {
+ return sign ? (ushort)0xFC00u : (ushort)0x7C00u;
+ }
+
+ private static ushort FPZero(bool sign)
+ {
+ return sign ? (ushort)0x8000u : (ushort)0x0000u;
+ }
+
+ // Largest finite half, used when an overflow does not round to
+ // infinity.
+ private static ushort FPMaxNormal(bool sign)
+ {
+ return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu;
+ }
+
+ // Decodes a float into its class, sign, raw bits, and exact real
+ // value as a double. Honors FPCR.FZ (subnormal inputs flushed to
+ // zero with InputDenorm raised).
+ private static double FPUnpackCv(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ uint exp32 = (valueBits & 0x7F800000u) >> 23;
+ uint frac32 = valueBits & 0x007FFFFFu;
+
+ double real;
+
+ if (exp32 == 0u)
+ {
+ if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac32 != 0u)
+ {
+ FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23));
+ }
+ }
+ else if (exp32 == 0xFFu)
+ {
+ if (frac32 == 0u)
+ {
+ type = FPType.Infinity;
+ // Sentinel huge value; callers branch on type, not real.
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23));
+ }
+
+ return sign ? -real : real;
+ }
+
+ // Rounds an exact real value to half precision per the FPCR rounding
+ // mode (Arm FPRound with e = 5 / f = 10); the AHP branch saturates
+ // instead of producing infinity on overflow.
+ private static ushort FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -14;
+
+ const int e = 5;
+ const int f = 10;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ // Normalize mantissa into [1, 2).
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ // Subnormal: denormalize the mantissa accordingly.
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ // Round to nearest, ties to even.
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ // Subnormal rounded up into the normal range.
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ // Mantissa carry-out: renormalize.
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ ushort resultBits;
+
+ if ((context.Fpcr & FPCR.Ahp) == 0)
+ {
+ // IEEE half: top exponent value is reserved for inf/NaN.
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+
+ // Force the Inexact exception below.
+ error = 1d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+ else
+ {
+ // AHP: the full exponent range encodes normals; out-of-range
+ // results saturate and raise InvalidOp instead of Overflow.
+ if (biasedExp >= 1u << e)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ FPProcessException(FPException.InvalidOp, context);
+
+ error = 0d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+
+ if (error != 0d)
+ {
+ FPProcessException(FPException.Inexact, context);
+ }
+
+ return resultBits;
+ }
+
+ // Narrows a single-precision NaN payload to half precision, quieting
+ // it (the 0x7E00 pattern sets the quiet bit).
+ private static ushort FPConvertNaN(uint valueBits)
+ {
+ return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13);
+ }
+
+ // Either traps (if the matching FPCR enable bit, exc + 8, is set) or
+ // records the exception in the FPSR cumulative flags.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ int enable = (int)exc + 8;
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+
+ static class SoftFloat32
+ {
+ // FADD (scalar single): Arm FPAdd pseudocode — NaN propagation first,
+ // then the special inf/zero cases, then the native add with optional
+ // flush-to-zero of subnormal results.
+ public static float FPAdd(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ // (+inf) + (-inf) is invalid.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ // Preserve the sign of a zero + zero with equal signs.
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FCMP/FCMPE: returns the NZCV condition flags as a 4-bit value;
+ // unordered is 0b0011, equal 0b0110, less 0b1000, greater 0b0010.
+ public static int FPCompare(float value1, float value2, bool signalNaNs)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context);
+
+ int result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = 0b0011;
+
+ // FCMPE (signalNaNs) raises InvalidOp for quiet NaNs too.
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else
+ {
+ if (value1 == value2)
+ {
+ result = 0b0110;
+ }
+ else if (value1 < value2)
+ {
+ result = 0b1000;
+ }
+ else
+ {
+ result = 0b0010;
+ }
+ }
+
+ return result;
+ }
+
+ // FCMEQ: all-ones (as float bits) if equal, all-zeros otherwise; a
+ // quiet comparison (only signaling NaNs raise InvalidOp).
+ public static float FPCompareEQ(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ // FCMGE: all-ones if value1 >= value2; any NaN operand raises
+ // InvalidOp (signaling comparison).
+ public static float FPCompareGE(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ // FCMGT: all-ones if value1 > value2; any NaN operand raises
+ // InvalidOp (signaling comparison).
+ public static float FPCompareGT(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ // FCMLE is FCMGE with the operands swapped.
+ public static float FPCompareLE(float value1, float value2)
+ {
+ return FPCompareGE(value2, value1);
+ }
+
+ // FCMLT is FCMGT with the operands swapped.
+ public static float FPCompareLT(float value1, float value2)
+ {
+ return FPCompareGT(value2, value1);
+ }
+
+ // FDIV (scalar single): Arm FPDiv pseudocode, including DivideByZero
+ // for finite / zero and InvalidOp for inf/inf and 0/0.
+ public static float FPDiv(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ // Only finite / zero raises DivideByZero, not inf / x.
+ if (!inf1)
+ {
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FMAX (scalar single): Arm FPMax pseudocode. For equal-magnitude
+ // zeros the result is +0 unless both are -0 (sign1 && sign2).
+ public static float FPMax(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FMAXNM: like FMAX but a single quiet NaN operand is replaced with
+ // -infinity so the numeric operand wins.
+ public static float FPMaxNum(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMax(value1, value2);
+ }
+
+ // FMIN (scalar single): mirror of FPMax. For zeros the result is -0
+ // if either operand is -0 (sign1 || sign2).
+ public static float FPMin(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FMINNM: like FMIN but a single quiet NaN operand is replaced with
+ // +infinity so the numeric operand wins.
+ public static float FPMinNum(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMin(value1, value2);
+ }
+
+ // FMUL (scalar single): Arm FPMul pseudocode; inf * 0 is invalid.
+ public static float FPMul(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FMADD / FMLA: valueA + value1 * value2 per the Arm FPMulAdd
+ // pseudocode; the quiet-NaN addend with an inf*0 product is a special
+ // invalid case handled even when NaN propagation already produced a
+ // result.
+ public static float FPMulAdd(float valueA, float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+
+ // NOTE(review): unfused fallback — rounds twice, so results
+ // can differ from a true FMA in the last bit.
+ result = valueA + (value1 * value2);
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FMSUB / FMLS: implemented as FPMulAdd with value1 negated.
+ public static float FPMulSub(float valueA, float value1, float value2)
+ {
+ // NOTE(review): context is fetched but unused here; FPMulAdd
+ // fetches its own. Presumably removable — confirm GetContext()
+ // has no required side effect.
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ // FMULX: like FMUL, except inf * 0 yields +/-2.0 instead of raising
+ // InvalidOp.
+ public static float FPMulX(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Table-based reciprocal estimate (FRECPE): special-cases NaN, infinity,
+ // zero, very small inputs (whose reciprocal overflows) and, under
+ // flush-to-zero, very large inputs; otherwise the top 8 fraction bits of
+ // the estimate come from SoftFloat.RecipEstimateTable.
+ public static float FPRecipEstimate(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ // 1/±inf = ±0.
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ // 1/±0 = ±inf, raising Divide-by-Zero.
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (MathF.Abs(value) < MathF.Pow(2f, -128))
+ {
+ // Reciprocal overflows: pick inf or max-normal per the FPCR rounding
+ // mode and the operand's sign.
+ bool overflowToInf;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+ FPProcessException(FPException.Inexact, context);
+ }
+ else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126)))
+ {
+ // Flush-to-zero: a huge input yields a zero result and sets underflow.
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ // Widen the 23-bit fraction into a 52-bit field so the same table
+ // arithmetic as the double-precision path applies.
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ // Normalize a subnormal input; exp may wrap below zero — later
+ // arithmetic on it is intentionally modular.
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ // scaled is in [256, 511]; the table is indexed by scaled - 256.
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 253u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ // Subnormal result: shift the implicit bit into the fraction.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ // resultExp wrapped to -1: result is an even smaller subnormal.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
+ }
+
+ return result;
+ }
+
+ // FRECPS step: computes 2 - value1 * value2 (value1 is negated up front),
+ // with the architectural special cases for NaNs and inf * 0.
+ public static float FPRecipStepFused(float value1, float value2)
+ {
+ ExecutionContext ctx = NativeInterface.GetContext();
+
+ value1 = value1.FPNeg().FPUnpack(out FPType typeA, out bool signA, out uint bitsA, ctx);
+ value2 = value2.FPUnpack(out FPType typeB, out bool signB, out uint bitsB, ctx);
+
+ float result = FPProcessNaNs(typeA, typeB, bitsA, bitsB, out bool nanHandled, ctx);
+
+ if (nanHandled)
+ {
+ return result;
+ }
+
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+ bool infB = typeB == FPType.Infinity; bool zeroB = typeB == FPType.Zero;
+
+ if ((infA && zeroB) || (zeroA && infB))
+ {
+ // inf * 0 contributes nothing: the step result is exactly 2.
+ return FPTwo(false);
+ }
+
+ if (infA || infB)
+ {
+ return FPInfinity(signA ^ signB);
+ }
+
+ // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+ result = 2f + (value1 * value2);
+
+ if ((ctx.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ ctx.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+
+ return result;
+ }
+
+ // FRECPX: exponent-only reciprocal. Flips the exponent bits and clears the
+ // fraction; NaN operands go through normal NaN processing first.
+ public static float FPRecpX(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else
+ {
+ uint notExp = (~op >> 23) & 0xFFu;
+ uint maxExp = 0xFEu;
+
+ // Zero/denormal input inverts to notExp == 0xFF, which is clamped to
+ // the largest normal exponent.
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23));
+ }
+
+ return result;
+ }
+
+ // Table-based reciprocal square root estimate (FRSQRTE). Zero raises
+ // Divide-by-Zero, negative inputs are invalid, +inf maps to +0; otherwise
+ // the estimate comes from SoftFloat.RecipSqrtEstimateTable.
+ public static float FPRSqrtEstimate(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (sign)
+ {
+ // sqrt of a negative number: invalid operation, default NaN.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ // Widen the fraction to a 52-bit field (shared layout with the
+ // double-precision path).
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ // Normalize a subnormal input one bit at a time.
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ // The table index depends on the exponent's parity (scaled is 128..511).
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (380u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ // Estimate's low 8 bits become the top fraction bits of the result.
+ result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
+ }
+
+ return result;
+ }
+
+ // FRSQRTS step: computes (3 - value1 * value2) / 2 (value1 is negated up
+ // front), with the architectural special cases for NaNs and inf * 0.
+ public static float FPRSqrtStepFused(float value1, float value2)
+ {
+ ExecutionContext ctx = NativeInterface.GetContext();
+
+ value1 = value1.FPNeg().FPUnpack(out FPType typeA, out bool signA, out uint bitsA, ctx);
+ value2 = value2.FPUnpack(out FPType typeB, out bool signB, out uint bitsB, ctx);
+
+ float result = FPProcessNaNs(typeA, typeB, bitsA, bitsB, out bool nanHandled, ctx);
+
+ if (nanHandled)
+ {
+ return result;
+ }
+
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+ bool infB = typeB == FPType.Infinity; bool zeroB = typeB == FPType.Zero;
+
+ if ((infA && zeroB) || (zeroA && infB))
+ {
+ // inf * 0 contributes nothing: the step result is exactly 1.5.
+ return FPOnePointFive(false);
+ }
+
+ if (infA || infB)
+ {
+ return FPInfinity(signA ^ signB);
+ }
+
+ // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+ result = (3f + (value1 * value2)) / 2f;
+
+ if ((ctx.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ ctx.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+
+ return result;
+ }
+
+ // Square root with ARM special-case handling: NaNs are processed, ±0 maps
+ // to itself, +inf maps to +inf, and any other negative input is invalid.
+ public static float FPSqrt(float value)
+ {
+ ExecutionContext ctx = NativeInterface.GetContext();
+
+ value = value.FPUnpack(out FPType type, out bool sign, out uint bits, ctx);
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ return FPProcessNaN(type, bits, ctx);
+ }
+
+ if (type == FPType.Zero)
+ {
+ // sqrt(±0) = ±0.
+ return FPZero(sign);
+ }
+
+ if (type == FPType.Infinity && !sign)
+ {
+ return FPInfinity(sign);
+ }
+
+ if (sign)
+ {
+ // Negative operand: invalid operation, default NaN.
+ FPProcessException(FPException.InvalidOp, ctx);
+
+ return FPDefaultNaN();
+ }
+
+ float root = MathF.Sqrt(value);
+
+ if ((ctx.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(root))
+ {
+ ctx.Fpsr |= FPSR.Ufc;
+
+ root = FPZero(root < 0f);
+ }
+
+ return root;
+ }
+
+ // Subtraction with ARM special-case handling for infinities and signed
+ // zeros; subnormal results are flushed when FPCR.FZ is set.
+ public static float FPSub(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ // inf - inf with like signs is invalid.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ // +inf - x, or x - (-inf).
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ // -inf - x, or x - (+inf).
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ // (+0) - (-0) = +0; (-0) - (+0) = -0.
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // ARM default NaN: positive sign, quiet bit set (0x7FC00000).
+ private static float FPDefaultNaN()
+ {
+ return BitConverter.Int32BitsToSingle(0x7FC00000);
+ }
+
+ // Signed infinity.
+ private static float FPInfinity(bool sign)
+ {
+ float inf = float.PositiveInfinity;
+
+ return sign ? -inf : inf;
+ }
+
+ // Signed zero (the sign bit is preserved on -0f).
+ private static float FPZero(bool sign)
+ {
+ return BitConverter.Int32BitsToSingle(sign ? int.MinValue : 0);
+ }
+
+ // Largest finite magnitude with the requested sign.
+ private static float FPMaxNormal(bool sign)
+ {
+ float max = float.MaxValue;
+
+ return sign ? -max : max;
+ }
+
+ // ±2, used by the reciprocal-step special cases.
+ private static float FPTwo(bool sign)
+ {
+ float two = 2f;
+
+ return sign ? -two : two;
+ }
+
+ // ±1.5, used by the reciprocal-sqrt-step special cases.
+ private static float FPOnePointFive(bool sign)
+ {
+ float onePointFive = 1.5f;
+
+ return sign ? -onePointFive : onePointFive;
+ }
+
+ // IEEE 754 negation: flips only the sign bit (also valid for NaNs).
+ private static float FPNeg(this float value)
+ {
+ return -value;
+ }
+
+ // All-ones (a NaN bit pattern) or all-zeros, as produced by SIMD compares.
+ private static float ZerosOrOnes(bool ones)
+ {
+ int bits = ones ? ~0 : 0;
+
+ return BitConverter.Int32BitsToSingle(bits);
+ }
+
+ // Classifies a single-precision value and returns its raw bits.
+ // Under FPCR.FZ, subnormal inputs are flushed to zero (classified as Zero)
+ // and raise InputDenorm. For NaNs the returned float is zeroed — callers
+ // are expected to use valueBits (via FPProcessNaN) instead.
+ private static float FPUnpack(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ // True when the sign bit is set.
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ if ((valueBits & 0x7F800000u) == 0u)
+ {
+ // Exponent all zeros: zero or subnormal.
+ if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x007FFFFFu) != 0u)
+ {
+ // A flushed subnormal, not a true zero.
+ FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7F800000u) == 0u)
+ {
+ // Exponent all ones: infinity or NaN.
+ if ((valueBits & 0x007FFFFFu) == 0u)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ // Top fraction bit distinguishes quiet from signaling.
+ type = (~valueBits & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
+
+ // Two-operand NaN propagation: signaling NaNs take priority over quiet
+ // ones, operand 1 over operand 2. done is false when neither is a NaN.
+ private static float FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ uint op1,
+ uint op2,
+ out bool done,
+ ExecutionContext context)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN) return FPProcessNaN(type1, op1, context);
+ if (type2 == FPType.SNaN) return FPProcessNaN(type2, op2, context);
+ if (type1 == FPType.QNaN) return FPProcessNaN(type1, op1, context);
+ if (type2 == FPType.QNaN) return FPProcessNaN(type2, op2, context);
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ // Three-operand NaN propagation (fused multiply-add): all signaling NaNs
+ // are considered before any quiet NaN, in operand order 1, 2, 3.
+ // done is false when no operand is a NaN.
+ private static float FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ uint op1,
+ uint op2,
+ uint op3,
+ out bool done,
+ ExecutionContext context)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ // Quiets a signaling NaN (setting the top fraction bit and raising
+ // InvalidOp); with FPCR.DN set the default NaN is returned instead.
+ private static float FPProcessNaN(FPType type, uint op, ExecutionContext context)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1u << 22;
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+
+ return (context.Fpcr & FPCR.Dn) != 0
+ ? FPDefaultNaN()
+ : BitConverter.Int32BitsToSingle((int)op);
+ }
+
+ // Records a floating-point exception in the cumulative FPSR flag bits.
+ // The corresponding trap-enable bit in FPCR sits 8 positions above the
+ // flag bit; trapped exceptions are not implemented.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ int enable = (int)exc + 8;
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+
+ static class SoftFloat64
+ {
+ // Addition with ARM special-case handling for infinities and signed
+ // zeros; subnormal results are flushed when FPCR.FZ is set.
+ public static double FPAdd(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ // inf + (-inf) is invalid.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ // (±0) + (±0) with matching signs keeps the sign.
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Compare producing an NZCV nibble: equal = 0110, less = 1000,
+ // greater = 0010, unordered = 0011. signalNaNs forces InvalidOp even
+ // for quiet NaNs (FCMPE behavior).
+ public static int FPCompare(double value1, double value2, bool signalNaNs)
+ {
+ ExecutionContext ctx = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType typeA, out _, out _, ctx);
+ value2 = value2.FPUnpack(out FPType typeB, out _, out _, ctx);
+
+ bool nanA = typeA == FPType.SNaN || typeA == FPType.QNaN;
+ bool nanB = typeB == FPType.SNaN || typeB == FPType.QNaN;
+
+ if (nanA || nanB)
+ {
+ if (typeA == FPType.SNaN || typeB == FPType.SNaN || signalNaNs)
+ {
+ FPProcessException(FPException.InvalidOp, ctx);
+ }
+
+ return 0b0011;
+ }
+
+ if (value1 == value2)
+ {
+ return 0b0110;
+ }
+
+ return value1 < value2 ? 0b1000 : 0b0010;
+ }
+
+ // Vector compare-equal: returns all-ones when equal, all-zeros otherwise.
+ // NaNs compare unequal; only signaling NaNs raise InvalidOp here.
+ public static double FPCompareEQ(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ // Vector compare-greater-or-equal: all-ones when value1 >= value2.
+ // Any NaN operand yields false and raises InvalidOp (unlike compare-equal,
+ // which only signals on SNaNs).
+ public static double FPCompareGE(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ // Vector compare-greater-than: all-ones when value1 > value2.
+ // Any NaN operand yields false and raises InvalidOp.
+ public static double FPCompareGT(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ // a <= b is b >= a with the operands swapped.
+ public static double FPCompareLE(double value1, double value2)
+ {
+ double swapped = FPCompareGE(value2, value1);
+
+ return swapped;
+ }
+
+ // a < b is b > a with the operands swapped.
+ public static double FPCompareLT(double value1, double value2)
+ {
+ double swapped = FPCompareGT(value2, value1);
+
+ return swapped;
+ }
+
+ // Division with ARM special cases: inf/inf and 0/0 are invalid; x/0
+ // yields infinity with Divide-by-Zero (unless the numerator was already
+ // infinite); 0/x and x/inf yield zero.
+ public static double FPDiv(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ if (!inf1)
+ {
+ // Finite numerator over zero: raise Divide-by-Zero.
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Maximum with ARM special-case handling. When both operands are zeros,
+ // the result is -0 only if both are negative (sign1 && sign2).
+ public static double FPMax(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ // NOTE(review): flush-to-zero is only applied on this (value2)
+ // branch; the value1 branch above returns value1 unflushed —
+ // verify against the reference pseudocode.
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // maxNum semantics: a quiet NaN operand is replaced with -infinity so the
+ // other (numeric) operand wins in FPMax; two QNaNs fall through to the
+ // normal NaN propagation inside FPMax.
+ public static double FPMaxNum(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMax(value1, value2);
+ }
+
+ // Minimum with ARM special-case handling. When both operands are zeros,
+ // the result is -0 if either is negative (sign1 || sign2).
+ public static double FPMin(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ // NOTE(review): flush-to-zero is only applied on this (value2)
+ // branch, mirroring FPMax — verify against the reference pseudocode.
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // minNum semantics: a quiet NaN operand is replaced with +infinity so the
+ // other (numeric) operand wins in FPMin; two QNaNs fall through to the
+ // normal NaN propagation inside FPMin.
+ public static double FPMinNum(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1.FPUnpack(out FPType type1, out _, out _, context);
+ value2.FPUnpack(out FPType type2, out _, out _, context);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMin(value1, value2);
+ }
+
+ // Multiplication with ARM special-case handling: inf * 0 is invalid;
+ // otherwise infinities and zeros propagate with the XOR of the signs.
+ public static double FPMul(double value1, double value2)
+ {
+ ExecutionContext ctx = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType typeA, out bool signA, out ulong bitsA, ctx);
+ value2 = value2.FPUnpack(out FPType typeB, out bool signB, out ulong bitsB, ctx);
+
+ double result = FPProcessNaNs(typeA, typeB, bitsA, bitsB, out bool nanHandled, ctx);
+
+ if (nanHandled)
+ {
+ return result;
+ }
+
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+ bool infB = typeB == FPType.Infinity; bool zeroB = typeB == FPType.Zero;
+ bool signOut = signA ^ signB;
+
+ if ((infA && zeroB) || (zeroA && infB))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, ctx);
+
+ return result;
+ }
+
+ if (infA || infB)
+ {
+ return FPInfinity(signOut);
+ }
+
+ if (zeroA || zeroB)
+ {
+ return FPZero(signOut);
+ }
+
+ double product = value1 * value2;
+
+ if ((ctx.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(product))
+ {
+ ctx.Fpsr |= FPSR.Ufc;
+
+ product = FPZero(product < 0d);
+ }
+
+ return product;
+ }
+
+ // Fused multiply-add: valueA + value1 * value2, with ARM special cases.
+ public static double FPMulAdd(double valueA, double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+ // A QNaN addend combined with an inf * 0 product overrides the NaN
+ // propagation above: default NaN and InvalidOp, even when done is true.
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ // Sign/class of the product value1 * value2.
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ // inf * 0, or addend and product are infinities of opposite sign.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+
+ result = valueA + (value1 * value2);
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // a - b * c, computed as a + (-b) * c via the fused multiply-add path.
+ public static double FPMulSub(double valueA, double value1, double value2)
+ {
+ return FPMulAdd(valueA, value1.FPNeg(), value2);
+ }
+
+ // FMULX: like FPMul, but inf * 0 yields ±2 (sign-XOR) instead of an
+ // invalid-operation NaN.
+ public static double FPMulX(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Double-precision table-based reciprocal estimate (FRECPE); same
+ // structure as the single-precision version, with 11-bit exponent and
+ // 52-bit fraction fields.
+ public static double FPRecipEstimate(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (Math.Abs(value) < Math.Pow(2d, -1024))
+ {
+ // Reciprocal overflows: round to inf or max-normal per FPCR rounding.
+ bool overflowToInf;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+ FPProcessException(FPException.Inexact, context);
+ }
+ else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022)))
+ {
+ // Flush-to-zero: a huge input yields a zero result and sets underflow.
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ // Normalize a subnormal input; exp arithmetic is modular.
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ // scaled is in [256, 511]; table is indexed by scaled - 256.
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 2045u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ // Subnormal result: shift the implicit bit into the fraction.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ // resultExp wrapped to -1: even smaller subnormal result.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ return result;
+ }
+
+ // FRECPS step: 2 - value1 * value2 (value1 negated up front), with the
+ // architectural special cases for NaNs and inf * 0.
+ public static double FPRecipStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ // inf * 0 contributes nothing: the step result is exactly 2.
+ result = FPTwo(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+
+ result = 2d + (value1 * value2);
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // FRECPX: exponent-only reciprocal — flips the exponent bits and clears
+ // the fraction; NaN operands go through normal NaN processing first.
+ public static double FPRecpX(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else
+ {
+ ulong notExp = (~op >> 52) & 0x7FFul;
+ ulong maxExp = 0x7FEul;
+
+ // Zero/denormal input inverts to notExp == 0x7FF, clamped to the
+ // largest normal exponent.
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52));
+ }
+
+ return result;
+ }
+
+ // Double-precision table-based reciprocal square root estimate (FRSQRTE);
+ // same structure as the single-precision version.
+ public static double FPRSqrtEstimate(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (sign)
+ {
+ // sqrt of a negative number: invalid operation, default NaN.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ // Normalize a subnormal input one bit at a time.
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ // Table index depends on the exponent's parity (scaled is 128..511).
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (3068u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
+ }
+
+ return result;
+ }
+
+ // FRSQRTS step: (3 - value1 * value2) / 2 (value1 negated up front), with
+ // the architectural special cases for NaNs and inf * 0.
+ public static double FPRSqrtStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ // inf * 0 contributes nothing: the step result is exactly 1.5.
+ result = FPOnePointFive(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+ // https://github.com/dotnet/corefx/issues/31903
+
+ result = (3d + (value1 * value2)) / 2d;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Square root with ARM special-case handling: ±0 maps to itself, +inf to
+ // +inf, and any other negative input is an invalid operation.
+ public static double FPSqrt(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Infinity && !sign)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ result = Math.Sqrt(value);
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+
+ return result;
+ }
+
+ // Subtraction with ARM special-case handling for infinities and signed
+ // zeros; subnormal results are flushed when FPCR.FZ is set.
+ public static double FPSub(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ // inf - inf with like signs is invalid.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ // +inf - x, or x - (-inf).
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ // -inf - x, or x - (+inf).
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ // (+0) - (-0) = +0; (-0) - (+0) = -0.
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // ARM default NaN: positive sign, quiet bit set (0x7FF8000000000000).
+ private static double FPDefaultNaN()
+ {
+ return BitConverter.Int64BitsToDouble(0x7FF8000000000000L);
+ }
+
+ // Signed infinity.
+ private static double FPInfinity(bool sign)
+ {
+ double inf = double.PositiveInfinity;
+
+ return sign ? -inf : inf;
+ }
+
+ // Signed zero (the sign bit is preserved on -0d).
+ private static double FPZero(bool sign)
+ {
+ return BitConverter.Int64BitsToDouble(sign ? long.MinValue : 0L);
+ }
+
+ // Largest finite magnitude with the requested sign.
+ private static double FPMaxNormal(bool sign)
+ {
+ double max = double.MaxValue;
+
+ return sign ? -max : max;
+ }
+
+ // ±2, used by the reciprocal-step special cases.
+ private static double FPTwo(bool sign)
+ {
+ double two = 2d;
+
+ return sign ? -two : two;
+ }
+
+ // ±1.5, used by the reciprocal-sqrt-step special cases.
+ private static double FPOnePointFive(bool sign)
+ {
+ double onePointFive = 1.5d;
+
+ return sign ? -onePointFive : onePointFive;
+ }
+
+ // IEEE 754 negation: flips only the sign bit (also valid for NaNs).
+ private static double FPNeg(this double value)
+ {
+ return -value;
+ }
+
+ // All-ones (a NaN bit pattern) or all-zeros, as produced by SIMD compares.
+ private static double ZerosOrOnes(bool ones)
+ {
+ long bits = ones ? ~0L : 0L;
+
+ return BitConverter.Int64BitsToDouble(bits);
+ }
+
+ // Classifies a double-precision value and returns its raw bits.
+ // Under FPCR.FZ, subnormal inputs are flushed to zero (classified as Zero)
+ // and raise InputDenorm. For NaNs the returned double is zeroed — callers
+ // are expected to use valueBits (via FPProcessNaN) instead.
+ private static double FPUnpack(
+ this double value,
+ out FPType type,
+ out bool sign,
+ out ulong valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+ // True when the sign bit is set.
+ sign = (~valueBits & 0x8000000000000000ul) == 0ul;
+
+ if ((valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ // Exponent all zeros: zero or subnormal.
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul)
+ {
+ // A flushed subnormal, not a true zero.
+ FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ // Exponent all ones: infinity or NaN.
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ // Top fraction bit distinguishes quiet from signaling.
+ type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
+
+ // Two-operand NaN propagation: signaling NaNs take priority over quiet
+ // ones, operand 1 over operand 2. done is false when neither is a NaN.
+ private static double FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ ulong op1,
+ ulong op2,
+ out bool done,
+ ExecutionContext context)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ // Three-operand NaN propagation (fused multiply-add): all signaling NaNs
+ // are considered before any quiet NaN, in operand order 1, 2, 3.
+ // done is false when no operand is a NaN.
+ private static double FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ ulong op1,
+ ulong op2,
+ ulong op3,
+ out bool done,
+ ExecutionContext context)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ // Quiets a signaling NaN by setting the top fraction bit (bit 51) and
+ // raising InvalidOp; with FPCR.DN set, returns the default NaN instead.
+ private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1ul << 51;
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ return FPDefaultNaN();
+ }
+
+ return BitConverter.Int64BitsToDouble((long)op);
+ }
+
+ // Records a floating-point exception in the cumulative FPSR flag bits.
+ // The corresponding trap-enable bit in FPCR sits 8 positions above the
+ // flag bit; trapped exceptions are not implemented.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ int enable = (int)exc + 8;
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+}