diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2019-08-08 15:56:22 -0300 |
|---|---|---|
| committer | emmauss <emmausssss@gmail.com> | 2019-08-08 21:56:22 +0300 |
| commit | a731ab3a2aad56e6ceb8b4e2444a61353246295c (patch) | |
| tree | c7f13f51bfec6b19431e62167811ae31e9d2fea9 /ARMeilleure/Instructions | |
| parent | 1ba58e9942e54175e3f3a0e1d57a48537f4888b1 (diff) | |
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
Diffstat (limited to 'ARMeilleure/Instructions')
36 files changed, 17870 insertions, 0 deletions
diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs new file mode 100644 index 00000000..b6b4a62d --- /dev/null +++ b/ARMeilleure/Instructions/CryptoHelper.cs @@ -0,0 +1,279 @@ +// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using ARMeilleure.State; + +namespace ARMeilleure.Instructions +{ + static class CryptoHelper + { +#region "LookUp Tables" + private static readonly byte[] _sBox = new byte[] + { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + private static readonly byte[] _invSBox = new byte[] + { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + private static readonly byte[] _gfMul02 = new byte[] + { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 + }; + + private static readonly byte[] _gfMul03 = new byte[] + { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a + }; + + private static readonly byte[] _gfMul09 = new byte[] + { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 + }; + + private static readonly byte[] _gfMul0B = new byte[] + { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 + }; + + private static readonly byte[] _gfMul0D = new byte[] + { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 + }; + + private static readonly byte[] _gfMul0E = new byte[] + { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d + }; + + private static readonly byte[] _srPerm = new byte[] + { + 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 + }; + + private static readonly byte[] _isrPerm = new byte[] + { + 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 + }; +#endregion + + public static V128 AesInvMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]); + outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]); + outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]); + outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]); + } + + return new V128(outState); + } + + public static V128 AesInvShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_isrPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesInvSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _invSBox[inState[idx]]; + } + + return new V128(outState); + } + + public static V128 AesMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3); + outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3); + outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]); + outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]); + } + + return new V128(outState); + } + + public static V128 AesShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_srPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _sBox[inState[idx]]; + } + + return new V128(outState); + } + } +} diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs new file mode 100644 index 00000000..e90e4d77 --- /dev/null +++ b/ARMeilleure/Instructions/DelegateTypes.cs @@ -0,0 +1,78 @@ +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Instructions +{ + delegate double _F64_F64(double a1); + delegate double _F64_F64_F64(double a1, double a2); + delegate double _F64_F64_F64_F64(double a1, double a2, double a3); + delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2); + + delegate float _F32_F32(float a1); + delegate float _F32_F32_F32(float a1, float a2); + delegate float _F32_F32_F32_F32(float a1, float a2, float a3); + delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2); + delegate float _F32_U16(ushort a1); + + delegate int _S32_F32(float a1); + delegate int _S32_F32_F32_Bool(float a1, float a2, bool a3); + delegate int _S32_F64(double a1); + delegate int _S32_F64_F64_Bool(double a1, double a2, bool a3); + delegate int _S32_U64_U16(ulong a1, ushort a2); + delegate int _S32_U64_U32(ulong a1, uint a2); + delegate int _S32_U64_U64(ulong a1, ulong a2); + delegate int _S32_U64_U8(ulong a1, byte a2); + delegate int _S32_U64_V128(ulong a1, V128 a2); + + delegate long _S64_F32(float a1); + delegate long _S64_F64(double a1); + delegate long _S64_S64(long a1); + delegate long _S64_S64_S32(long a1, int a2); + delegate long _S64_S64_S64(long a1, long a2); + delegate long _S64_S64_S64_Bool_S32(long a1, long a2, bool a3, int a4); + delegate long _S64_S64_S64_S32(long a1, long a2, int a3); + delegate long _S64_U64_S32(ulong a1, int a2); + delegate long _S64_U64_S64(ulong a1, long a2); + + delegate ushort _U16_F32(float a1); + delegate ushort _U16_U64(ulong a1); + + delegate uint _U32_F32(float a1); + delegate uint _U32_F64(double a1); + delegate uint _U32_U32(uint a1); + delegate uint _U32_U32_U16(uint a1, ushort a2); + delegate uint _U32_U32_U32(uint a1, uint a2); + delegate uint _U32_U32_U64(uint a1, ulong a2); + delegate uint _U32_U32_U8(uint a1, byte a2); + delegate uint _U32_U64(ulong a1); + + delegate ulong _U64(); + delegate ulong _U64_F32(float a1); + delegate ulong _U64_F64(double a1); + delegate ulong _U64_S64_S32(long a1, int a2); + delegate ulong _U64_S64_U64(long a1, ulong a2); + delegate ulong _U64_U64(ulong a1); + delegate ulong _U64_U64_S32(ulong a1, int a2); + delegate ulong _U64_U64_S64_S32(ulong a1, long a2, int a3); + delegate ulong _U64_U64_U64(ulong a1, ulong a2); + delegate ulong _U64_U64_U64_Bool_S32(ulong a1, ulong a2, bool a3, int a4); + + delegate byte _U8_U64(ulong a1); + + delegate V128 _V128_U64(ulong a1); + delegate V128 _V128_V128(V128 a1); + delegate V128 _V128_V128_U32_V128(V128 a1, uint a2, V128 a3); + delegate V128 _V128_V128_V128(V128 a1, V128 a2); + delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3); + delegate V128 _V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4); + delegate V128 _V128_V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4, V128 a5); + + delegate void _Void(); + delegate void _Void_U64(ulong a1); + delegate void _Void_U64_S32(ulong a1, int a2); + delegate void _Void_U64_U16(ulong a1, ushort a2); + delegate void _Void_U64_U32(ulong a1, uint a2); + delegate void _Void_U64_U64(ulong a1, ulong a2); + delegate void _Void_U64_U8(ulong a1, byte a2); + delegate void _Void_U64_V128(ulong a1, V128 a2); +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs new file mode 100644 index 00000000..947c9f70 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -0,0 +1,369 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false); + public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true); + + private static void EmitAdc(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + if (context.CurrOp.RegisterSize == RegisterSize.Int64) + { + carry = context.ZeroExtend32(OperandType.I64, carry); + } + + d = context.Add(d, carry); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + + EmitAdcsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + } + + SetAluDOrZR(context, d); + } + + public static void Add(ArmEmitterContext context) + { + SetAluD(context, context.Add(GetAluN(context), GetAluM(context))); + } + + public static void Adds(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + context.MarkComparison(n, m); + + Operand d = context.Add(n, m); + + EmitNZFlagsCheck(context, d); + + EmitAddsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + + SetAluDOrZR(context, d); + } + + public static void And(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context))); + } + + public static void Ands(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseAnd(n, m); + + EmitNZFlagsCheck(context, d); + EmitCVFlagsClear(context); + + SetAluDOrZR(context, d); + } + + public static void Asrv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context))); + } + + public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false); + public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true); + + private static void EmitBic(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + EmitCVFlagsClear(context); + } + + SetAluD(context, d, setFlags); + } + + public static void Cls(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + Operand nHigh = context.ShiftRightUI(n, Const(1)); + + bool is32Bits = op.RegisterSize == RegisterSize.Int32; + + Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue); + + Operand nLow = context.BitwiseAnd(n, mask); + + Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow)); + + res = context.Subtract(res, Const(res.Type, 1)); + + SetAluDOrZR(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + Operand d = context.CountLeadingZeros(n); + + SetAluDOrZR(context, d); + } + + public static void Eon(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m)); + + SetAluD(context, d); + } + + public static void Eor(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context))); + } + + public static void Extr(ArmEmitterContext context) + { + OpCodeAluRs op = (OpCodeAluRs)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rm); + + if (op.Shift != 0) + { + if (op.Rn == op.Rm) + { + res = context.RotateRight(res, Const(op.Shift)); + } + else + { + res = context.ShiftRightUI(res, Const(op.Shift)); + + Operand n = GetIntOrZR(context, op.Rn); + + int invShift = op.GetBitsCount() - op.Shift; + + res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift))); + } + } + + SetAluDOrZR(context, res); + } + + public static void Lslv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context))); + } + + public static void Lsrv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context))); + } + + public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false); + public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true); + + private static void EmitSbc(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.Subtract(n, m); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + if (context.CurrOp.RegisterSize == RegisterSize.Int64) + { + borrow = context.ZeroExtend32(OperandType.I64, borrow); + } + + d = context.Subtract(d, borrow); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + + EmitSbcsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + } + + SetAluDOrZR(context, d); + } + + public static void Sub(ArmEmitterContext context) + { + SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context))); + } + + public static void Subs(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + context.MarkComparison(n, m); + + Operand d = context.Subtract(n, m); + + EmitNZFlagsCheck(context, d); + + EmitSubsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + + SetAluDOrZR(context, d); + } + + public static void Orn(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseOr(n, context.BitwiseNot(m)); + + SetAluD(context, d); + } + + public static void Orr(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context))); + } + + public static void Rbit(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand d; + + if (op.RegisterSize == RegisterSize.Int32) + { + d = context.Call(new _U32_U32(SoftFallback.ReverseBits32), n); + } + else + { + d = context.Call(new _U64_U64(SoftFallback.ReverseBits64), n); + } + + SetAluDOrZR(context, d); + } + + public static void Rev16(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand d; + + if (op.RegisterSize == RegisterSize.Int32) + { + d = context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), n); + } + else + { + d = context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), n); + } + + SetAluDOrZR(context, d); + } + + public static void Rev32(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + SetAluDOrZR(context, context.ByteSwap(n)); + } + else + { + Operand d = context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), n); + + SetAluDOrZR(context, d); + } + } + + public static void Rev64(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn))); + } + + public static void Rorv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context))); + } + + private static Operand GetAluMShift(ArmEmitterContext context) + { + IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp; + + Operand m = GetIntOrZR(context, op.Rm); + + if (op.RegisterSize == RegisterSize.Int64) + { + m = context.ConvertI64ToI32(m); + } + + return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1)); + } + + private static void EmitCVFlagsClear(ArmEmitterContext context) + { + SetFlag(context, PState.CFlag, Const(0)); + SetFlag(context, PState.VFlag, Const(0)); + } + + public static void SetAluD(ArmEmitterContext context, Operand d) + { + SetAluD(context, d, x31IsZR: false); + } + + public static void SetAluDOrZR(ArmEmitterContext context, Operand d) + { + SetAluD(context, d, x31IsZR: true); + } + + public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR) + { + IOpCodeAlu op = (IOpCodeAlu)context.CurrOp; + + if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex) + { + return; + } + + SetIntOrSP(context, op.Rd, d); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs new file mode 100644 index 00000000..79b0abbc --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -0,0 +1,129 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Add(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Cmp(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } + + public static void Mov(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand m = GetAluM(context); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, m); + } + + EmitAluStore(context, m); + } + + public static void Sub(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + if (op.Rd == RegisterAlias.Aarch32Pc) + { + if (op.SetFlags) + { + // TODO: Load SPSR etc. + Operand isThumb = GetFlag(PState.TFlag); + + Operand lblThumb = Label(); + + context.BranchIfTrue(lblThumb, isThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3)))); + + context.MarkLabel(lblThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + } + else + { + EmitAluWritePc(context, value); + } + } + else + { + SetIntA32(context, op.Rd, value); + } + } + + private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + { + context.StoreToContext(); + + if (IsThumb(context.CurrOp)) + { + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + } + else + { + EmitBxWritePc(context, value); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs new file mode 100644 index 00000000..81d5c9eb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -0,0 +1,351 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitAluHelper + { + public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d) + { + SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0))); + SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0))); + } + + public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = (Rd == Rn && CIn) || Rd < Rn + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn); + + cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = Rd < Rn + SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n)); + } + + public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0 + Operand vOut = context.BitwiseExclusiveOr(d, n); + + vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m))); + + vOut = context.ICompareLess(vOut, Const(vOut.Type, 0)); + + SetFlag(context, PState.VFlag, vOut); + } + + public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = (Rn == Rm && CIn) || Rn > Rm + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn); + + cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = Rn >= Rm + SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m)); + } + + public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & (Rn ^ Rm) < 0 + Operand vOut = context.BitwiseExclusiveOr(d, n); + + vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m)); + + vOut = context.ICompareLess(vOut, Const(vOut.Type, 0)); + + SetFlag(context, PState.VFlag, vOut); + } + + + public static Operand GetAluN(ArmEmitterContext context) + { + if (context.CurrOp is IOpCodeAlu op) + { + if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs) + { + return GetIntOrZR(context, op.Rn); + } + else + { + return GetIntOrSP(context, op.Rn); + } + } + else if (context.CurrOp is IOpCode32Alu op32) + { + return GetIntA32(context, op32.Rn); + } + else + { + throw InvalidOpCodeType(context.CurrOp); + } + } + + public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true) + { + switch (context.CurrOp) + { + // ARM32. + case OpCode32AluImm op: + { + if (op.SetFlags && op.IsRotated) + { + SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31)); + } + + return Const(op.Immediate); + } + + case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + + case OpCodeT16AluImm8 op: return Const(op.Immediate); + + // ARM64. + case IOpCodeAluImm op: + { + if (op.GetOperandType() == OperandType.I32) + { + return Const((int)op.Immediate); + } + else + { + return Const(op.Immediate); + } + } + + case IOpCodeAluRs op: + { + Operand value = GetIntOrZR(context, op.Rm); + + switch (op.ShiftType) + { + case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break; + case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break; + case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break; + case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break; + } + + return value; + } + + case IOpCodeAluRx op: + { + Operand value = GetExtendedM(context, op.Rm, op.IntType); + + value = context.ShiftLeft(value, Const(op.Shift)); + + return value; + } + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\"."); + } + + // ARM32 helpers. + private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Imm; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= op.SetFlags; + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Imm != 0) + { + m = GetRorC(context, m, setCarry, shift); + } + else + { + m = GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } + + private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + Operand cOut = context.ShiftRightUI(m, Const(32 - shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + + return context.ShiftLeft(m, Const(shift)); + } + } + + private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightUI(m, Const(shift)); + } + } + + private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry) + { + if (setCarry) + { + SetFlag(context, PState.CFlag, Const(0));; + } + + return Const(0); + } + + private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift >= 32) + { + m = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return m; + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightSI(m, Const(shift)); + } + } + + private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + shift &= 0x1f; + + m = context.RotateRight(m, Const(shift)); + + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return m; + } + + private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + { + // Rotate right by 1 with carry. + Operand cIn = context.Copy(GetFlag(PState.CFlag)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + m = context.ShiftRightUI(m, Const(1)); + + m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31))); + + return m; + } + + private static void SetCarryMLsb(ArmEmitterContext context, Operand m) + { + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); + } + + private static void SetCarryMMsb(ArmEmitterContext context, Operand m) + { + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); + } + + private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) + { + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitBfm.cs b/ARMeilleure/Instructions/InstEmitBfm.cs new file mode 100644 index 00000000..8fdbf6cf --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitBfm.cs @@ -0,0 +1,196 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Bfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand d = GetIntOrZR(context, op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + Operand res; + + if (op.Pos < op.Shift) + { + // BFI. + int shift = op.GetBitsCount() - op.Shift; + + int width = op.Pos + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift)))); + } + else + { + // BFXIL. + int shift = op.Shift; + + int width = op.Pos - shift + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask))); + } + + SetIntOrZR(context, op.Rd, res); + } + + public static void Sbfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int bitsCount = op.GetBitsCount(); + + if (op.Pos + 1 == bitsCount) + { + EmitSbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitSbfiz(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n)); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n)); + } + else if (op.Pos == 31 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n)); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos)); + res = context.ShiftRightSI(res, Const(bitsCount - 1)); + res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask)); + + Operand n2 = GetBfmN(context); + + SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2)); + } + } + + public static void Ubfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + if (op.Pos + 1 == op.GetBitsCount()) + { + EmitUbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitUbfiz(context); + } + else if (op.Pos + 1 == op.Shift) + { + EmitBfmLsl(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff))); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff))); + } + else + { + SetIntOrZR(context, op.Rd, GetBfmN(context)); + } + } + + private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true); + private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false); + + private static void EmitBfiz(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int width = op.Pos + 1; + + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft(res, Const(op.GetBitsCount() - width)); + + res = signed + ? context.ShiftRightSI(res, Const(op.Shift - width)) + : context.ShiftRightUI(res, Const(op.Shift - width)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitSbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: true); + } + + private static void EmitUbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: false); + } + + private static void EmitBfmShift(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = signed + ? context.ShiftRightSI(res, Const(op.Shift)) + : context.ShiftRightUI(res, Const(op.Shift)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitBfmLsl(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + int shift = op.GetBitsCount() - op.Shift; + + SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift))); + } + + private static Operand GetBfmN(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + long mask = op.WMask & op.TMask; + + return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask)); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs new file mode 100644 index 00000000..b1b0a2a1 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitCcmp.cs @@ -0,0 +1,61 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true); + public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false); + + private static void EmitCcmp(ArmEmitterContext context, bool isNegated) + { + OpCodeCcmp op = (OpCodeCcmp)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + EmitCondBranch(context, lblTrue, op.Cond); + + SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1)); + SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1)); + SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1)); + SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + if (isNegated) + { + Operand d = context.Add(n, m); + + EmitNZFlagsCheck(context, d); + + EmitAddsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + } + else + { + Operand d = context.Subtract(n, m); + + EmitNZFlagsCheck(context, d); + + EmitSubsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + } + + context.MarkLabel(lblEnd); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs new file mode 100644 index 00000000..60baf0bc --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitCsel.cs @@ -0,0 +1,53 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private enum CselOperation + { + None, + Increment, + Invert, + Negate + } + + public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None); + public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment); + public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert); + public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate); + + private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp) + { + OpCodeCsel op = (OpCodeCsel)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + if (cselOp == CselOperation.Increment) + { + m = context.Add(m, Const(m.Type, 1)); + } + else if (cselOp == CselOperation.Invert) + { + m = context.BitwiseNot(m); + } + else if (cselOp == CselOperation.Negate) + { + m = context.Negate(m); + } + + Operand condTrue = GetCondTrue(context, op.Cond); + + Operand d = context.ConditionalSelect(condTrue, n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs new file mode 100644 index 00000000..0c21dd1b --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitDiv.cs @@ -0,0 +1,67 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false); + public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true); + + private static void EmitDiv(ArmEmitterContext context, bool unsigned) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + // If Rm == 0, Rd = 0 (division by zero). + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0)); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) + { + // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + bool is32Bits = op.RegisterSize == RegisterSize.Int32; + + Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue); + Operand minus1 = is32Bits ? Const(-1) : Const(-1L); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + SetAluDOrZR(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); + } + + Operand d = unsigned + ? context.DivideUI(n, m) + : context.Divide (n, m); + + SetAluDOrZR(context, d); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + SetAluDOrZR(context, Const(op.GetOperandType(), 0)); + + context.MarkLabel(lblEnd); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs new file mode 100644 index 00000000..6f7b6fd5 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitException.cs @@ -0,0 +1,55 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Brk(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.Break); + } + + public static void Svc(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.SupervisorCall); + } + + private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func) + { + OpCodeException op = (OpCodeException)context.CurrOp; + + context.StoreToContext(); + + context.Call(func, Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + + public static void Und(ArmEmitterContext context) + { + OpCode op = context.CurrOp; + + Delegate dlg = new _Void_U64_S32(NativeInterface.Undefined); + + context.StoreToContext(); + + context.Call(dlg, Const(op.Address), Const(op.RawOpCode)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs new file mode 100644 index 00000000..93d36e1b --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlow.cs @@ -0,0 +1,159 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void B(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + else + { + context.Return(Const(op.Immediate)); + } + } + + public static void B_Cond(ArmEmitterContext context) + { + OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp; + + EmitBranch(context, op.Cond); + } + + public static void Bl(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitCall(context, (ulong)op.Immediate); + } + + public static void Blr(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + Operand n = context.Copy(GetIntOrZR(context, op.Rn)); + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitVirtualCall(context, n); + } + + public static void Br(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + EmitVirtualJump(context, GetIntOrZR(context, op.Rn)); + } + + public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); + public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false); + + private static void EmitCb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp; + + EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero); + } + + public static void Ret(ArmEmitterContext context) + { + context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag))); + } + + public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true); + public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false); + + private static void EmitTb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp; + + Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit)); + + EmitBranch(context, value, onNotZero); + } + + private static void EmitBranch(ArmEmitterContext context, Condition cond) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + else + { + Operand lblTaken = Label(); + + EmitCondBranch(context, lblTaken, cond); + + context.Return(Const(op.Address + 4)); + + context.MarkLabel(lblTaken); + + context.Return(Const(op.Immediate)); + } + } + + private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + Operand lblTarget = context.GetLabel((ulong)op.Immediate); + + if (onNotZero) + { + context.BranchIfTrue(lblTarget, value); + } + else + { + context.BranchIfFalse(lblTarget, value); + } + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + else + { + Operand lblTaken = Label(); + + if (onNotZero) + { + context.BranchIfTrue(lblTaken, value); + } + else + { + context.BranchIfFalse(lblTaken, value); + } + + context.Return(Const(op.Address + 4)); + + context.MarkLabel(lblTaken); + + context.Return(Const(op.Immediate)); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs new file mode 100644 index 00000000..27addc78 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -0,0 +1,71 @@ +using ARMeilleure.Decoders; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void B(ArmEmitterContext context) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + else + { + context.StoreToContext(); + + context.Return(Const(op.Immediate)); + } + } + + public static void Bl(ArmEmitterContext context) + { + Blx(context, x: false); + } + + public static void Blx(ArmEmitterContext context) + { + Blx(context, x: true); + } + + public static void Bx(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + context.StoreToContext(); + + EmitBxWritePc(context, GetIntA32(context, op.Rm)); + } + + private static void Blx(ArmEmitterContext context, bool x) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + uint pc = op.GetPc(); + + bool isThumb = IsThumb(context.CurrOp); + + uint currentPc = isThumb + ? op.GetPc() | 1 + : op.GetPc() - 4; + + SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + // If x is true, then this is a branch with link and exchange. + // In this case we need to swap the mode between Arm <-> Thumb. + if (x) + { + SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1)); + } + + InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs new file mode 100644 index 00000000..a8eb21d3 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -0,0 +1,192 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitFlowHelper + { + public const ulong CallFlag = 1; + + public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond) + { + if (cond != Condition.Al) + { + context.BranchIfTrue(target, GetCondTrue(context, cond)); + } + else + { + context.Branch(target); + } + } + + public static Operand GetCondTrue(ArmEmitterContext context, Condition condition) + { + Operand cmpResult = context.TryGetComparisonResult(condition); + + if (cmpResult != null) + { + return cmpResult; + } + + Operand value = Const(1); + + Operand Inverse(Operand val) + { + return context.BitwiseExclusiveOr(val, Const(1)); + } + + switch (condition) + { + case Condition.Eq: + value = GetFlag(PState.ZFlag); + break; + + case Condition.Ne: + value = Inverse(GetFlag(PState.ZFlag)); + break; + + case Condition.GeUn: + value = GetFlag(PState.CFlag); + break; + + case Condition.LtUn: + value = Inverse(GetFlag(PState.CFlag)); + break; + + case Condition.Mi: + value = GetFlag(PState.NFlag); + break; + + case Condition.Pl: + value = Inverse(GetFlag(PState.NFlag)); + break; + + case Condition.Vs: + value = GetFlag(PState.VFlag); + break; + + case Condition.Vc: + value = Inverse(GetFlag(PState.VFlag)); + break; + + case Condition.GtUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + value = context.BitwiseAnd(c, Inverse(z)); + + break; + } + + case Condition.LeUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + value = context.BitwiseOr(Inverse(c), z); + + break; + } + + case Condition.Ge: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareEqual(n, v); + + break; + } + + case Condition.Lt: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareNotEqual(n, v); + + break; + } + + case Condition.Gt: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v)); + + break; + } + + case Condition.Le: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseOr(z, context.ICompareNotEqual(n, v)); + + break; + } + } + + return value; + } + + public static void EmitCall(ArmEmitterContext context, ulong immediate) + { + context.Return(Const(immediate | CallFlag)); + } + + public static void EmitVirtualCall(ArmEmitterContext context, Operand target) + { + EmitVirtualCallOrJump(context, target, isJump: false); + } + + public static void EmitVirtualJump(ArmEmitterContext context, Operand target) + { + EmitVirtualCallOrJump(context, target, isJump: true); + } + + private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump) + { + context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag))); + } + + private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal) + { + // Note: The return value of the called method will be placed + // at the Stack, the return value is always a Int64 with the + // return address of the function. We check if the address is + // correct, if it isn't we keep returning until we reach the dispatcher. + ulong nextAddr = GetNextOpAddress(context.CurrOp); + + if (context.CurrBlock.Next != null) + { + Operand lblContinue = Label(); + + context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr))); + + context.Return(Const(nextAddr)); + + context.MarkLabel(lblContinue); + } + else + { + context.Return(Const(nextAddr)); + } + } + + private static ulong GetNextOpAddress(OpCode op) + { + return op.Address + (ulong)op.OpCodeSizeInBytes; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs new file mode 100644 index 00000000..0be8458e --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitHash.cs @@ -0,0 +1,64 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Crc32b(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b)); + } + + public static void Crc32h(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h)); + } + + public static void Crc32w(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w)); + } + + public static void Crc32x(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x)); + } + + public static void Crc32cb(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb)); + } + + public static void Crc32ch(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch)); + } + + public static void Crc32cw(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw)); + } + + public static void Crc32cx(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx)); + } + + private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Call(dlg, n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs new file mode 100644 index 00000000..02e104a4 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -0,0 +1,218 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitHelper + { + public static bool IsThumb(OpCode op) + { + return op is OpCodeT16; + } + + public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type) + { + Operand value = GetIntOrZR(context, rm); + + switch (type) + { + case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break; + case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break; + case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break; + + case IntType.Int8: value = context.SignExtend8 (value.Type, value); break; + case IntType.Int16: value = context.SignExtend16(value.Type, value); break; + case IntType.Int32: value = context.SignExtend32(value.Type, value); break; + } + + return value; + } + + public static Operand GetIntA32(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + OpCode32 op = (OpCode32)context.CurrOp; + + return Const((int)op.GetPc()); + } + else + { + return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex)); + } + } + + public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + context.StoreToContext(); + + EmitBxWritePc(context, value); + } + else + { + SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value); + } + } + + public static int GetRegisterAlias(Aarch32Mode mode, int regIndex) + { + // Only registers >= 8 are banked, + // with registers in the range [8, 12] being + // banked for the FIQ mode, and registers + // 13 and 14 being banked for all modes. + if ((uint)regIndex < 8) + { + return regIndex; + } + + return GetBankedRegisterAlias(mode, regIndex); + } + + public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex) + { + switch (regIndex) + { + case 8: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R8Fiq + : RegisterAlias.R8Usr; + + case 9: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R9Fiq + : RegisterAlias.R9Usr; + + case 10: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R10Fiq + : RegisterAlias.R10Usr; + + case 11: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R11Fiq + : RegisterAlias.R11Usr; + + case 12: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R12Fiq + : RegisterAlias.R12Usr; + + case 13: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.System: return RegisterAlias.SpUsr; + case Aarch32Mode.Fiq: return RegisterAlias.SpFiq; + case Aarch32Mode.Irq: return RegisterAlias.SpIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc; + case Aarch32Mode.Abort: return RegisterAlias.SpAbt; + case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp; + case Aarch32Mode.Undefined: return RegisterAlias.SpUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + case 14: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.Hypervisor: + case Aarch32Mode.System: return RegisterAlias.LrUsr; + case Aarch32Mode.Fiq: return RegisterAlias.LrFiq; + case Aarch32Mode.Irq: return RegisterAlias.LrIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc; + case Aarch32Mode.Abort: return RegisterAlias.LrAbt; + case Aarch32Mode.Undefined: return RegisterAlias.LrUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + default: throw new ArgumentOutOfRangeException(nameof(regIndex)); + } + } + + public static void EmitBxWritePc(ArmEmitterContext context, Operand pc) + { + Operand mode = context.BitwiseAnd(pc, Const(1)); + + SetFlag(context, PState.TFlag, mode); + + Operand lblArmMode = Label(); + + context.BranchIfTrue(lblArmMode, mode); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1)))); + + context.MarkLabel(lblArmMode); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3)))); + } + + public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + OperandType type = context.CurrOp.GetOperandType(); + + return type == OperandType.I32 ? Const(0) : Const(0L); + } + else + { + return GetIntOrSP(context, regIndex); + } + } + + public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + return; + } + + SetIntOrSP(context, regIndex, value); + } + + public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex) + { + Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + value = context.ConvertI64ToI32(value); + } + + return value; + } + + public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value) + { + Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (value.Type == OperandType.I32) + { + value = context.ZeroExtend32(OperandType.I64, value); + } + + context.Copy(reg, value); + } + + public static Operand GetVec(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + + public static Operand GetFlag(PState stateFlag) + { + return Register((int)stateFlag, RegisterType.Flag, OperandType.I32); + } + + public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value) + { + context.Copy(GetFlag(stateFlag), value); + + context.MarkFlagSet(stateFlag); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs new file mode 100644 index 00000000..1d5953fb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemory.cs @@ -0,0 +1,177 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Adr(ArmEmitterContext context) + { + OpCodeAdr op = (OpCodeAdr)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate)); + } + + public static void Adrp(ArmEmitterContext context) + { + OpCodeAdr op = (OpCodeAdr)context.CurrOp; + + ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12); + + SetIntOrZR(context, op.Rd, Const(address)); + } + + public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false); + public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true); + + private static void EmitLdr(ArmEmitterContext context, bool signed) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + if (signed && op.Extend64) + { + EmitLoadSx64(context, address, op.Rt, op.Size); + } + else if (signed) + { + EmitLoadSx32(context, address, op.Rt, op.Size); + } + else + { + EmitLoadZx(context, address, op.Rt, op.Size); + } + + EmitWBackIfNeeded(context, address); + } + + public static void Ldr_Literal(ArmEmitterContext context) + { + IOpCodeLit op = (IOpCodeLit)context.CurrOp; + + if (op.Prefetch) + { + return; + } + + if (op.Signed) + { + EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size); + } + else + { + EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size); + } + } + + public static void Ldp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + void EmitLoad(int rt, Operand ldAddr) + { + if (op.Extend64) + { + EmitLoadSx64(context, ldAddr, rt, op.Size); + } + else + { + EmitLoadZx(context, ldAddr, rt, op.Size); + } + } + + Operand address = GetAddress(context); + + Operand address2 = context.Add(address, Const(1L << op.Size)); + + EmitLoad(op.Rt, address); + EmitLoad(op.Rt2, address2); + + EmitWBackIfNeeded(context, address); + } + + public static void Str(ArmEmitterContext context) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size); + + EmitWBackIfNeeded(context, address); + } + + public static void Stp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + Operand address = GetAddress(context); + + Operand address2 = context.Add(address, Const(1L << op.Size)); + + InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size); + InstEmitMemoryHelper.EmitStore(context, address2, op.Rt2, op.Size); + + EmitWBackIfNeeded(context, address); + } + + private static Operand GetAddress(ArmEmitterContext context) + { + Operand address = null; + + switch (context.CurrOp) + { + case OpCodeMemImm op: + { + address = context.Copy(GetIntOrSP(context, op.Rn)); + + // Pre-indexing. + if (!op.PostIdx) + { + address = context.Add(address, Const(op.Immediate)); + } + + break; + } + + case OpCodeMemReg op: + { + Operand n = GetIntOrSP(context, op.Rn); + + Operand m = GetExtendedM(context, op.Rm, op.IntType); + + if (op.Shift) + { + m = context.ShiftLeft(m, Const(op.Size)); + } + + address = context.Add(n, m); + + break; + } + } + + return address; + } + + private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address) + { + // Check whenever the current OpCode has post-indexed write back, if so write it. + if (context.CurrOp is OpCodeMemImm op && op.WBack) + { + if (op.PostIdx) + { + address = context.Add(address, Const(op.Immediate)); + } + + SetIntOrSP(context, op.Rn, address); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs new file mode 100644 index 00000000..002d2c5c --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemory32.cs @@ -0,0 +1,256 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private const int ByteSizeLog2 = 0; + private const int HWordSizeLog2 = 1; + private const int WordSizeLog2 = 2; + private const int DWordSizeLog2 = 3; + + [Flags] + enum AccessType + { + Store = 0, + Signed = 1, + Load = 2, + + LoadZx = Load, + LoadSx = Load | Signed, + } + + public static void Ldm(ArmEmitterContext context) + { + OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0; + + bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc); + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitLoadZx(context, address, register, WordSizeLog2); + + offset += 4; + } + } + } + + public static void Ldr(ArmEmitterContext context) + { + EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx); + } + + public static void Ldrd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrsb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx); + } + + public static void Ldrsh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx); + } + + public static void Stm(ArmEmitterContext context) + { + OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStore(context, address, register, WordSizeLog2); + + // Note: If Rn is also specified on the register list, + // and Rn is the first register on this list, then the + // value that is written to memory is the unmodified value, + // before the write back. If it is on the list, but it's + // not the first one, then the value written to memory + // varies between CPUs. + if (offset == 0 && op.PostOffset != 0) + { + // Emit write back after the first write. + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + offset += 4; + } + } + } + + public static void Str(ArmEmitterContext context) + { + EmitLoadOrStore(context, WordSizeLog2, AccessType.Store); + } + + public static void Strb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store); + } + + public static void Strd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store); + } + + public static void Strh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store); + } + + private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + OpCode32Mem op = (OpCode32Mem)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand temp = null; + + if (op.Index || op.WBack) + { + temp = op.Add + ? context.Add (n, Const(op.Immediate)) + : context.Subtract(n, Const(op.Immediate)); + } + + if (op.WBack) + { + SetIntA32(context, op.Rn, temp); + } + + Operand address; + + if (op.Index) + { + address = temp; + } + else + { + address = n; + } + + if ((accType & AccessType.Load) != 0) + { + void Load(int rt, int offs, int loadSize) + { + Operand addr = context.Add(address, Const(offs)); + + if ((accType & AccessType.Signed) != 0) + { + EmitLoadSx32(context, addr, rt, loadSize); + } + else + { + EmitLoadZx(context, addr, rt, loadSize); + } + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Load(op.Rt, 0, WordSizeLog2); + Load(op.Rt | 1, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Load(op.Rt | 1, 0, WordSizeLog2); + Load(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Load(op.Rt, 0, size); + } + } + else + { + void Store(int rt, int offs, int storeSize) + { + Operand addr = context.Add(address, Const(offs)); + + EmitStore(context, addr, rt, storeSize); + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Store(op.Rt, 0, WordSizeLog2); + Store(op.Rt | 1, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Store(op.Rt | 1, 0, WordSizeLog2); + Store(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Store(op.Rt, 0, size); + } + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs new file mode 100644 index 00000000..bcca7619 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -0,0 +1,261 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + [Flags] + private enum AccessType + { + None = 0, + Ordered = 1, + Exclusive = 2, + OrderedEx = Ordered | Exclusive + } + + public static void Clrex(ArmEmitterContext context) + { + context.Call(new _Void(NativeInterface.ClearExclusive)); + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered); + public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx); + public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive); + public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive); + public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx); + + private static void EmitLdr(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: false); + } + + private static void EmitLdp(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: true); + } + + private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + if (pair) + { + // Exclusive loads should be atomic. For pairwise loads, we need to + // read all the data at once. For a 32-bits pairwise load, we do a + // simple 64-bits load, for a 128-bits load, we need to call a special + // method to read 128-bits atomically. + if (op.Size == 2) + { + Operand value = EmitLoad(context, address, exclusive, 3); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else if (op.Size == 3) + { + Operand value = EmitLoad(context, address, exclusive, 4); + + Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); + Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else + { + throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes."); + } + } + else + { + // 8, 16, 32 or 64-bits (non-pairwise) load. + Operand value = EmitLoad(context, address, exclusive, op.Size); + + SetIntOrZR(context, op.Rt, value); + } + } + + private static Operand EmitLoad( + ArmEmitterContext context, + Operand address, + bool exclusive, + int size) + { + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break; + case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; + } + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; + } + } + + return context.Call(fallbackMethodDlg, address); + } + + public static void Pfrm(ArmEmitterContext context) + { + // Memory Prefetch, execute as no-op. + } + + public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered); + public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx); + public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive); + public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive); + public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx); + + private static void EmitStr(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: false); + } + + private static void EmitStp(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: true); + } + + private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + Operand t = GetIntOrZR(context, op.Rt); + + Operand s = null; + + if (pair) + { + Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store."); + + Operand t2 = GetIntOrZR(context, op.Rt2); + + Operand value; + + if (op.Size == 2) + { + value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32))); + } + else /* if (op.Size == 3) */ + { + value = context.VectorInsert(context.VectorZero(), t, 0); + value = context.VectorInsert(value, t2, 1); + } + + s = EmitStore(context, address, value, exclusive, op.Size + 1); + } + else + { + s = EmitStore(context, address, t, exclusive, op.Size); + } + + if (s != null) + { + // This is only needed for exclusive stores. The function returns 0 + // when the store is successful, and 1 otherwise. + SetIntOrZR(context, op.Rs, s); + } + } + + private static Operand EmitStore( + ArmEmitterContext context, + Operand address, + Operand value, + bool exclusive, + int size) + { + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break; + case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; + } + + return context.Call(fallbackMethodDlg, address, value); + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; + case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; + } + + context.Call(fallbackMethodDlg, address, value); + + return null; + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + // Note: This barrier is most likely not necessary, and probably + // doesn't make any difference since we need to do a ton of stuff + // (software MMU emulation) to read or write anything anyway. + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs new file mode 100644 index 00000000..0ae5e3f2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -0,0 +1,512 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryHelper + { + private enum Extension + { + Zx, + Sx32, + Sx64 + } + + public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Zx, rt, size); + } + + public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx32, rt, size); + } + + public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx64, rt, size); + } + + private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitReadVector(context, address, context.VectorZero(), rt, 0, size); + } + else + { + EmitReadInt(context, address, rt, size); + } + + if (!isSimd) + { + Operand value = GetIntOrZR(context, rt); + + if (ext == Extension.Sx32 || ext == Extension.Sx64) + { + OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32; + + switch (size) + { + case 0: value = context.SignExtend8 (destType, value); break; + case 1: value = context.SignExtend16(destType, value); break; + case 2: value = context.SignExtend32(destType, value); break; + } + } + + SetIntOrZR(context, rt, value); + } + } + + public static void EmitLoadSimd( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + EmitReadVector(context, address, vector, rt, elem, size); + } + + public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitWriteVector(context, address, rt, 0, size); + } + else + { + EmitWriteInt(context, address, rt, size); + } + } + + public static void EmitStoreSimd( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + EmitWriteVector(context, address, rt, elem, size); + } + + private static bool IsSimd(ArmEmitterContext context) + { + return context.CurrOp is IOpCodeSimd && + !(context.CurrOp is OpCodeSimdMemMs || + context.CurrOp is OpCodeSimdMemSs); + } + + private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size) + { + Operand isUnalignedAddr = EmitAddressCheck(context, address, size); + + Operand lblFastPath = Label(); + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblFastPath, isUnalignedAddr); + + context.MarkLabel(lblSlowPath); + + EmitReadIntFallback(context, address, rt, size); + + context.Branch(lblEnd); + + context.MarkLabel(lblFastPath); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath); + + Operand value = null; + + switch (size) + { + case 0: + value = context.Load8(physAddr); + break; + + case 1: + value = context.Load16(physAddr); + break; + + case 2: + value = context.Load(OperandType.I32, physAddr); + break; + + case 3: + value = context.Load(OperandType.I64, physAddr); + break; + } + + SetInt(context, rt, value); + + context.MarkLabel(lblEnd); + } + + private static void EmitReadVector( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + Operand isUnalignedAddr = EmitAddressCheck(context, address, size); + + Operand lblFastPath = Label(); + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblFastPath, isUnalignedAddr); + + context.MarkLabel(lblSlowPath); + + EmitReadVectorFallback(context, address, vector, rt, elem, size); + + context.Branch(lblEnd); + + context.MarkLabel(lblFastPath); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath); + + Operand value = null; + + switch (size) + { + case 0: + value = context.VectorInsert8(vector, context.Load8(physAddr), elem); + break; + + case 1: + value = context.VectorInsert16(vector, context.Load16(physAddr), elem); + break; + + case 2: + value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem); + break; + + case 3: + value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem); + break; + + case 4: + value = context.Load(OperandType.V128, physAddr); + break; + } + + context.Copy(GetVec(rt), value); + + context.MarkLabel(lblEnd); + } + + private static Operand VectorCreate(ArmEmitterContext context, Operand value) + { + return context.VectorInsert(context.VectorZero(), value, 0); + } + + private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size) + { + Operand isUnalignedAddr = EmitAddressCheck(context, address, size); + + Operand lblFastPath = Label(); + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblFastPath, isUnalignedAddr); + + context.MarkLabel(lblSlowPath); + + EmitWriteIntFallback(context, address, rt, size); + + context.Branch(lblEnd); + + context.MarkLabel(lblFastPath); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath); + + Operand value = GetInt(context, rt); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: context.Store8 (physAddr, value); break; + case 1: context.Store16(physAddr, value); break; + case 2: context.Store (physAddr, value); break; + case 3: context.Store (physAddr, value); break; + } + + context.MarkLabel(lblEnd); + } + + private static void EmitWriteVector( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + Operand isUnalignedAddr = EmitAddressCheck(context, address, size); + + Operand lblFastPath = Label(); + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblFastPath, isUnalignedAddr); + + context.MarkLabel(lblSlowPath); + + EmitWriteVectorFallback(context, address, rt, elem, size); + + context.Branch(lblEnd); + + context.MarkLabel(lblFastPath); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath); + + Operand value = GetVec(rt); + + switch (size) + { + case 0: + context.Store8(physAddr, context.VectorExtract8(value, elem)); + break; + + case 1: + context.Store16(physAddr, context.VectorExtract16(value, elem)); + break; + + case 2: + context.Store(physAddr, context.VectorExtract(OperandType.FP32, value, elem)); + break; + + case 3: + context.Store(physAddr, context.VectorExtract(OperandType.FP64, value, elem)); + break; + + case 4: + context.Store(physAddr, value); + break; + } + + context.MarkLabel(lblEnd); + } + + private static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size) + { + long addressCheckMask = ~(context.Memory.AddressSpaceSize - 1); + + addressCheckMask |= (1u << size) - 1; + + return context.BitwiseAnd(address, Const(address.Type, addressCheckMask)); + } + + private static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblFallbackPath) + { + Operand pte = Const(context.Memory.PageTable.ToInt64()); + + int bit = MemoryManager.PageBits; + + do + { + Operand addrPart = context.ShiftRightUI(address, Const(bit)); + + bit += context.Memory.PtLevelBits; + + if (bit < context.Memory.AddressSpaceBits) + { + addrPart = context.BitwiseAnd(addrPart, Const(addrPart.Type, context.Memory.PtLevelMask)); + } + + Operand pteOffset = context.ShiftLeft(addrPart, Const(3)); + + if (pteOffset.Type == OperandType.I32) + { + pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset); + } + + Operand pteAddress = context.Add(pte, pteOffset); + + pte = context.Load(OperandType.I64, pteAddress); + } + while (bit < context.Memory.AddressSpaceBits); + + if (!context.Memory.HasWriteWatchSupport) + { + Operand hasFlagSet = context.BitwiseAnd(pte, Const((long)MemoryManager.PteFlagsMask)); + + context.BranchIfTrue(lblFallbackPath, hasFlagSet); + } + + Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, MemoryManager.PageMask)); + + if (pageOffset.Type == OperandType.I32) + { + pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset); + } + + Operand physAddr = context.Add(pte, pageOffset); + + return physAddr; + } + + private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size) + { + Delegate fallbackMethodDlg = null; + + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break; + } + + SetInt(context, rt, context.Call(fallbackMethodDlg, address)); + } + + private static void EmitReadVectorFallback( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + Delegate fallbackMethodDlg = null; + + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; + } + + Operand value = context.Call(fallbackMethodDlg, address); + + switch (size) + { + case 0: value = context.VectorInsert8 (vector, value, elem); break; + case 1: value = context.VectorInsert16(vector, value, elem); break; + case 2: value = context.VectorInsert (vector, value, elem); break; + case 3: value = context.VectorInsert (vector, value, elem); break; + } + + context.Copy(GetVec(rt), value); + } + + private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size) + { + Delegate fallbackMethodDlg = null; + + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break; + } + + Operand value = GetInt(context, rt); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + context.Call(fallbackMethodDlg, address, value); + } + + private static void EmitWriteVectorFallback( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + Delegate fallbackMethodDlg = null; + + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; + case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; + } + + Operand value = null; + + if (size < 4) + { + switch (size) + { + case 0: + value = context.VectorExtract8(GetVec(rt), elem); + break; + + case 1: + value = context.VectorExtract16(GetVec(rt), elem); + break; + + case 2: + value = context.VectorExtract(OperandType.I32, GetVec(rt), elem); + break; + + case 3: + value = context.VectorExtract(OperandType.I64, GetVec(rt), elem); + break; + } + } + else + { + value = GetVec(rt); + } + + context.Call(fallbackMethodDlg, address, value); + } + + private static Operand GetInt(ArmEmitterContext context, int rt) + { + return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt); + } + + private static void SetInt(ArmEmitterContext context, int rt, Operand value) + { + if (context.CurrOp is OpCode32) + { + SetIntA32(context, rt, value); + } + else + { + SetIntOrZR(context, rt, value); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMove.cs b/ARMeilleure/Instructions/InstEmitMove.cs new file mode 100644 index 00000000..bf051f32 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMove.cs @@ -0,0 +1,41 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Movk(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + OperandType type = op.GetOperandType(); + + Operand res = GetIntOrZR(context, op.Rd); + + res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit))); + + res = context.BitwiseOr(res, Const(type, op.Immediate)); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Movn(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate)); + } + + public static void Movz(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate)); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMul.cs b/ARMeilleure/Instructions/InstEmitMul.cs new file mode 100644 index 00000000..65d11b30 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMul.cs @@ -0,0 +1,100 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true); + public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false); + + private static void EmitMul(ArmEmitterContext context, bool isAdd) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand a = GetIntOrZR(context, op.Ra); + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand res = context.Multiply(n, m); + + res = isAdd ? context.Add(a, res) : context.Subtract(a, res); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd); + public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract); + public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add); + public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract); + + [Flags] + private enum MullFlags + { + Subtract = 0, + Add = 1 << 0, + Signed = 1 << 1, + + SignedAdd = Signed | Add, + SignedSubtract = Signed | Subtract + } + + private static void EmitMull(ArmEmitterContext context, MullFlags flags) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand GetExtendedRegister32(int index) + { + Operand value = GetIntOrZR(context, index); + + if ((flags & MullFlags.Signed) != 0) + { + return context.SignExtend32(value.Type, value); + } + else + { + return context.ZeroExtend32(value.Type, value); + } + } + + Operand a = GetIntOrZR(context, op.Ra); + + Operand n = GetExtendedRegister32(op.Rn); + Operand m = GetExtendedRegister32(op.Rm); + + Operand res = context.Multiply(n, m); + + res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Smulh(ArmEmitterContext context) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Multiply64HighSI(n, m); + + SetIntOrZR(context, op.Rd, d); + } + + public static void Umulh(ArmEmitterContext context) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Multiply64HighUI(n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs new file mode 100644 index 00000000..44659e80 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -0,0 +1,3159 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h +// https://www.agner.org/optimize/#vectorclass @ vectori128.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { + public static void Abs_S(ArmEmitterContext context) + { + EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Abs_V(ArmEmitterContext context) + { + EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Add_S(ArmEmitterContext context) + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Add_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + Operand res = context.AddIntrinsic(addInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Addhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); + } + + public static void Addp_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); + + Operand res = context.Add(ne0, ne1); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } + + public static void Addp_V(ArmEmitterContext context) + { + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Addv_V(ArmEmitterContext context) + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Cls_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingSigns), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Clz_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de; + + if (eSize == 64) + { + de = context.CountLeadingZeros(ne); + } + else + { + de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingZeros), ne, Const(eSize)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Cnt_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + Operand de; + + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); + } + else + { + de = context.Call(new _U64_U64(SoftFallback.CountSetBits8), ne); + } + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Fabd_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetScalar(context, -0f); + + res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetScalar(context, -0d); + + res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + + return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res); + }); + } + } + + public static void Fabd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetAllElements(context, -0f); + + res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetAllElements(context, -0d); + + res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + + return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res); + }); + } + } + + public static void Fabs_S(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1); + }); + } + } + + public static void Fabs_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1); + }); + } + } + + public static void Fadd_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2); + }); + } + } + + public static void Fadd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2); + }); + } + } + + public static void Faddp_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse3) + { + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + OperandType type = sizeF != 0 ? OperandType.FP64 + : OperandType.FP32; + + Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); + + Operand res = EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, ne0, ne1); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Faddp_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2); + }); + } + } + + public static void Fdiv_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2); + }); + } + } + + public static void Fdiv_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2); + }); + } + } + + public static void Fmadd_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.Size == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Addss, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + }); + } + } + + public static void Fmax_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2); + }); + } + } + + public static void Fmax_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2); + }); + } + } + + public static void Fmaxnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2); + }); + } + + public static void Fmaxnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2); + }); + } + + public static void Fmaxp_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2); + }); + } + } + + public static void Fmin_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2); + }); + } + } + + public static void Fmin_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2); + }); + } + } + + public static void Fminnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2); + }); + } + + public static void Fminnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2); + }); + } + + public static void Fminp_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2); + }); + } + } + + public static void Fmla_Se(ArmEmitterContext context) // Fused. + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Fmla_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + }); + } + } + + public static void Fmla_Ve(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + }); + } + } + + public static void Fmls_Se(ArmEmitterContext context) // Fused. + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Fmls_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + }); + } + } + + public static void Fmls_Ve(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + }); + } + } + + public static void Fmsub_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.Size == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subss, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + }); + } + } + + public static void Fmul_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Fmul_Se(ArmEmitterContext context) + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Fmul_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Fmul_Ve(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Fmulx_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fmulx_Se(ArmEmitterContext context) + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fmulx_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fmulx_Ve(ArmEmitterContext context) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fneg_S(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fneg_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fnmadd_S(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0); + + Operand res = context.Subtract(context.Multiply(context.Negate(ne), me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmsub_S(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0); + + Operand res = context.Subtract(context.Multiply(ne, me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmul_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + + public static void Frecpe_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1); + }); + } + } + + public static void Frecpe_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1); + }); + } + } + + public static void Frecps_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetScalar(context, 2f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetScalar(context, 2d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2); + }); + } + } + + public static void Frecps_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, 2f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, 2d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2); + }); + } + } + + public static void Frecpx_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, op1); + }); + } + + public static void Frinta_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinta_V(ArmEmitterContext context) + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinti_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (op.Size == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frinti_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (sizeF == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintm_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1); + }); + } + } + + public static void Frintm_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1); + }); + } + } + + public static void Frintn_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintn_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintp_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1); + }); + } + } + + public static void Frintp_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1); + }); + } + } + + public static void Frintx_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (op.Size == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintx_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (sizeF == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintz_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1); + }); + } + } + + public static void Frintz_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1); + }); + } + } + + public static void Frsqrte_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1); + }); + } + } + + public static void Frsqrte_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1); + }); + } + } + + public static void Frsqrts_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetScalar(context, 0.5f); + Operand maskThree = X86GetScalar(context, 3f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetScalar(context, 0.5d); + Operand maskThree = X86GetScalar(context, 3d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2); + }); + } + } + + public static void Frsqrts_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2); + }); + } + } + + public static void Fsqrt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + } + + public static void Fsqrt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + } + + public static void Fsub_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + }); + } + } + + public static void Fsub_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + }); + } + } + + public static void Mla_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Add); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mla_Ve(ArmEmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mls_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Subtract); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mls_Ve(ArmEmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mul_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.None); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Mul_Ve(ArmEmitterContext context) + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Neg_S(ArmEmitterContext context) + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Neg_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Raddhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } + + public static void Rsubhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } + + public static void Saba_V(ArmEmitterContext context) + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabal_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabd_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41Sabd(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sabdl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovsxbw + : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Sabd(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sadalp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } + + public static void Saddl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Saddlp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } + + public static void Saddlv_V(ArmEmitterContext context) + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Saddw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Shadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Shsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + Operand nPlusMask = context.AddIntrinsic(addInst, n, mask); + Operand mPlusMask = context.AddIntrinsic(addInst, m, mask); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, nPlusMask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Smax_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Smaxp_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smaxv_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smin_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Sminp_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Sminv_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlal_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovsxbw + : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlsl_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smull_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Smull_Ve(ArmEmitterContext context) + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Sqabs_S(ArmEmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqabs_V(ArmEmitterContext context) + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqdmulh_S(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.ScalarSx); + } + + public static void Sqdmulh_V(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.VectorSx); + } + + public static void Sqneg_S(ArmEmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Sqneg_V(ArmEmitterContext context) + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Sqrdmulh_S(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.ScalarSx); + } + + public static void Sqrdmulh_V(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.VectorSx); + } + + public static void Sqsub_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqsub_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqxtn_S(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqxtn_V(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqxtun_S(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqxtun_V(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } + + public static void Srhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand nMinusMask = context.AddIntrinsic(subInst, n, mask); + Operand mMinusMask = context.AddIntrinsic(subInst, m, mask); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightSI(res, Const(1)); + }); + } + } + + public static void Ssubl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Ssubw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Sub_S(ArmEmitterContext context) + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Sub_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Subhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } + + public static void Suqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Suqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Uaba_V(ArmEmitterContext context) + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabal_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabd_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41Uabd(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uabdl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovzxbw + : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Uabd(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uadalp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + + public static void Uaddl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uaddlp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + + public static void Uaddlv_V(ArmEmitterContext context) + { + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Uaddw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Uhsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Umax_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Umaxp_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umaxv_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umin_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Uminp_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Uminv_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlal_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovzxbw + : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlsl_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umull_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Umull_Ve(ArmEmitterContext context) + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Uqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqsub_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqsub_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqxtn_S(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqxtn_V(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + + public static void Urhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightUI(res, Const(1)); + }); + } + } + + public static void Usqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usubl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Usubw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand e = context.Add(ne0, ne1); + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitDoublingMultiplyHighHalf( + ArmEmitterContext context, + Operand n, + Operand m, + bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = context.Multiply(n, m); + + if (!round) + { + res = context.ShiftRightSI(res, Const(eSize - 1)); + } + else + { + long roundConst = 1L << (eSize - 1); + + res = context.ShiftLeft(res, Const(1)); + + res = context.Add(res, Const(roundConst)); + + res = context.ShiftRightSI(res, Const(eSize)); + + Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue)); + + res = context.ConditionalSelect(isIntMin, context.Negate(res), res); + } + + return res; + } + + private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int elems = 8 >> op.Size; + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + long roundConst = 1L << (eSize - 1); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + Operand de = emit(ne, me); + + if (round) + { + de = context.Add(de, Const(roundConst)); + } + + de = context.ShiftRightUI(de, Const(eSize)); + + res = EmitVectorInsert(context, res, de, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum AddSub + { + None, + Add, + Subtract + } + + private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = null; + + if (op.Size == 0) + { + Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8)); + Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8)); + + res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8); + + res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8)); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + + Operand mask = X86GetAllElements(context, 0x00FF00FF); + + res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask); + } + else if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m); + } + + Operand d = GetVec(op.Rd); + + if (addSub == AddSub.Add) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Intrinsic.X86Paddb, d, res); break; + case 1: res = context.AddIntrinsic(Intrinsic.X86Paddw, d, res); break; + case 2: res = context.AddIntrinsic(Intrinsic.X86Paddd, d, res); break; + case 3: res = context.AddIntrinsic(Intrinsic.X86Paddq, d, res); break; + } + } + else if (addSub == AddSub.Subtract) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Intrinsic.X86Psubb, d, res); break; + case 1: res = context.AddIntrinsic(Intrinsic.X86Psubw, d, res); break; + case 2: res = context.AddIntrinsic(Intrinsic.X86Psubd, d, res); break; + case 3: res = context.AddIntrinsic(Intrinsic.X86Psubq, d, res); break; + } + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + + private static void EmitSse41Sabd( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? op.Size + 1 : op.Size; + + Intrinsic cmpgtInst = X86PcmpgtInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse41Uabd( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? op.Size + 1 : op.Size; + + Intrinsic maxInst = X86PmaxuInstruction[size]; + + Operand max = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpeqInst = X86PcmpeqInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); + + Operand onesMask = X86GetAllElements(context, -1L); + + cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs new file mode 100644 index 00000000..f27121bb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -0,0 +1,712 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { + public static void Cmeq_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true); + } + + public static void Cmeq_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false); + } + } + + public static void Cmge_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true); + } + + public static void Cmge_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, m, n); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmgt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true); + } + + public static void Cmgt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false); + } + } + + public static void Cmhi_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true); + } + + public static void Cmhi_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, m); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false); + } + } + + public static void Cmhs_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true); + } + + public static void Cmhs_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false); + } + } + + public static void Cmle_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true); + } + + public static void Cmle_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero()); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmlt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true); + } + + public static void Cmlt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false); + } + } + + public static void Cmtst_S(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: true); + } + + public static void Cmtst_V(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: false); + } + + public static void Fccmp_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: false); + } + + public static void Fccmpe_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: true); + } + + public static void Fcmeq_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: true); + } + } + + public static void Fcmeq_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: false); + } + } + + public static void Fcmge_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true); + } + } + + public static void Fcmge_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false); + } + } + + public static void Fcmgt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true); + } + } + + public static void Fcmgt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false); + } + } + + public static void Fcmle_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: true); + } + } + + public static void Fcmle_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: false); + } + } + + public static void Fcmlt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: true); + } + } + + public static void Fcmlt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: false); + } + } + + public static void Fcmp_S(ArmEmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } + + public static void Fcmpe_S(ArmEmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } + + public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond)); + + EmitSetNzcv(context, Const(op.Nzcv)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpOrFcmpe(context, signalNaNs); + + context.MarkLabel(lblEnd); + } + + private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + const int cmpOrdered = 7; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + if (Optimizations.FastFP && Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm); + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (op.Size == 0) + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + else /* if (op.Size == 1) */ + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + SetFlag(context, PState.VFlag, Const(1)); + SetFlag(context, PState.CFlag, Const(1)); + SetFlag(context, PState.ZFlag, Const(0)); + SetFlag(context, PState.NFlag, Const(0)); + + context.MarkLabel(lblEnd); + } + else + { + OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me; + + if (cmpWithZero) + { + me = op.Size == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = context.VectorExtract(type, GetVec(op.Rm), 0); + } + + Delegate dlg = op.Size != 0 + ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare) + : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare); + + Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs)); + + EmitSetNzcv(context, nzcv); + } + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFlag(context, PState.VFlag, Extract(nzcv, 0)); + SetFlag(context, PState.CFlag, Extract(nzcv, 1)); + SetFlag(context, PState.ZFlag, Extract(nzcv, 2)); + SetFlag(context, PState.NFlag, Extract(nzcv, 3)); + } + + private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size); + } + else + { + me = Const(0L); + } + + Operand isTrue = emitCmp(ne, me); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmtstOp(ArmEmitterContext context, bool scalar) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand test = context.BitwiseAnd(ne, me); + + Operand isTrue = context.ICompareNotEqual(test, Const(0L)); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmpOpF( + ArmEmitterContext context, + _F32_F32_F32 f32, + _F64_F64_F64 f64, + bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = context.VectorExtract(type, GetVec(binOp.Rm), index); + } + else + { + me = sizeF == 0 ? ConstF(0f) : ConstF(0d); + } + + Operand e = EmitSoftFloatCall(context, f32, f64, ne, me); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum CmpCondition + { + Equal = 0, + GreaterThanOrEqual = 5, + GreaterThan = 6 + } + + private static void EmitCmpSseOrSse2OpF( + ArmEmitterContext context, + CmpCondition cond, + bool scalar, + bool isLeOrLt = false) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero(); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps; + + Operand res = isLeOrLt + ? context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd; + + Operand res = isLeOrLt + ? context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs new file mode 100644 index 00000000..2b61fada --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs @@ -0,0 +1,49 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Aesd_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n)); + } + + public static void Aese_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n)); + } + + public static void Aesimc_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n)); + } + + public static void Aesmc_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs new file mode 100644 index 00000000..012bfcce --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -0,0 +1,1166 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + + static partial class InstEmit + { + public static void Fcvt_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0 && op.Opc == 1) // Single -> Double. + { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP64, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 1 && op.Opc == 0) // Double -> Single. + { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP32, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 0 && op.Opc == 3) // Single -> Half. + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert); + + Operand res = context.Call(dlg, ne); + + res = context.ZeroExtend16(OperandType.I64, res); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); + } + else if (op.Size == 3 && op.Opc == 0) // Half -> Single. + { + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1); + + Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert); + + Operand res = context.Call(dlg, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + else if (op.Size == 1 && op.Opc == 3) // Double -> Half. + { + throw new NotImplementedException("Double-precision to half-precision."); + } + else if (op.Size == 3 && op.Opc == 1) // Double -> Half. + { + throw new NotImplementedException("Half-precision to double-precision."); + } + else // Invalid encoding. + { + Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}"); + } + } + + public static void Fcvtas_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + + public static void Fcvtau_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + + public static void Fcvtl_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 1) + { + Operand n = GetVec(op.Rn); + Operand res; + + if (op.RegisterSize == RegisterSize.Simd128) + { + res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n); + } + else + { + res = n; + } + + res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1); + + Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert); + + Operand e = context.Call(dlg, ne); + + res = context.VectorInsert(res, e, index); + } + else /* if (sizeF == 1) */ + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index); + + Operand e = context.ConvertToFP(OperandType.FP64, ne); + + res = context.VectorInsert(res, e, index); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcvtms_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1)); + } + + public static void Fcvtmu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1)); + } + + public static void Fcvtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 1) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n); + + nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, res, nInt); + + context.Copy(GetVec(op.Rd), res); + } + else + { + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + if (sizeF == 0) + { + Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert); + + Operand e = context.Call(dlg, ne); + + e = context.ZeroExtend16(OperandType.I64, e); + + res = EmitVectorInsert(context, res, e, part + index, 1); + } + else /* if (sizeF == 1) */ + { + Operand e = context.ConvertToFP(OperandType.FP32, ne); + + res = context.VectorInsert(res, e, part + index); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcvtns_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvtn(context, signed: true, scalar: true); + } + } + + public static void Fcvtns_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvtn(context, signed: true, scalar: false); + } + } + + public static void Fcvtnu_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvtn(context, signed: false, scalar: true); + } + } + + public static void Fcvtnu_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvtn(context, signed: false, scalar: false); + } + } + + public static void Fcvtps_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1)); + } + + public static void Fcvtpu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1)); + } + + public static void Fcvtzs_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => op1); + } + + public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtzs_Gp_Fixed(context); + } + + public static void Fcvtzs_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: true, scalar: true); + } + } + + public static void Fcvtzs_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzs_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => op1); + } + + public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtzu_Gp_Fixed(context); + } + + public static void Fcvtzu_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: false, scalar: true); + } + } + + public static void Fcvtzu_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Fcvtzu_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Scvtf_Gp(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Scvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Scvtf_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: true); + } + else + { + Operand res = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2); + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Scvtf_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: true); + } + } + + public static void Scvtf_V_Fixed(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + // sizeF == ((OpCodeSimdShImm64)op).Size - 2 + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: true); + } + } + + public static void Ucvtf_Gp(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Ucvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Ucvtf_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: true); + } + else + { + Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2); + + Operand res = EmitFPConvert(context, ne, sizeF, signed: false); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Ucvtf_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: false); + } + } + + public static void Ucvtf_V_Fixed(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: false); + } + } + + private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = !scalar ? op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = EmitRoundMathCall(context, MidpointRounding.ToEven, ne); + + if (sizeF == 0) + { + Delegate dlg = signed + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32); + + e = context.Call(dlg, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + Delegate dlg = signed + ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + e = context.Call(dlg, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int fBits = GetFBits(context); + + int elems = !scalar ? op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = EmitF2iFBitsMul(context, ne, fBits); + + if (sizeF == 0) + { + Delegate dlg = signed + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32); + + e = context.Call(dlg, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + Delegate dlg = signed + ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + e = context.Call(dlg, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: true); + } + + private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: false); + } + + private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? EmitScalarFcvts(context, emit(ne), 0) + : EmitScalarFcvtu(context, emit(ne), 0); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: true); + } + + private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: false); + } + + private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? EmitScalarFcvts(context, ne, op.FBits) + : EmitScalarFcvtu(context, ne, op.FBits); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitVectorCvtf(ArmEmitterContext context, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int elems = op.GetBytesCount() >> sizeI; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI); + + Operand e = EmitFPConvert(context, ne, sizeF, signed); + + e = EmitI2fFBitsMul(context, e, fBits); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static int GetFBits(ArmEmitterContext context) + { + if (context.CurrOp is OpCodeSimdShImm op) + { + return GetImmShr(op); + } + + return 0; + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + Debug.Assert((uint)size < 2); + + OperandType type = size == 0 ? OperandType.FP32 + : OperandType.FP64; + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + + private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32); + + return context.Call(dlg, value); + } + else + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _S64_F32(SoftFallback.SatF32ToS64) + : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64); + + return context.Call(dlg, value); + } + } + + private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32) + : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32); + + return context.Call(dlg, value); + } + else + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + return context.Call(dlg, value); + } + } + + private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(Math.Pow(2d, fBits))); + } + } + + private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits))); + } + } + + private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const int cmpGreaterThanOrEqual = 5; + const int cmpOrdered = 7; + + // sizeF == ((OpCodeSimdShImm64)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRnd); + + Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648) + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand high; + + if (!scalar) + { + high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRnd, nRnd); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + else + { + high = Const(0L); + } + + Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRnd); + + Operand nInt = EmitVectorLongCreate(context, low, high); + + Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const int cmpGreaterThanOrEqual = 5; + const int cmpGreaterThan = 6; + const int cmpOrdered = 7; + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan)); + + Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRndMasked); + + Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648) + + Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan)); + + Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2); + + res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked); + + Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual)); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3); + res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan)); + + Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask); + + Operand high; + + if (!scalar) + { + high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRndMasked, nRndMasked); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + else + { + high = Const(0L); + } + + Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRndMasked); + + Operand nInt = EmitVectorLongCreate(context, low, high); + + Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan)); + + Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2); + + if (!scalar) + { + high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, resMasked, resMasked); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + + low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, resMasked); + + res = EmitVectorLongCreate(context, low, high); + + Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual)); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3); + res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16)); + + res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res); + + Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16) + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16)); + + res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2); + + res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size) + { + OperandType type = size == 3 ? OperandType.I64 : OperandType.I32; + + return context.VectorExtract(type, GetVec(reg), index); + } + + private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high) + { + Operand vector = context.VectorCreateScalar(low); + + vector = context.VectorInsert(vector, high, 1); + + return vector; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs new file mode 100644 index 00000000..4ed96061 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs @@ -0,0 +1,147 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Sha1" + public static void Sha1c_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashChoose), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1h_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand res = context.Call(new _U32_U32(SoftFallback.FixedRotate), ne); + + context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res)); + } + + public static void Sha1m_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1p_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashParity), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su0_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha1SchedulePart1), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su1_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha1SchedulePart2), d, n); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + +#region "Sha256" + public static void Sha256h_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashLower), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256h2_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashUpper), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su0_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha256SchedulePart1), d, n); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su1_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha256SchedulePart2), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 00000000..a3da80fb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,1477 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper + { +#region "X86 SSE Intrinsics" + public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[] + { + Intrinsic.X86Paddb, + Intrinsic.X86Paddw, + Intrinsic.X86Paddd, + Intrinsic.X86Paddq + }; + + public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpeqb, + Intrinsic.X86Pcmpeqw, + Intrinsic.X86Pcmpeqd, + Intrinsic.X86Pcmpeqq + }; + + public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpgtb, + Intrinsic.X86Pcmpgtw, + Intrinsic.X86Pcmpgtd, + Intrinsic.X86Pcmpgtq + }; + + public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxsb, + Intrinsic.X86Pmaxsw, + Intrinsic.X86Pmaxsd + }; + + public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxub, + Intrinsic.X86Pmaxuw, + Intrinsic.X86Pmaxud + }; + + public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[] + { + Intrinsic.X86Pminsb, + Intrinsic.X86Pminsw, + Intrinsic.X86Pminsd + }; + + public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[] + { + Intrinsic.X86Pminub, + Intrinsic.X86Pminuw, + Intrinsic.X86Pminud + }; + + public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovsxbw, + Intrinsic.X86Pmovsxwd, + Intrinsic.X86Pmovsxdq + }; + + public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovzxbw, + Intrinsic.X86Pmovzxwd, + Intrinsic.X86Pmovzxdq + }; + + public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psllw, + Intrinsic.X86Pslld, + Intrinsic.X86Psllq + }; + + public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psraw, + Intrinsic.X86Psrad + }; + + public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psrlw, + Intrinsic.X86Psrld, + Intrinsic.X86Psrlq + }; + + public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[] + { + Intrinsic.X86Psubb, + Intrinsic.X86Psubw, + Intrinsic.X86Psubd, + Intrinsic.X86Psubq + }; + + public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[] + { + Intrinsic.X86Punpckhbw, + Intrinsic.X86Punpckhwd, + Intrinsic.X86Punpckhdq, + Intrinsic.X86Punpckhqdq + }; + + public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[] + { + Intrinsic.X86Punpcklbw, + Intrinsic.X86Punpcklwd, + Intrinsic.X86Punpckldq, + Intrinsic.X86Punpcklqdq + }; +#endregion + + public static int GetImmShl(OpCodeSimdShImm op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static Operand X86GetScalar(ArmEmitterContext context, float value) + { + return X86GetScalar(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, double value) + { + return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, int value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, long value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, float value) + { + return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, double value) + { + return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, int value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0)); + + return vector; + } + + public static Operand X86GetAllElements(ArmEmitterContext context, long value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector); + + return vector; + } + + public static int X86GetRoundControl(FPRoundingMode roundMode) + { + switch (roundMode) + { + case FPRoundingMode.ToNearest: return 8 | 0; + case FPRoundingMode.TowardsPlusInfinity: return 8 | 2; + case FPRoundingMode.TowardsMinusInfinity: return 8 | 1; + case FPRoundingMode.TowardsZero: return 8 | 3; + } + + throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static Operand EmitUnaryMathCall(ArmEmitterContext context, _F32_F32 f32, _F64_F64 f64, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + return (op.Size & 1) == 0 ? context.Call(f32, n) : context.Call(f64, n); + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg; + + if ((op.Size & 1) == 0) + { + dlg = new _F32_F32_MidpointRounding(MathF.Round); + } + else /* if ((op.Size & 1) == 1) */ + { + dlg = new _F64_F64_MidpointRounding(Math.Round); + } + + return context.Call(dlg, n, Const((int)roundMode)); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32 f32, + _F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32_F32 f32, + _F64_F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32_F32_F32 f32, + _F64_F64_F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand d = context.VectorExtract(type, GetVec(op.Rd), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0)); + } + + public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size); + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand a = context.VectorExtract(type, GetVec(op.Ra), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0)); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + + res = context.VectorInsert(res, emit(ne), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, emit(imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: true); + } + + public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: false); + } + + private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); + Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); + res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); + } + + public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); + } + + public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } + + public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + private static void EmitVectorAcrossVectorOp( + ArmEmitterContext context, + Func2I emit, + bool signed, + bool isLong) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + + Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + + for (int index = 1; index < elems; index++) + { + Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + res = emit(res, n); + } + + int size = isLong ? op.Size + 1 : op.Size; + + Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int pairs = op.GetPairsCount() >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex); + Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1); + + Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex); + Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1); + + res = context.VectorInsert(res, emit(n0, n1), index); + res = context.VectorInsert(res, emit(m0, m1), pairs + index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m); + + Operand zero = context.VectorZero(); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1)); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + } + else /* if (sizeF == 1) */ + { + Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1)); + } + } + + + [Flags] + public enum SaturatingFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Add = 1 << 2, + Sub = 1 << 3, + + Accumulate = 1 << 4, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx); + } + + public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx); + } + + private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand de; + + if (op.Size <= 2) + { + de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true); + } + else /* if (op.Size == 3) */ + { + de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags); + } + + public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + if (add || sub) + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = add ? context.Add (ne, me) + : context.Subtract(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else if (add) /* if (op.Size == 3) */ + { + de = EmitBinarySatQAdd(context, ne, me, signed); + } + else /* if (sub) */ + { + de = EmitBinarySatQSub(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = context.Add(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + de = EmitBinarySatQAccumulate(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + Operand temp = EmitSatQ(context, ne, op.Size, signedSrc, signedDst); + + res = EmitVectorInsert(context, res, temp, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). + public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst) + { + if ((uint)sizeDst > 2u) + { + throw new ArgumentOutOfRangeException(nameof(sizeDst)); + } + + Delegate dlg; + + if (signedSrc) + { + dlg = signedDst + ? (Delegate)new _S64_S64_S32(SoftFallback.SignedSrcSignedDstSatQ) + : (Delegate)new _U64_S64_S32(SoftFallback.SignedSrcUnsignedDstSatQ); + } + else + { + dlg = signedDst + ? (Delegate)new _S64_U64_S32(SoftFallback.UnsignedSrcSignedDstSatQ) + : (Delegate)new _U64_U64_S32(SoftFallback.UnsignedSrcUnsignedDstSatQ); + } + + return context.Call(dlg, op, Const(sizeDst)); + } + + // TSrc (64bit) == TDst (64bit); signed. + public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + return context.Call(new _S64_S64(SoftFallback.UnarySignedSatQAbsOrNeg), op); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQAdd) + : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQAdd); + + return context.Call(dlg, op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQSub) + : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQSub); + + return context.Call(dlg, op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? (Delegate)new _S64_U64_S64(SoftFallback.BinarySignedSatQAcc) + : (Delegate)new _U64_S64_U64(SoftFallback.BinaryUnsignedSatQAcc); + + return context.Call(dlg, op1, op2); + } + + public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = null; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8 (OperandType.I64, res); break; + case 1: res = context.SignExtend16(OperandType.I64, res); break; + case 2: res = context.SignExtend32(OperandType.I64, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8 (OperandType.I64, res); break; + case 1: res = context.ZeroExtend16(OperandType.I64, res); break; + case 2: res = context.ZeroExtend32(OperandType.I64, res); break; + } + } + + return res; + } + + public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size) + { + ThrowIfInvalid(index, size); + + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: vector = context.VectorInsert8 (vector, value, index); break; + case 1: vector = context.VectorInsert16(vector, value, index); break; + case 2: vector = context.VectorInsert (vector, value, index); break; + case 3: vector = context.VectorInsert (vector, value, index); break; + } + + return vector; + } + + private static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs new file mode 100644 index 00000000..551752d2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -0,0 +1,456 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void And_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + } + + public static void Bic_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Bic_Vi(ArmEmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + + public static void Bif_V(ArmEmitterContext context) + { + EmitBifBit(context, notRm: true); + } + + public static void Bit_V(ArmEmitterContext context) + { + EmitBifBit(context, notRm: false); + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d); + + if (notRm) + { + res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pand, m, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand d = EmitVectorExtractZx(context, op.Rd, index, 3); + Operand n = EmitVectorExtractZx(context, op.Rn, index, 3); + Operand m = EmitVectorExtractZx(context, op.Rm, index, 3); + + if (notRm) + { + m = context.BitwiseNot(m); + } + + Operand e = context.BitwiseExclusiveOr(d, n); + + e = context.BitwiseAnd(e, m); + e = context.BitwiseExclusiveOr(e, d); + + res = EmitVectorInsert(context, res, e, index, 3); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Bsl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, res, d); + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + } + + public static void Eor_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2)); + } + } + + public static void Not_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Orn_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseOr(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Orr_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + public static void Orr_Vi(ArmEmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + + public static void Rbit_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + ne = context.ConvertI64ToI32(ne); + + Operand de = context.Call(new _U32_U32(SoftFallback.ReverseBits8), ne); + + de = context.ZeroExtend32(OperandType.I64, de); + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Rev16_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0; + const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 1); + } + } + + public static void Rev32_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0; + const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 1) */ + { + const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0; + const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 2); + } + } + + public static void Rev64_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0; + const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else if (op.Size == 1) + { + const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0; + const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 2) */ + { + const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0; + const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 3); + } + } + + private static void EmitRev_V(ArmEmitterContext context, int containerSize) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int containerMask = (1 << (containerSize - op.Size)) - 1; + + for (int index = 0; index < elems; index++) + { + int revIndex = index ^ containerMask; + + Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/ARMeilleure/Instructions/InstEmitSimdMemory.cs new file mode 100644 index 00000000..22e9ef7a --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMemory.cs @@ -0,0 +1,160 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Ld__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: true); + } + + public static void Ld__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: true); + } + + public static void St__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: false); + } + + public static void St__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: false); + } + + private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + long offset = 0; + + for (int rep = 0; rep < op.Reps; rep++) + for (int elem = 0; elem < op.Elems; elem++) + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rtt = (op.Rt + rep + sElem) & 0x1f; + + Operand tt = GetVec(rtt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, tt, rtt, elem, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1) + { + context.Copy(tt, context.VectorZeroUpper64(tt)); + } + } + else + { + EmitStoreSimd(context, address, rtt, elem, op.Size); + } + + offset += 1 << op.Size; + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + long offset = 0; + + if (op.Replicate) + { + // Only loads uses the replicate mode. + Debug.Assert(isLoad, "Replicate mode is not valid for stores."); + + int elems = op.GetBytesCount() >> op.Size; + + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + for (int index = 0; index < elems; index++) + { + EmitLoadSimd(context, address, t, rt, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + context.Copy(t, context.VectorZeroUpper64(t)); + } + + offset += 1 << op.Size; + } + } + else + { + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, t, rt, op.Index, op.Size); + } + else + { + EmitStoreSimd(context, address, rt, op.Index, op.Size); + } + + offset += 1 << op.Size; + } + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemWBack(ArmEmitterContext context, long offset) + { + OpCodeMemReg op = (OpCodeMemReg)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + Operand m; + + if (op.Rm != RegisterAlias.Zr) + { + m = GetIntOrZR(context, op.Rm); + } + else + { + m = Const(offset); + } + + context.Copy(n, context.Add(n, m)); + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs new file mode 100644 index 00000000..47359161 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -0,0 +1,794 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Masks" + private static readonly long[] _masksE0_TrnUzpXtn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_TrnUzp = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; + + private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; +#endregion + + public static void Dup_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if (Optimizations.UseSse2) + { + switch (op.Size) + { + case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break; + case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break; + case 2: n = context.ZeroExtend32(n.Type, n); break; + } + + Operand res = context.VectorInsert(context.VectorZero(), n, 0); + + if (op.Size < 3) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0)); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, n, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Dup_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size)); + } + + public static void Dup_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand res = GetVec(op.Rn); + + if (op.Size == 0) + { + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex)); + } + + res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res); + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 1) + { + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2)); + } + + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 2) + { + int mask = op.DstIndex * 0b01010101; + + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask)); + } + else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64) + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + else if (op.DstIndex == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Ext_V(ArmEmitterContext context) + { + OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand nShifted = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd64) + { + nShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, nShifted, context.VectorZero()); + } + + nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4)); + + Operand mShifted = GetVec(op.Rm); + + mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + mShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, mShifted, context.VectorZero()); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int bytes = op.GetBytesCount(); + + int position = op.Imm4 & (bytes - 1); + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + Operand e = EmitVectorExtractZx(context, reg, position, 0); + + position = (position + 1) & (bytes - 1); + + res = EmitVectorInsert(context, res, e, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcsel_S(ArmEmitterContext context) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond); + + context.BranchIfTrue(lblTrue, isTrue); + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + + context.MarkLabel(lblEnd); + } + + public static void Fmov_Ftoi(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Fmov_Ftoi1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Fmov_Itof(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2)); + } + + public static void Fmov_Itof1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, 1, 3)); + } + + public static void Fmov_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + } + + public static void Fmov_Si(ArmEmitterContext context) + { + OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp; + + if (op.Size == 0) + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate)); + } + else + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate)); + } + } + + public static void Fmov_Vi(ArmEmitterContext context) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand e = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2; + + for (int index = 0; index < (elems >> op.Size); index++) + { + res = EmitVectorInsert(context, res, e, index, op.Size + 2); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Ins_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size)); + } + + public static void Ins_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size); + + context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size)); + } + + public static void Movi_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitMoviMvni(context, not: false); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => op1); + } + } + + public static void Mvni_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitMoviMvni(context, not: true); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Smov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + ne = context.ZeroExtend32(OperandType.I64, ne); + } + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Tbl_V(ArmEmitterContext context) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask); + + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask); + + for (int index = 1; index < op.Size; index++) + { + Operand ni = GetVec((op.Rn + index) & 0x1f); + + Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index); + + Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask); + + Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask); + + mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand[] args = new Operand[1 + op.Size]; + + args[0] = GetVec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + args[1 + index] = GetVec((op.Rn + index) & 0x1f); + } + + Delegate dlg = null; + + switch (op.Size) + { + case 1: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64) + : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break; + + case 2: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64) + : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break; + + case 3: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64) + : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break; + + case 4: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64) + : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break; + } + + context.Copy(GetVec(op.Rd), context.Call(dlg, args)); + } + } + + public static void Trn1_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 0); + } + + public static void Trn2_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 1); + } + + public static void Umov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Uzp1_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 0); + } + + public static void Uzp2_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 1); + } + + public static void Xtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand d = GetVec(op.Rd); + + Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, res, res2); + + context.Copy(GetVec(op.Rd), res); + } + else + { + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + res = EmitVectorInsert(context, res, ne, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Zip1_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 0); + } + + public static void Zip2_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 1); + } + + private static void EmitMoviMvni(ArmEmitterContext context, bool not) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + long imm = op.Immediate; + + switch (op.Size) + { + case 0: imm *= 0x01010101; break; + case 1: imm *= 0x00010001; break; + } + + if (not) + { + imm = ~imm; + } + + Operand mask; + + if (op.Size < 3) + { + mask = X86GetAllElements(context, (int)imm); + } + else + { + mask = X86GetAllElements(context, imm); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + mask = context.VectorZeroUpper64(mask); + } + + context.Copy(GetVec(op.Rd), mask); + } + + private static void EmitVectorTranspose(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand mask = null; + + if (op.Size < 3) + { + long maskE0 = _masksE0_TrnUzpXtn[op.Size]; + long maskE1 = _masksE1_TrnUzp [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitVectorUnzip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + if (op.RegisterSize == RegisterSize.Simd128) + { + Operand mask = null; + + if (op.Size < 3) + { + long maskE0 = _masksE0_TrnUzpXtn[op.Size]; + long maskE1 = _masksE1_TrnUzp [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic punpcklInst = X86PunpcklInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpcklInst, n, m); + + if (op.Size < 2) + { + long maskE0 = _masksE0_Uzp[op.Size]; + long maskE1 = _masksE1_Uzp[op.Size]; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask); + } + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + res = EmitVectorInsert(context, res, me, pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitVectorZip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m); + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + int baseIndex = part != 0 ? pairs : 0; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs new file mode 100644 index 00000000..1aae491d --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -0,0 +1,1057 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { +#region "Masks" + private static readonly long[] _masks_RshrnShrn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; +#endregion + + public static void Rshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand mask = null; + + switch (op.Size + 1) + { + case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break; + case 2: mask = X86GetAllElements(context, (int)roundConst); break; + case 3: mask = X86GetAllElements(context, roundConst); break; + } + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + Operand res = context.AddIntrinsic(addInst, n, mask); + + Intrinsic srlInst = X86PsrlInstruction[op.Size + 1]; + + res = context.AddIntrinsic(srlInst, res, Const(shift)); + + Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + } + + public static void Shl_S(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + + public static void Shl_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + } + + public static void Shll_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int shift = 8 << op.Size; + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Shrn_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Intrinsic srlInst = X86PsrlInstruction[op.Size + 1]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } + } + + public static void Sli_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = context.ShiftLeft(ne, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqrshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqrshrn_S(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqrshrn_V(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqrshrun_S(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqrshrun_V(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Sqshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqshrn_S(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqshrn_V(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqshrun_S(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqshrun_V(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Srshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Srshr_S(ArmEmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } + + public static void Srshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srsra_S(ArmEmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Srsra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Sshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sshll_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + if (shift != 0) + { + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Sshr_S(ArmEmitterContext context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } + + public static void Sshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + + Operand n = GetVec(op.Rn); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorSx); + } + } + + public static void Ssra_S(ArmEmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } + + public static void Ssra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + public static void Uqrshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Uqrshrn_S(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqrshrn_V(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Uqshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Uqshrn_S(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqshrn_V(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Urshl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Urshr_S(ArmEmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } + + public static void Urshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round); + } + } + + public static void Ursra_S(ArmEmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Ursra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Ushl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Ushll_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movzxInst = X86PmovzxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movzxInst, n); + + if (shift != 0) + { + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Ushr_S(ArmEmitterContext context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } + + public static void Ushr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorZx); + } + } + + public static void Usra_S(ArmEmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + } + + public static void Usra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + [Flags] + private enum ShrImmFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Round = 1 << 2, + Accumulate = 1 << 3, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags); + } + + private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags); + } + + private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorSx | flags); + } + + private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorZx | flags); + } + + private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & ShrImmFlags.Scalar) != 0; + bool signed = (flags & ShrImmFlags.Signed) != 0; + bool round = (flags & ShrImmFlags.Round) != 0; + bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (op.Size <= 2) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signed + ? context.ShiftRightSI(e, Const(shift)) + : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 3) */ + { + e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift); + } + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = context.ShiftRightUI(e, Const(shift)); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + if (op.Size <= 1 || !round) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signedSrc + ? context.ShiftRightSI(e, Const(shift)) + : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32 + } + + e = EmitSatQ(context, e, op.Size, signedSrc, signedDst); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static Operand EmitShrImm64( + ArmEmitterContext context, + Operand value, + bool signed, + long roundConst, + int shift) + { + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64) + : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64); + + return context.Call(dlg, value, Const(roundConst), Const(shift)); + } + + private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true); + } + + private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false); + } + + private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs new file mode 100644 index 00000000..eeb53c1f --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSystem.cs @@ -0,0 +1,114 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private const int DczSizeLog2 = 4; + + public static void Hint(ArmEmitterContext context) + { + // Execute as no-op. + } + + public static void Isb(ArmEmitterContext context) + { + // Execute as no-op. + } + + public static void Mrs(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Delegate dlg; + + switch (GetPackedId(op)) + { + case 0b11_011_0000_0000_001: dlg = new _U64(NativeInterface.GetCtrEl0); break; + case 0b11_011_0000_0000_111: dlg = new _U64(NativeInterface.GetDczidEl0); break; + case 0b11_011_0100_0100_000: dlg = new _U64(NativeInterface.GetFpcr); break; + case 0b11_011_0100_0100_001: dlg = new _U64(NativeInterface.GetFpsr); break; + case 0b11_011_1101_0000_010: dlg = new _U64(NativeInterface.GetTpidrEl0); break; + case 0b11_011_1101_0000_011: dlg = new _U64(NativeInterface.GetTpidr); break; + case 0b11_011_1110_0000_000: dlg = new _U64(NativeInterface.GetCntfrqEl0); break; + case 0b11_011_1110_0000_001: dlg = new _U64(NativeInterface.GetCntpctEl0); break; + + default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + SetIntOrZR(context, op.Rt, context.Call(dlg)); + } + + public static void Msr(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Delegate dlg; + + switch (GetPackedId(op)) + { + case 0b11_011_0100_0100_000: dlg = new _Void_U64(NativeInterface.SetFpcr); break; + case 0b11_011_0100_0100_001: dlg = new _Void_U64(NativeInterface.SetFpsr); break; + case 0b11_011_1101_0000_010: dlg = new _Void_U64(NativeInterface.SetTpidrEl0); break; + + default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntOrZR(context, op.Rt)); + } + + public static void Nop(ArmEmitterContext context) + { + // Do nothing. + } + + public static void Sys(ArmEmitterContext context) + { + // This instruction is used to do some operations on the CPU like cache invalidation, + // address translation and the like. + // We treat it as no-op here since we don't have any cache being emulated anyway. + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + switch (GetPackedId(op)) + { + case 0b11_011_0111_0100_001: + { + // DC ZVA + Operand t = GetIntOrZR(context, op.Rt); + + for (long offset = 0; offset < (4 << DczSizeLog2); offset += 8) + { + Operand address = context.Add(t, Const(offset)); + + context.Call(new _Void_U64_U64(NativeInterface.WriteUInt64), address, Const(0L)); + } + + break; + } + + // No-op + case 0b11_011_0111_1110_001: //DC CIVAC + break; + } + } + + private static int GetPackedId(OpCodeSystem op) + { + int id; + + id = op.Op2 << 0; + id |= op.CRm << 3; + id |= op.CRn << 7; + id |= op.Op1 << 11; + id |= op.Op0 << 14; + + return id; + } + } +} diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs new file mode 100644 index 00000000..e70ca34b --- /dev/null +++ b/ARMeilleure/Instructions/InstName.cs @@ -0,0 +1,459 @@ +namespace ARMeilleure.Instructions +{ + enum InstName + { + // Base (AArch64) + Adc, + Adcs, + Add, + Adds, + Adr, + Adrp, + And, + Ands, + Asrv, + B, + B_Cond, + Bfm, + Bic, + Bics, + Bl, + Blr, + Br, + Brk, + Cbnz, + Cbz, + Ccmn, + Ccmp, + Clrex, + Cls, + Clz, + Crc32b, + Crc32h, + Crc32w, + Crc32x, + Crc32cb, + Crc32ch, + Crc32cw, + Crc32cx, + Csel, + Csinc, + Csinv, + Csneg, + Dmb, + Dsb, + Eon, + Eor, + Extr, + Hint, + Isb, + Ldar, + Ldaxp, + Ldaxr, + Ldp, + Ldr, + Ldr_Literal, + Ldrs, + Ldxr, + Ldxp, + Lslv, + Lsrv, + Madd, + Movk, + Movn, + Movz, + Mrs, + Msr, + Msub, + Nop, + Orn, + Orr, + Pfrm, + Rbit, + Ret, + Rev16, + Rev32, + Rev64, + Rorv, + Sbc, + Sbcs, + Sbfm, + Sdiv, + Smaddl, + Smsubl, + Smulh, + Stlr, + Stlxp, + Stlxr, + Stp, + Str, + Stxp, + Stxr, + Sub, + Subs, + Svc, + Sys, + Tbnz, + Tbz, + Ubfm, + Udiv, + Umaddl, + Umsubl, + Umulh, + Und, + + // FP & SIMD (AArch64) + Abs_S, + Abs_V, + Add_S, + Add_V, + Addhn_V, + Addp_S, + Addp_V, + Addv_V, + Aesd_V, + Aese_V, + Aesimc_V, + Aesmc_V, + And_V, + Bic_V, + Bic_Vi, + Bif_V, + Bit_V, + Bsl_V, + Cls_V, + Clz_V, + Cmeq_S, + Cmeq_V, + Cmge_S, + Cmge_V, + Cmgt_S, + Cmgt_V, + Cmhi_S, + Cmhi_V, + Cmhs_S, + Cmhs_V, + Cmle_S, + Cmle_V, + Cmlt_S, + Cmlt_V, + Cmtst_S, + Cmtst_V, + Cnt_V, + Dup_Gp, + Dup_S, + Dup_V, + Eor_V, + Ext_V, + Fabd_S, + Fabd_V, + Fabs_S, + Fabs_V, + Fadd_S, + Fadd_V, + Faddp_S, + Faddp_V, + Fccmp_S, + Fccmpe_S, + Fcmeq_S, + Fcmeq_V, + Fcmge_S, + Fcmge_V, + Fcmgt_S, + Fcmgt_V, + Fcmle_S, + Fcmle_V, + Fcmlt_S, + Fcmlt_V, + Fcmp_S, + Fcmpe_S, + Fcsel_S, + Fcvt_S, + Fcvtas_Gp, + Fcvtau_Gp, + Fcvtl_V, + Fcvtms_Gp, + Fcvtmu_Gp, + Fcvtn_V, + Fcvtns_S, + Fcvtns_V, + Fcvtnu_S, + Fcvtnu_V, + Fcvtps_Gp, + Fcvtpu_Gp, + Fcvtzs_Gp, + Fcvtzs_Gp_Fixed, + Fcvtzs_S, + Fcvtzs_V, + Fcvtzs_V_Fixed, + Fcvtzu_Gp, + Fcvtzu_Gp_Fixed, + Fcvtzu_S, + Fcvtzu_V, + Fcvtzu_V_Fixed, + Fdiv_S, + Fdiv_V, + Fmadd_S, + Fmax_S, + Fmax_V, + Fmaxnm_S, + Fmaxnm_V, + Fmaxp_V, + Fmin_S, + Fmin_V, + Fminnm_S, + Fminnm_V, + Fminp_V, + Fmla_Se, + Fmla_V, + Fmla_Ve, + Fmls_Se, + Fmls_V, + Fmls_Ve, + Fmov_S, + Fmov_Si, + Fmov_Vi, + Fmov_Ftoi, + Fmov_Itof, + Fmov_Ftoi1, + Fmov_Itof1, + Fmsub_S, + Fmul_S, + Fmul_Se, + Fmul_V, + Fmul_Ve, + Fmulx_S, + Fmulx_Se, + Fmulx_V, + Fmulx_Ve, + Fneg_S, + Fneg_V, + Fnmadd_S, + Fnmsub_S, + Fnmul_S, + Frecpe_S, + Frecpe_V, + Frecps_S, + Frecps_V, + Frecpx_S, + Frinta_S, + Frinta_V, + Frinti_S, + Frinti_V, + Frintm_S, + Frintm_V, + Frintn_S, + Frintn_V, + Frintp_S, + Frintp_V, + Frintx_S, + Frintx_V, + Frintz_S, + Frintz_V, + Frsqrte_S, + Frsqrte_V, + Frsqrts_S, + Frsqrts_V, + Fsqrt_S, + Fsqrt_V, + Fsub_S, + Fsub_V, + Ins_Gp, + Ins_V, + Ld__Vms, + Ld__Vss, + Mla_V, + Mla_Ve, + Mls_V, + Mls_Ve, + Movi_V, + Mul_V, + Mul_Ve, + Mvni_V, + Neg_S, + Neg_V, + Not_V, + Orn_V, + Orr_V, + Orr_Vi, + Raddhn_V, + Rbit_V, + Rev16_V, + Rev32_V, + Rev64_V, + Rshrn_V, + Rsubhn_V, + Saba_V, + Sabal_V, + Sabd_V, + Sabdl_V, + Sadalp_V, + Saddl_V, + Saddlp_V, + Saddlv_V, + Saddw_V, + Scvtf_Gp, + Scvtf_Gp_Fixed, + Scvtf_S, + Scvtf_V, + Scvtf_V_Fixed, + Sha1c_V, + Sha1h_V, + Sha1m_V, + Sha1p_V, + Sha1su0_V, + Sha1su1_V, + Sha256h_V, + Sha256h2_V, + Sha256su0_V, + Sha256su1_V, + Shadd_V, + Shl_S, + Shl_V, + Shll_V, + Shrn_V, + Shsub_V, + Sli_V, + Smax_V, + Smaxp_V, + Smaxv_V, + Smin_V, + Sminp_V, + Sminv_V, + Smlal_V, + Smlal_Ve, + Smlsl_V, + Smlsl_Ve, + Smov_S, + Smull_V, + Smull_Ve, + Sqabs_S, + Sqabs_V, + Sqadd_S, + Sqadd_V, + Sqdmulh_S, + Sqdmulh_V, + Sqneg_S, + Sqneg_V, + Sqrdmulh_S, + Sqrdmulh_V, + Sqrshl_V, + Sqrshrn_S, + Sqrshrn_V, + Sqrshrun_S, + Sqrshrun_V, + Sqshl_V, + Sqshrn_S, + Sqshrn_V, + Sqshrun_S, + Sqshrun_V, + Sqsub_S, + Sqsub_V, + Sqxtn_S, + Sqxtn_V, + Sqxtun_S, + Sqxtun_V, + Srhadd_V, + Srshl_V, + Srshr_S, + Srshr_V, + Srsra_S, + Srsra_V, + Sshl_V, + Sshll_V, + Sshr_S, + Sshr_V, + Ssra_S, + Ssra_V, + Ssubl_V, + Ssubw_V, + St__Vms, + St__Vss, + Sub_S, + Sub_V, + Subhn_V, + Suqadd_S, + Suqadd_V, + Tbl_V, + Trn1_V, + Trn2_V, + Uaba_V, + Uabal_V, + Uabd_V, + Uabdl_V, + Uadalp_V, + Uaddl_V, + Uaddlp_V, + Uaddlv_V, + Uaddw_V, + Ucvtf_Gp, + Ucvtf_Gp_Fixed, + Ucvtf_S, + Ucvtf_V, + Ucvtf_V_Fixed, + Uhadd_V, + Uhsub_V, + Umax_V, + Umaxp_V, + Umaxv_V, + Umin_V, + Uminp_V, + Uminv_V, + Umlal_V, + Umlal_Ve, + Umlsl_V, + Umlsl_Ve, + Umov_S, + Umull_V, + Umull_Ve, + Uqadd_S, + Uqadd_V, + Uqrshl_V, + Uqrshrn_S, + Uqrshrn_V, + Uqshl_V, + Uqshrn_S, + Uqshrn_V, + Uqsub_S, + Uqsub_V, + Uqxtn_S, + Uqxtn_V, + Urhadd_V, + Urshl_V, + Urshr_S, + Urshr_V, + Ursra_S, + Ursra_V, + Ushl_V, + Ushll_V, + Ushr_S, + Ushr_V, + Usqadd_S, + Usqadd_V, + Usra_S, + Usra_V, + Usubl_V, + Usubw_V, + Uzp1_V, + Uzp2_V, + Xtn_V, + Zip1_V, + Zip2_V, + + // Base (AArch32) + Blx, + Bx, + Cmp, + Ldm, + Ldrb, + Ldrd, + Ldrh, + Ldrsb, + Ldrsh, + Mov, + Stm, + Strb, + Strd, + Strh + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs new file mode 100644 index 00000000..3a1e91c8 --- /dev/null +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -0,0 +1,367 @@ +using ARMeilleure.Memory; +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Instructions +{ + static class NativeInterface + { + private const int ErgSizeLog2 = 4; + + private class ThreadContext + { + public ExecutionContext Context { get; } + public MemoryManager Memory { get; } + + public ulong ExclusiveAddress { get; set; } + public ulong ExclusiveValueLow { get; set; } + public ulong ExclusiveValueHigh { get; set; } + + public ThreadContext(ExecutionContext context, MemoryManager memory) + { + Context = context; + Memory = memory; + + ExclusiveAddress = ulong.MaxValue; + } + } + + [ThreadStatic] + private static ThreadContext _context; + + public static void RegisterThread(ExecutionContext context, MemoryManager memory) + { + _context = new ThreadContext(context, memory); + } + + public static void UnregisterThread() + { + _context = null; + } + + public static void Break(ulong address, int imm) + { + Statistics.PauseTimer(); + + GetContext().OnBreak(address, imm); + + Statistics.ResumeTimer(); + } + + public static void SupervisorCall(ulong address, int imm) + { + Statistics.PauseTimer(); + + GetContext().OnSupervisorCall(address, imm); + + Statistics.ResumeTimer(); + } + + public static void Undefined(ulong address, int opCode) + { + Statistics.PauseTimer(); + + GetContext().OnUndefined(address, opCode); + + Statistics.ResumeTimer(); + } + +#region "System registers" + public static ulong GetCtrEl0() + { + return (ulong)GetContext().CtrEl0; + } + + public static ulong GetDczidEl0() + { + return (ulong)GetContext().DczidEl0; + } + + public static ulong GetFpcr() + { + return (ulong)GetContext().Fpcr; + } + + public static ulong GetFpsr() + { + return (ulong)GetContext().Fpsr; + } + + public static ulong GetTpidrEl0() + { + return (ulong)GetContext().TpidrEl0; + } + + public static ulong GetTpidr() + { + return (ulong)GetContext().Tpidr; + } + + public static ulong GetCntfrqEl0() + { + return GetContext().CntfrqEl0; + } + + public static ulong GetCntpctEl0() + { + return GetContext().CntpctEl0; + } + + public static void SetFpcr(ulong value) + { + GetContext().Fpcr = (FPCR)value; + } + + public static void SetFpsr(ulong value) + { + GetContext().Fpsr = (FPSR)value; + } + + public static void SetTpidrEl0(ulong value) + { + GetContext().TpidrEl0 = (long)value; + } +#endregion + +#region "Read" + public static byte ReadByte(ulong address) + { + return GetMemoryManager().ReadByte((long)address); + } + + public static ushort ReadUInt16(ulong address) + { + return GetMemoryManager().ReadUInt16((long)address); + } + + public static uint ReadUInt32(ulong address) + { + return GetMemoryManager().ReadUInt32((long)address); + } + + public static ulong ReadUInt64(ulong address) + { + return GetMemoryManager().ReadUInt64((long)address); + } + + public static V128 ReadVector128(ulong address) + { + return GetMemoryManager().ReadVector128((long)address); + } +#endregion + +#region "Read exclusive" + public static byte ReadByteExclusive(ulong address) + { + byte value = _context.Memory.ReadByte((long)address); + + _context.ExclusiveAddress = GetMaskedExclusiveAddress(address); + _context.ExclusiveValueLow = value; + _context.ExclusiveValueHigh = 0; + + return value; + } + + public static ushort ReadUInt16Exclusive(ulong address) + { + ushort value = _context.Memory.ReadUInt16((long)address); + + _context.ExclusiveAddress = GetMaskedExclusiveAddress(address); + _context.ExclusiveValueLow = value; + _context.ExclusiveValueHigh = 0; + + return value; + } + + public static uint ReadUInt32Exclusive(ulong address) + { + uint value = _context.Memory.ReadUInt32((long)address); + + _context.ExclusiveAddress = GetMaskedExclusiveAddress(address); + _context.ExclusiveValueLow = value; + _context.ExclusiveValueHigh = 0; + + return value; + } + + public static ulong ReadUInt64Exclusive(ulong address) + { + ulong value = _context.Memory.ReadUInt64((long)address); + + _context.ExclusiveAddress = GetMaskedExclusiveAddress(address); + _context.ExclusiveValueLow = value; + _context.ExclusiveValueHigh = 0; + + return value; + } + + public static V128 ReadVector128Exclusive(ulong address) + { + V128 value = _context.Memory.AtomicLoadInt128((long)address); + + _context.ExclusiveAddress = GetMaskedExclusiveAddress(address); + _context.ExclusiveValueLow = value.GetUInt64(0); + _context.ExclusiveValueHigh = value.GetUInt64(1); + + return value; + } +#endregion + +#region "Write" + public static void WriteByte(ulong address, byte value) + { + GetMemoryManager().WriteByte((long)address, value); + } + + public static void WriteUInt16(ulong address, ushort value) + { + GetMemoryManager().WriteUInt16((long)address, value); + } + + public static void WriteUInt32(ulong address, uint value) + { + GetMemoryManager().WriteUInt32((long)address, value); + } + + public static void WriteUInt64(ulong address, ulong value) + { + GetMemoryManager().WriteUInt64((long)address, value); + } + + public static void WriteVector128(ulong address, V128 value) + { + GetMemoryManager().WriteVector128((long)address, value); + } +#endregion + +#region "Write exclusive" + public static int WriteByteExclusive(ulong address, byte value) + { + bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address); + + if (success) + { + success = _context.Memory.AtomicCompareExchangeByte( + (long)address, + (byte)_context.ExclusiveValueLow, + (byte)value); + + if (success) + { + ClearExclusive(); + } + } + + return success ? 0 : 1; + } + + public static int WriteUInt16Exclusive(ulong address, ushort value) + { + bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address); + + if (success) + { + success = _context.Memory.AtomicCompareExchangeInt16( + (long)address, + (short)_context.ExclusiveValueLow, + (short)value); + + if (success) + { + ClearExclusive(); + } + } + + return success ? 0 : 1; + } + + public static int WriteUInt32Exclusive(ulong address, uint value) + { + bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address); + + if (success) + { + success = _context.Memory.AtomicCompareExchangeInt32( + (long)address, + (int)_context.ExclusiveValueLow, + (int)value); + + if (success) + { + ClearExclusive(); + } + } + + return success ? 0 : 1; + } + + public static int WriteUInt64Exclusive(ulong address, ulong value) + { + bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address); + + if (success) + { + success = _context.Memory.AtomicCompareExchangeInt64( + (long)address, + (long)_context.ExclusiveValueLow, + (long)value); + + if (success) + { + ClearExclusive(); + } + } + + return success ? 0 : 1; + } + + public static int WriteVector128Exclusive(ulong address, V128 value) + { + bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address); + + if (success) + { + V128 expected = new V128(_context.ExclusiveValueLow, _context.ExclusiveValueHigh); + + success = _context.Memory.AtomicCompareExchangeInt128((long)address, expected, value); + + if (success) + { + ClearExclusive(); + } + } + + return success ? 0 : 1; + } +#endregion + + private static ulong GetMaskedExclusiveAddress(ulong address) + { + return address & ~((4UL << ErgSizeLog2) - 1); + } + + public static void ClearExclusive() + { + _context.ExclusiveAddress = ulong.MaxValue; + } + + public static void CheckSynchronization() + { + Statistics.PauseTimer(); + + GetContext().CheckInterrupt(); + + Statistics.ResumeTimer(); + } + + public static ExecutionContext GetContext() + { + return _context.Context; + } + + public static MemoryManager GetMemoryManager() + { + return _context.Memory; + } + } +}
\ No newline at end of file diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs new file mode 100644 index 00000000..dc030921 --- /dev/null +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -0,0 +1,1307 @@ +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Instructions +{ + static class SoftFallback + { +#region "ShlReg" + public static long SignedShlReg(long value, long shift, bool round, int size) + { + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return SignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return 0L; + } + + return value << shiftLsB; + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size) + { + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return UnsignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return 0UL; + } + + return value << shiftLsB; + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static long SignedShlRegSatQ(long value, long shift, bool round, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return SignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return SignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + long shl = value << shiftLsB; + long shr = shl >> shiftLsB; + + if (shr != value) + { + return SignedSignSatQ(value, eSize, context); + } + else /* if (shr == value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return SignedSrcSignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return UnsignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return UnsignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + ulong shl = value << shiftLsB; + ulong shr = shl >> shiftLsB; + + if (shr != value) + { + return UnsignedSignSatQ(value, eSize, context); + } + else /* if (shr == value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return UnsignedSrcUnsignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}. + { + if (round) + { + if (shift >= eSize) + { + return 0L; + } + + long roundConst = 1L << (shift - 1); + + long add = value + roundConst; + + if (eSize == 64) + { + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (eSize != 64) */ + { + return add >> shift; + } + } + else /* if (!round) */ + { + if (shift >= eSize) + { + if (value < 0L) + { + return -1L; + } + else /* if (value >= 0L) */ + { + return 0L; + } + } + + return value >> shift; + } + } + + private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}. + { + if (round) + { + if (shift > 64) + { + return 0UL; + } + + ulong roundConst = 1UL << (shift - 1); + + ulong add = value + roundConst; + + if (eSize == 64) + { + if ((add < value) && (add < roundConst)) + { + if (shift == 64) + { + return 1UL; + } + + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else + { + if (shift == 64) + { + return 0UL; + } + + return add >> shift; + } + } + else /* if (eSize != 64) */ + { + if (shift == 64) + { + return 0UL; + } + + return add >> shift; + } + } + else /* if (!round) */ + { + if (shift >= eSize) + { + return 0UL; + } + + return value >> shift; + } + } + + private static long SignedSignSatQ(long op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}. + { + long tMaxValue = (1L << (eSize - 1)) - 1L; + long tMinValue = -(1L << (eSize - 1)); + + if (op > 0L) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < 0L) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return 0L; + } + } + + private static ulong UnsignedSignSatQ(ulong op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}. + { + ulong tMaxValue = ulong.MaxValue >> (64 - eSize); + + if (op > 0UL) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return 0UL; + } + } +#endregion + +#region "ShrImm64" + public static long SignedShrImm64(long value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + if (value < 0L) + { + return -1L; + } + else /* if (value >= 0L) */ + { + return 0L; + } + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + if (shift <= 63) + { + long add = value + roundConst; + + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (shift == 64) */ + { + return 0L; + } + } + } + + public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + ulong add = value + (ulong)roundConst; + + if ((add < value) && (add < (ulong)roundConst)) + { + if (shift <= 63) + { + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else /* if (shift == 64) */ + { + return 1UL; + } + } + else + { + if (shift <= 63) + { + return add >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + } + } +#endregion + +#region "Rounding" + public static double Round(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + FPRoundingMode roundMode = context.Fpcr.GetRoundingMode(); + + if (roundMode == FPRoundingMode.ToNearest) + { + return Math.Round(value); // even + } + else if (roundMode == FPRoundingMode.TowardsPlusInfinity) + { + return Math.Ceiling(value); + } + else if (roundMode == FPRoundingMode.TowardsMinusInfinity) + { + return Math.Floor(value); + } + else /* if (roundMode == FPRoundingMode.TowardsZero) */ + { + return Math.Truncate(value); + } + } + + public static float RoundF(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + FPRoundingMode roundMode = context.Fpcr.GetRoundingMode(); + + if (roundMode == FPRoundingMode.ToNearest) + { + return MathF.Round(value); // even + } + else if (roundMode == FPRoundingMode.TowardsPlusInfinity) + { + return MathF.Ceiling(value); + } + else if (roundMode == FPRoundingMode.TowardsMinusInfinity) + { + return MathF.Floor(value); + } + else /* if (roundMode == FPRoundingMode.TowardsZero) */ + { + return MathF.Truncate(value); + } + } +#endregion + +#region "Saturation" + public static int SatF32ToS32(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= int.MaxValue ? int.MaxValue : + value <= int.MinValue ? int.MinValue : (int)value; + } + + public static long SatF32ToS64(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= long.MaxValue ? long.MaxValue : + value <= long.MinValue ? long.MinValue : (long)value; + } + + public static uint SatF32ToU32(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= uint.MaxValue ? uint.MaxValue : + value <= uint.MinValue ? uint.MinValue : (uint)value; + } + + public static ulong SatF32ToU64(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= ulong.MaxValue ? ulong.MaxValue : + value <= ulong.MinValue ? ulong.MinValue : (ulong)value; + } + + public static int SatF64ToS32(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= int.MaxValue ? int.MaxValue : + value <= int.MinValue ? int.MinValue : (int)value; + } + + public static long SatF64ToS64(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= long.MaxValue ? long.MaxValue : + value <= long.MinValue ? long.MinValue : (long)value; + } + + public static uint SatF64ToU32(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= uint.MaxValue ? uint.MaxValue : + value <= uint.MinValue ? uint.MinValue : (uint)value; + } + + public static ulong SatF64ToU64(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= ulong.MaxValue ? ulong.MaxValue : + value <= ulong.MinValue ? ulong.MinValue : (ulong)value; + } +#endregion + +#region "Saturating" + public static long SignedSrcSignedDstSatQ(long op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + long tMinValue = -(1L << (eSize - 1)); + + if (op > tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < tMinValue) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return op; + } + } + + public static ulong SignedSrcUnsignedDstSatQ(long op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + ulong tMinValue = 0UL; + + if (op > (long)tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < (long)tMinValue) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return (ulong)op; + } + } + + public static long UnsignedSrcSignedDstSatQ(ulong op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + + if (op > (ulong)tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return (long)op; + } + } + + public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + + if (op > tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return op; + } + } + + public static long UnarySignedSatQAbsOrNeg(long op) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op == long.MinValue) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return op; + } + } + + public static long BinarySignedSatQAdd(long op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + long add = op1 + op2; + + if ((~(op1 ^ op2) & (op1 ^ add)) < 0L) + { + context.Fpsr |= FPSR.Qc; + + if (op1 < 0L) + { + return long.MinValue; + } + else + { + return long.MaxValue; + } + } + else + { + return add; + } + } + + public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + ulong add = op1 + op2; + + if ((add < op1) && (add < op2)) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MaxValue; + } + else + { + return add; + } + } + + public static long BinarySignedSatQSub(long op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + long sub = op1 - op2; + + if (((op1 ^ op2) & (op1 ^ sub)) < 0L) + { + context.Fpsr |= FPSR.Qc; + + if (op1 < 0L) + { + return long.MinValue; + } + else + { + return long.MaxValue; + } + } + else + { + return sub; + } + } + + public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + ulong sub = op1 - op2; + + if (op1 < op2) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MinValue; + } + else + { + return sub; + } + } + + public static long BinarySignedSatQAcc(ulong op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op1 <= (ulong)long.MaxValue) + { + // op1 from ulong.MinValue to (ulong)long.MaxValue + // op2 from long.MinValue to long.MaxValue + + long add = (long)op1 + op2; + + if ((~op2 & add) < 0L) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return add; + } + } + else if (op2 >= 0L) + { + // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // op2 from (long)ulong.MinValue to long.MaxValue + + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // op2 from long.MinValue to (long)ulong.MinValue - 1L + + ulong add = op1 + (ulong)op2; + + if (add > (ulong)long.MaxValue) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return (long)add; + } + } + } + + public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op1 >= 0L) + { + // op1 from (long)ulong.MinValue to long.MaxValue + // op2 from ulong.MinValue to ulong.MaxValue + + ulong add = (ulong)op1 + op2; + + if ((add < (ulong)op1) && (add < op2)) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MaxValue; + } + else + { + return add; + } + } + else if (op2 > (ulong)long.MaxValue) + { + // op1 from long.MinValue to (long)ulong.MinValue - 1L + // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + + return (ulong)op1 + op2; + } + else + { + // op1 from long.MinValue to (long)ulong.MinValue - 1L + // op2 from ulong.MinValue to (ulong)long.MaxValue + + long add = op1 + (long)op2; + + if (add < (long)ulong.MinValue) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MinValue; + } + else + { + return (ulong)add; + } + } + } +#endregion + +#region "Count" + public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + value ^= value >> 1; + + int highBit = size - 2; + + for (int bit = highBit; bit >= 0; bit--) + { + if (((int)(value >> bit) & 0b1) != 0) + { + return (ulong)(highBit - bit); + } + } + + return (ulong)(size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + if (value == 0ul) + { + return (ulong)size; + } + + int nibbleIdx = size; + int preCount, count = 0; + + do + { + nibbleIdx -= 4; + preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111]; + count += preCount; + } + while (preCount == 4); + + return (ulong)count; + } + + public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.). + { + value = ((value >> 1) & 0x55ul) + (value & 0x55ul); + value = ((value >> 2) & 0x33ul) + (value & 0x33ul); + + return (value >> 4) + (value & 0x0ful); + } +#endregion + +#region "Table" + public static V128 Tbl1_V64(V128 vector, V128 tb0) + { + return Tbl(vector, 8, tb0); + } + + public static V128 Tbl1_V128(V128 vector, V128 tb0) + { + return Tbl(vector, 16, tb0); + } + + public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1) + { + return Tbl(vector, 8, tb0, tb1); + } + + public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1) + { + return Tbl(vector, 16, tb0, tb1); + } + + public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2) + { + return Tbl(vector, 8, tb0, tb1, tb2); + } + + public static V128 Tbl3_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2) + { + return Tbl(vector, 16, tb0, tb1, tb2); + } + + public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) + { + return Tbl(vector, 8, tb0, tb1, tb2, tb3); + } + + public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) + { + return Tbl(vector, 16, tb0, tb1, tb2, tb3); + } + + private static V128 Tbl(V128 vector, int bytes, params V128[] tb) + { + byte[] res = new byte[16]; + byte[] table = new byte[tb.Length * 16]; + + for (byte index = 0; index < tb.Length; index++) + { + Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16); + } + + byte[] v = vector.ToArray(); + + for (byte index = 0; index < bytes; index++) + { + byte tblIndex = v[index]; + + if (tblIndex < table.Length) + { + res[index] = table[tblIndex]; + } + } + + return new V128(res); + } +#endregion + +#region "Crc32" + private const uint Crc32RevPoly = 0xedb88320; + private const uint Crc32cRevPoly = 0x82f63b78; + + public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value); + public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value); + public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value); + public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value); + + public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value); + public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value); + public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value); + public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value); + + private static uint Crc32h(uint crc, uint poly, ushort val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + + return crc; + } + + private static uint Crc32w(uint crc, uint poly, uint val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + + return crc; + } + + private static uint Crc32x(uint crc, uint poly, ulong val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + crc = Crc32(crc, poly, (byte)(val >> 32)); + crc = Crc32(crc, poly, (byte)(val >> 40)); + crc = Crc32(crc, poly, (byte)(val >> 48)); + crc = Crc32(crc, poly, (byte)(val >> 56)); + + return crc; + } + + private static uint Crc32(uint crc, uint poly, byte val) + { + crc ^= val; + + for (int bit = 7; bit >= 0; bit--) + { + uint mask = (uint)(-(int)(crc & 1)); + + crc = (crc >> 1) ^ (poly & mask); + } + + return crc; + } +#endregion + +#region "Aes" + public static V128 Decrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey)); + } + + public static V128 Encrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey)); + } + + public static V128 InverseMixColumns(V128 value) + { + return CryptoHelper.AesInvMixColumns(value); + } + + public static V128 MixColumns(V128 value) + { + return CryptoHelper.AesMixColumns(value); + } +#endregion + +#region "Sha1" + public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaChoose(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static uint FixedRotate(uint hash_e) + { + return hash_e.Rol(30); + } + + public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaMajority(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaParity(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11) + { + ulong t2 = w4_7.GetUInt64(0); + ulong t1 = w0_3.GetUInt64(1); + + V128 result = new V128(t1, t2); + + return result ^ (w0_3 ^ w8_11); + } + + public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15) + { + V128 t = tw0_3 ^ (w12_15 >> 32); + + uint tE0 = t.GetUInt32(0); + uint tE1 = t.GetUInt32(1); + uint tE2 = t.GetUInt32(2); + uint tE3 = t.GetUInt32(3); + + return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2)); + } + + private static void Rol32_160(ref uint y, ref V128 x) + { + uint xE3 = x.GetUInt32(3); + + x <<= 32; + x.Insert(0, y); + + y = xE3; + } + + private static uint ShaChoose(uint x, uint y, uint z) + { + return ((y ^ z) & x) ^ z; + } + + private static uint ShaMajority(uint x, uint y, uint z) + { + return (x & y) | ((x | y) & z); + } + + private static uint ShaParity(uint x, uint y, uint z) + { + return x ^ y ^ z; + } + + private static uint Rol(this uint value, int count) + { + return (value << count) | (value >> (32 - count)); + } +#endregion + +#region "Sha256" + public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true); + } + + public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false); + } + + public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7) + { + V128 result = new V128(); + + for (int e = 0; e <= 3; e++) + { + uint elt = (e <= 2 ? w0_3 : w4_7).GetUInt32(e <= 2 ? e + 1 : 0); + + elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3); + + elt += w0_3.GetUInt32(e); + + result.Insert(e, elt); + } + + return result; + } + + public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15) + { + V128 result = new V128(); + + ulong t1 = w12_15.GetUInt64(1); + + for (int e = 0; e <= 1; e++) + { + uint elt = t1.ULongPart(e); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + w8_11.GetUInt32(e + 1); + + result.Insert(e, elt); + } + + t1 = result.GetUInt64(0); + + for (int e = 2; e <= 3; e++) + { + uint elt = t1.ULongPart(e - 2); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + (e == 2 ? w8_11 : w12_15).GetUInt32(e == 2 ? 3 : 0); + + result.Insert(e, elt); + } + + return result; + } + + private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1) + { + for (int e = 0; e <= 3; e++) + { + uint chs = ShaChoose(y.GetUInt32(0), + y.GetUInt32(1), + y.GetUInt32(2)); + + uint maj = ShaMajority(x.GetUInt32(0), + x.GetUInt32(1), + x.GetUInt32(2)); + + uint t1 = y.GetUInt32(3) + ShaHashSigma1(y.GetUInt32(0)) + chs + w.GetUInt32(e); + + uint t2 = t1 + x.GetUInt32(3); + + x.Insert(3, t2); + + t2 = t1 + ShaHashSigma0(x.GetUInt32(0)) + maj; + + y.Insert(3, t2); + + Rol32_256(ref y, ref x); + } + + return part1 ? x : y; + } + + private static void Rol32_256(ref V128 y, ref V128 x) + { + uint yE3 = y.GetUInt32(3); + uint xE3 = x.GetUInt32(3); + + y <<= 32; + x <<= 32; + + y.Insert(0, xE3); + x.Insert(0, yE3); + } + + private static uint ShaHashSigma0(uint x) + { + return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22); + } + + private static uint ShaHashSigma1(uint x) + { + return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25); + } + + private static uint Ror(this uint value, int count) + { + return (value >> count) | (value << (32 - count)); + } + + private static uint Lsr(this uint value, int count) + { + return value >> count; + } + + private static uint ULongPart(this ulong value, int part) + { + return part == 0 + ? (uint)(value & 0xFFFFFFFFUL) + : (uint)(value >> 32); + } +#endregion + +#region "Reverse" + public static uint ReverseBits8(uint value) + { + value = ((value & 0xaa) >> 1) | ((value & 0x55) << 1); + value = ((value & 0xcc) >> 2) | ((value & 0x33) << 2); + + return (value >> 4) | ((value & 0x0f) << 4); + } + + public static uint ReverseBits32(uint value) + { + value = ((value & 0xaaaaaaaa) >> 1) | ((value & 0x55555555) << 1); + value = ((value & 0xcccccccc) >> 2) | ((value & 0x33333333) << 2); + value = ((value & 0xf0f0f0f0) >> 4) | ((value & 0x0f0f0f0f) << 4); + value = ((value & 0xff00ff00) >> 8) | ((value & 0x00ff00ff) << 8); + + return (value >> 16) | (value << 16); + } + + public static ulong ReverseBits64(ulong value) + { + value = ((value & 0xaaaaaaaaaaaaaaaa) >> 1 ) | ((value & 0x5555555555555555) << 1 ); + value = ((value & 0xcccccccccccccccc) >> 2 ) | ((value & 0x3333333333333333) << 2 ); + value = ((value & 0xf0f0f0f0f0f0f0f0) >> 4 ) | ((value & 0x0f0f0f0f0f0f0f0f) << 4 ); + value = ((value & 0xff00ff00ff00ff00) >> 8 ) | ((value & 0x00ff00ff00ff00ff) << 8 ); + value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16); + + return (value >> 32) | (value << 32); + } + + public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value); + + public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16); + public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32); + + private enum RevSize + { + Rev16, + Rev32, + Rev64 + } + + private static ulong ReverseBytes(ulong value, RevSize size) + { + value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8); + + if (size == RevSize.Rev16) + { + return value; + } + + value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16); + + if (size == RevSize.Rev32) + { + return value; + } + + value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32); + + if (size == RevSize.Rev64) + { + return value; + } + + throw new ArgumentException(nameof(size)); + } +#endregion + } +} diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs new file mode 100644 index 00000000..7358e6b2 --- /dev/null +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -0,0 +1,2757 @@ +using ARMeilleure.State; +using System; +using System.Diagnostics; + +namespace ARMeilleure.Instructions +{ + static class SoftFloat + { + static SoftFloat() + { + RecipEstimateTable = BuildRecipEstimateTable(); + RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable(); + } + + internal static readonly byte[] RecipEstimateTable; + internal static readonly byte[] RecipSqrtEstimateTable; + + private static byte[] BuildRecipEstimateTable() + { + byte[] tbl = new byte[256]; + + for (int idx = 0; idx < 256; idx++) + { + uint src = (uint)idx + 256u; + + Debug.Assert(256u <= src && src < 512u); + + src = (src << 1) + 1u; + + uint aux = (1u << 19) / src; + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + + private static byte[] BuildRecipSqrtEstimateTable() + { + byte[] tbl = new byte[384]; + + for (int idx = 0; idx < 384; idx++) + { + uint src = (uint)idx + 128u; + + Debug.Assert(128u <= src && src < 512u); + + if (src < 256u) + { + src = (src << 1) + 1u; + } + else + { + src = (src >> 1) << 1; + src = (src + 1u) << 1; + } + + uint aux = 512u; + + while (src * (aux + 1u) * (aux + 1u) < (1u << 28)) + { + aux = aux + 1u; + } + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + } + + static class SoftFloat16_32 + { + public static float FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static double FPUnpackCv( + this ushort valueBits, + out FPType type, + out bool sign, + ExecutionContext context) + { + sign = (~(uint)valueBits & 0x8000u) == 0u; + + uint exp16 = ((uint)valueBits & 0x7C00u) >> 10; + uint frac16 = (uint)valueBits & 0x03FFu; + + double real; + + if (exp16 == 0u) + { + if (frac16 == 0u) + { + type = FPType.Zero; + real = 0d; + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10)); + } + } + else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0) + { + if (frac16 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10)); + } + + return sign ? -real : real; + } + + private static float FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + float result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu))); + } + + if (error != 0d) + { + FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static float FPConvertNaN(ushort valueBits) + { + return BitConverter.Int32BitsToSingle( + (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13)); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat32_16 + { + public static ushort FPConvert(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if (altHp) + { + resultBits = FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = FPDefaultNaN(); + } + else + { + resultBits = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = FPZero(sign); + } + else + { + resultBits = FPRoundCv(real, context); + } + + return resultBits; + } + + private static ushort FPDefaultNaN() + { + return (ushort)0x7E00u; + } + + private static ushort FPInfinity(bool sign) + { + return sign ? (ushort)0xFC00u : (ushort)0x7C00u; + } + + private static ushort FPZero(bool sign) + { + return sign ? (ushort)0x8000u : (ushort)0x0000u; + } + + private static ushort FPMaxNormal(bool sign) + { + return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; + } + + private static double FPUnpackCv( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + uint exp32 = (valueBits & 0x7F800000u) >> 23; + uint frac32 = valueBits & 0x007FFFFFu; + + double real; + + if (exp32 == 0u) + { + if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac32 != 0u) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); + } + } + else if (exp32 == 0xFFu) + { + if (frac32 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); + } + + return sign ? -real : real; + } + + private static ushort FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -14; + + const int e = 5; + const int f = 10; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + ushort resultBits; + + if ((context.Fpcr & FPCR.Ahp) == 0) + { + if (biasedExp >= (1u << e) - 1u) + { + resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + else + { + if (biasedExp >= 1u << e) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + FPProcessException(FPException.InvalidOp, context); + + error = 0d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + + if (error != 0d) + { + FPProcessException(FPException.Inexact, context); + } + + return resultBits; + } + + private static ushort FPConvertNaN(uint valueBits) + { + return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat32 + { + public static float FPAdd(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static int FPCompare(float value1, float value2, bool signalNaNs) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static float FPCompareEQ(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static float FPCompareGE(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static float FPCompareGT(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static float FPCompareLE(float value1, float value2) + { + return FPCompareGE(value2, value1); + } + + public static float FPCompareLT(float value1, float value2) + { + return FPCompareGT(value2, value1); + } + + public static float FPDiv(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + FPProcessException(FPException.DivideByZero, context); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMax(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMaxNum(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2); + } + + public static float FPMin(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMinNum(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2); + } + + public static float FPMul(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulAdd(float valueA, float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulSub(float valueA, float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static float FPMulX(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecipEstimate(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (MathF.Abs(value) < MathF.Pow(2f, -128)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 253u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu)); + } + + return result; + } + + public static float FPRecipStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2f + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecpX(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else + { + uint notExp = (~op >> 23) & 0xFFu; + uint maxExp = 0xFEu; + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23)); + } + + return result; + } + + public static float FPRSqrtEstimate(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (380u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15)); + } + + return result; + } + + public static float FPRSqrtStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3f + (value1 * value2)) / 2f; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPSqrt(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = MathF.Sqrt(value); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + + return result; + } + + public static float FPSub(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static float FPTwo(bool sign) + { + return sign ? -2f : +2f; + } + + private static float FPOnePointFive(bool sign) + { + return sign ? -1.5f : +1.5f; + } + + private static float FPNeg(this float value) + { + return -value; + } + + private static float ZerosOrOnes(bool ones) + { + return BitConverter.Int32BitsToSingle(ones ? -1 : 0); + } + + private static float FPUnpack( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + if ((valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x007FFFFFu) != 0u) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static float FPProcessNaNs( + FPType type1, + FPType type2, + uint op1, + uint op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + uint op1, + uint op2, + uint op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1u << 22; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat64 + { + public static double FPAdd(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static int FPCompare(double value1, double value2, bool signalNaNs) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static double FPCompareEQ(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static double FPCompareGE(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static double FPCompareGT(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static double FPCompareLE(double value1, double value2) + { + return FPCompareGE(value2, value1); + } + + public static double FPCompareLT(double value1, double value2) + { + return FPCompareGT(value2, value1); + } + + public static double FPDiv(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + FPProcessException(FPException.DivideByZero, context); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMax(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMaxNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2); + } + + public static double FPMin(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMinNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2); + } + + public static double FPMul(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulAdd(double valueA, double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulSub(double valueA, double value1, double value2) + { + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPMulX(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecipEstimate(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (Math.Abs(value) < Math.Pow(2d, -1024)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 2045u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul))); + } + + return result; + } + + public static double FPRecipStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2d + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecpX(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else + { + ulong notExp = (~op >> 52) & 0x7FFul; + ulong maxExp = 0x7FEul; + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52)); + } + + return result; + } + + public static double FPRSqrtEstimate(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (3068u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); + } + + return result; + } + + public static double FPRSqrtStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3d + (value1 * value2)) / 2d; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPSqrt(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = Math.Sqrt(value); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + + return result; + } + + public static double FPSub(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + private static double FPDefaultNaN() + { + return -double.NaN; + } + + private static double FPInfinity(bool sign) + { + return sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + private static double FPZero(bool sign) + { + return sign ? -0d : +0d; + } + + private static double FPMaxNormal(bool sign) + { + return sign ? double.MinValue : double.MaxValue; + } + + private static double FPTwo(bool sign) + { + return sign ? -2d : +2d; + } + + private static double FPOnePointFive(bool sign) + { + return sign ? -1.5d : +1.5d; + } + + private static double FPNeg(this double value) + { + return -value; + } + + private static double ZerosOrOnes(bool ones) + { + return BitConverter.Int64BitsToDouble(ones ? -1L : 0L); + } + + private static double FPUnpack( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0ul; + + if ((valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static double FPProcessNaNs( + FPType type1, + FPType type2, + ulong op1, + ulong op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + ulong op1, + ulong op2, + ulong op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1ul << 51; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } +} |
