diff options
| author | Alex Barney <thealexbarney@gmail.com> | 2018-10-30 19:43:02 -0600 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2018-10-30 22:43:02 -0300 |
| commit | 9cb57fb4bb3bbae0ae052a5af4a96a49fc5d864d (patch) | |
| tree | 0c97425aeb311c142bc92a6fcc503cb2c07d4376 /ChocolArm64/Instructions | |
| parent | 5a87e58183578f5b84ca8d01cbb76aed11820f78 (diff) | |
Adjust naming conventions for Ryujinx and ChocolArm64 projects (#484)
* Change naming convention for Ryujinx project
* Change naming convention for ChocolArm64 project
* Fix NaN
* Remove unneeded this. from Ryujinx project
* Adjust naming from new PRs
* Name changes based on feedback
* How did this get removed?
* Rebasing fix
* Change FP enum case
* Remove prefix from ChocolArm64 classes - Part 1
* Remove prefix from ChocolArm64 classes - Part 2
* Fix alignment from last commit's renaming
* Rename namespaces
* Rename stragglers
* Fix alignment
* Rename OpCode class
* Missed a few
* Adjust alignment
Diffstat (limited to 'ChocolArm64/Instructions')
31 files changed, 13615 insertions, 0 deletions
// Lookup-table software implementation of the AES building blocks used by the
// AArch64 AESE/AESD/AESMC/AESIMC vector instructions. Table layout follows:
// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace ChocolArm64.Instructions
{
    static class CryptoHelper
    {
#region "LookUp Tables"
        // FIPS-197 AES S-box (forward SubBytes).
        // All tables below are readonly so the shared references cannot be
        // reassigned; the byte contents are never written after initialization.
        private static readonly byte[] _sBox =
        {
            0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
            0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
            0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
            0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
            0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
            0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
            0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
            0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
            0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
            0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
            0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
            0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
            0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
            0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
            0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
            0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
        };

        // FIPS-197 inverse S-box (InvSubBytes).
        private static readonly byte[] _invSBox =
        {
            0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
        };

        // GF(2^8) multiplication by 0x02 (xtime), AES polynomial 0x11b.
        private static readonly byte[] _gfMul02 =
        {
            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
            0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
            0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
            0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
            0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
            0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
            0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
            0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
            0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
            0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
            0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
            0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
            0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
            0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
            0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
            0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
        };

        // GF(2^8) multiplication by 0x03.
        private static readonly byte[] _gfMul03 =
        {
            0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
            0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
            0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
            0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
            0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
            0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
            0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
            0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
            0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
            0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
            0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
            0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
            0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
            0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
            0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
            0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
        };

        // GF(2^8) multiplication by 0x09 (used by InvMixColumns).
        private static readonly byte[] _gfMul09 =
        {
            0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
            0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
            0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
            0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
            0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
            0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
            0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
            0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
            0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
            0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
            0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
            0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
            0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
            0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
            0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
            0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
        };

        // GF(2^8) multiplication by 0x0b (used by InvMixColumns).
        private static readonly byte[] _gfMul0B =
        {
            0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
            0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
            0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
            0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
            0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
            0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
            0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
            0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
            0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
            0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
            0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
            0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
            0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
            0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
            0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
            0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
        };

        // GF(2^8) multiplication by 0x0d (used by InvMixColumns).
        private static readonly byte[] _gfMul0D =
        {
            0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
            0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
            0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
            0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
            0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
            0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
            0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
            0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
            0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
            0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
            0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
            0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
            0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
            0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
            0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
            0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
        };

        // GF(2^8) multiplication by 0x0e (used by InvMixColumns).
        private static readonly byte[] _gfMul0E =
        {
            0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
            0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
            0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
            0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
            0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
            0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
            0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
            0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
            0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
            0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
            0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
            0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
            0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
            0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
            0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
            0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
        };

        // Byte permutation for ShiftRows: outState[_srPerm[i]] = inState[i].
        private static readonly byte[] _srPerm = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };

        // Byte permutation for InvShiftRows: outState[_isrPerm[i]] = inState[i].
        private static readonly byte[] _isrPerm = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 };
#endregion

        // Applies the AES InvMixColumns transformation to the 16-byte state in
        // <paramref name="op"/> and returns the transformed vector.
        public static Vector128<float> AesInvMixColumns(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int columns = 0; columns <= 3; columns++)
            {
                int idx = columns << 2;

                byte row0 = inState[idx + 0]; // A, E, I, M: [Row0, Col0-Col3]
                byte row1 = inState[idx + 1]; // B, F, J, N: [Row1, Col0-Col3]
                byte row2 = inState[idx + 2]; // C, G, K, O: [Row2, Col0-Col3]
                byte row3 = inState[idx + 3]; // D, H, L, P: [Row3, Col0-Col3]

                // Inverse MixColumns matrix: [0e 0b 0d 09] rotated per row.
                outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]);
                outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]);
                outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]);
                outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]);
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Applies the AES InvShiftRows byte permutation to the state in
        // <paramref name="op"/> and returns the transformed vector.
        public static Vector128<float> AesInvShiftRows(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[_isrPerm[idx]] = inState[idx];
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Applies the AES InvSubBytes substitution (inverse S-box) to each byte
        // of the state in <paramref name="op"/> and returns the result.
        public static Vector128<float> AesInvSubBytes(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[idx] = _invSBox[inState[idx]];
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Applies the AES MixColumns transformation to the 16-byte state in
        // <paramref name="op"/> and returns the transformed vector.
        public static Vector128<float> AesMixColumns(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int columns = 0; columns <= 3; columns++)
            {
                int idx = columns << 2;

                byte row0 = inState[idx + 0]; // A, E, I, M: [Row0, Col0-Col3]
                byte row1 = inState[idx + 1]; // B, F, J, N: [Row1, Col0-Col3]
                byte row2 = inState[idx + 2]; // C, G, K, O: [Row2, Col0-Col3]
                byte row3 = inState[idx + 3]; // D, H, L, P: [Row3, Col0-Col3]

                // Forward MixColumns matrix: [02 03 01 01] rotated per row.
                outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3);
                outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3);
                outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]);
                outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]);
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Applies the AES ShiftRows byte permutation to the state in
        // <paramref name="op"/> and returns the transformed vector.
        public static Vector128<float> AesShiftRows(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[_srPerm[idx]] = inState[idx];
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Applies the AES SubBytes substitution (forward S-box) to each byte
        // of the state in <paramref name="op"/> and returns the result.
        public static Vector128<float> AesSubBytes(Vector128<float> op)
        {
            byte[] inState  = new byte[16];
            byte[] outState = new byte[16];

            FromVectorToByteArray(inState, ref op);

            for (int idx = 0; idx <= 15; idx++)
            {
                outState[idx] = _sBox[inState[idx]];
            }

            FromByteArrayToVector(outState, ref op);

            return op;
        }

        // Copies the 16 bytes of the vector into state[0..15], little-endian:
        // the low 64-bit lane fills state[0..7], the high lane fills state[8..15].
        private static void FromVectorToByteArray(byte[] state, ref Vector128<float> op)
        {
            ulong uLongLow  = VectorHelper.VectorExtractIntZx(op, (byte)0, 3);
            ulong uLongHigh = VectorHelper.VectorExtractIntZx(op, (byte)1, 3);

            for (int idx = 0; idx <= 7; idx++)
            {
                state[idx + 0] = (byte)(uLongLow  & 0xFFUL);
                state[idx + 8] = (byte)(uLongHigh & 0xFFUL);

                uLongLow  >>= 8;
                uLongHigh >>= 8;
            }
        }

        // Packs state[0..15] back into the vector (state[0] is the lowest byte).
        // Requires SSE2; the callers are only reached on x86 hosts.
        private static void FromByteArrayToVector(byte[] state, ref Vector128<float> op)
        {
            if (!Sse2.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            op = Sse.StaticCast<byte, float>(Sse2.SetVector128(
                state[15], state[14], state[13], state[12],
                state[11], state[10], state[9],  state[8],
                state[7],  state[6],  state[5],  state[4],
                state[3],  state[2],  state[1],  state[0]));
        }
    }
}
namespace ChocolArm64.Instructions
{
    // Decoder table entry: pairs the interpreter callback, the IL emitter
    // callback, and the opcode type used to decode one A64 instruction.
    struct Inst
    {
        public InstInterpreter Interpreter { get; }
        public InstEmitter     Emitter     { get; }
        public Type            Type        { get; }

        // Sentinel entry returned when no instruction matches; its emitter
        // generates the undefined-instruction handler.
        public static Inst Undefined => new Inst(null, InstEmit.Und, null);

        public Inst(InstInterpreter interpreter, InstEmitter emitter, Type type)
        {
            Interpreter = interpreter;
            Emitter     = emitter;
            Type        = type;
        }
    }
}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitAlu.cs b/ChocolArm64/Instructions/InstEmitAlu.cs new file mode 100644 index 00000000..c0258ed2 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitAlu.cs @@ -0,0 +1,402 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitAluHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Adc(ILEmitterCtx context) => EmitAdc(context, false); + public static void Adcs(ILEmitterCtx context) => EmitAdc(context, true); + + private static void EmitAdc(ILEmitterCtx context, bool setFlags) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Add); + + context.EmitLdflg((int)PState.CBit); + + Type[] mthdTypes = new Type[] { typeof(bool) }; + + MethodInfo mthdInfo = typeof(Convert).GetMethod(nameof(Convert.ToInt32), mthdTypes); + + context.EmitCall(mthdInfo); + + if (context.CurrOp.RegisterSize != RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.Emit(OpCodes.Add); + + if (setFlags) + { + context.EmitZnFlagCheck(); + + EmitAdcsCCheck(context); + EmitAddsVCheck(context); + } + + EmitDataStore(context); + } + + public static void Add(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Add); + + public static void Adds(ILEmitterCtx context) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Add); + + context.EmitZnFlagCheck(); + + EmitAddsCCheck(context); + EmitAddsVCheck(context); + EmitDataStoreS(context); + } + + public static void And(ILEmitterCtx context) => EmitDataOp(context, OpCodes.And); + + public static void Ands(ILEmitterCtx context) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.And); + + EmitZeroCvFlags(context); + + context.EmitZnFlagCheck(); + + EmitDataStoreS(context); + } + + public static void 
Asrv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shr); + + public static void Bic(ILEmitterCtx context) => EmitBic(context, false); + public static void Bics(ILEmitterCtx context) => EmitBic(context, true); + + private static void EmitBic(ILEmitterCtx context, bool setFlags) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Not); + context.Emit(OpCodes.And); + + if (setFlags) + { + EmitZeroCvFlags(context); + + context.EmitZnFlagCheck(); + } + + EmitDataStore(context, setFlags); + } + + public static void Cls(ILEmitterCtx context) + { + OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + context.EmitLdc_I4(op.RegisterSize == RegisterSize.Int32 ? 32 : 64); + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingSigns)); + + context.EmitStintzr(op.Rd); + } + + public static void Clz(ILEmitterCtx context) + { + OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (Lzcnt.IsSupported) + { + Type tValue = op.RegisterSize == RegisterSize.Int32 ? typeof(uint) : typeof(ulong); + + context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { tValue })); + } + else + { + context.EmitLdc_I4(op.RegisterSize == RegisterSize.Int32 ? 32 : 64); + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingZeros)); + } + + context.EmitStintzr(op.Rd); + } + + public static void Eon(ILEmitterCtx context) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Not); + context.Emit(OpCodes.Xor); + + EmitDataStore(context); + } + + public static void Eor(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Xor); + + public static void Extr(ILEmitterCtx context) + { + //TODO: Ensure that the Shift is valid for the Is64Bits. 
+ OpCodeAluRs64 op = (OpCodeAluRs64)context.CurrOp; + + context.EmitLdintzr(op.Rm); + + if (op.Shift > 0) + { + context.EmitLdc_I4(op.Shift); + + context.Emit(OpCodes.Shr_Un); + + context.EmitLdintzr(op.Rn); + context.EmitLdc_I4(op.GetBitsCount() - op.Shift); + + context.Emit(OpCodes.Shl); + context.Emit(OpCodes.Or); + } + + EmitDataStore(context); + } + + public static void Lslv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shl); + public static void Lsrv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shr_Un); + + public static void Sbc(ILEmitterCtx context) => EmitSbc(context, false); + public static void Sbcs(ILEmitterCtx context) => EmitSbc(context, true); + + private static void EmitSbc(ILEmitterCtx context, bool setFlags) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Sub); + + context.EmitLdflg((int)PState.CBit); + + Type[] mthdTypes = new Type[] { typeof(bool) }; + + MethodInfo mthdInfo = typeof(Convert).GetMethod(nameof(Convert.ToInt32), mthdTypes); + + context.EmitCall(mthdInfo); + + context.EmitLdc_I4(1); + + context.Emit(OpCodes.Xor); + + if (context.CurrOp.RegisterSize != RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.Emit(OpCodes.Sub); + + if (setFlags) + { + context.EmitZnFlagCheck(); + + EmitSbcsCCheck(context); + EmitSubsVCheck(context); + } + + EmitDataStore(context); + } + + public static void Sub(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Sub); + + public static void Subs(ILEmitterCtx context) + { + context.TryOptMarkCondWithoutCmp(); + + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Sub); + + context.EmitZnFlagCheck(); + + EmitSubsCCheck(context); + EmitSubsVCheck(context); + EmitDataStoreS(context); + } + + public static void Orn(ILEmitterCtx context) + { + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Not); + context.Emit(OpCodes.Or); + + EmitDataStore(context); + } + + public static void Orr(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Or); + + 
public static void Rbit(ILEmitterCtx context) => EmitFallback32_64(context, + nameof(SoftFallback.ReverseBits32), + nameof(SoftFallback.ReverseBits64)); + + public static void Rev16(ILEmitterCtx context) => EmitFallback32_64(context, + nameof(SoftFallback.ReverseBytes16_32), + nameof(SoftFallback.ReverseBytes16_64)); + + public static void Rev32(ILEmitterCtx context) => EmitFallback32_64(context, + nameof(SoftFallback.ReverseBytes32_32), + nameof(SoftFallback.ReverseBytes32_64)); + + private static void EmitFallback32_64(ILEmitterCtx context, string name32, string name64) + { + OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + SoftFallback.EmitCall(context, name32); + } + else + { + SoftFallback.EmitCall(context, name64); + } + + context.EmitStintzr(op.Rd); + } + + public static void Rev64(ILEmitterCtx context) + { + OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.ReverseBytes64)); + + context.EmitStintzr(op.Rd); + } + + public static void Rorv(ILEmitterCtx context) + { + EmitDataLoadRn(context); + EmitDataLoadShift(context); + + context.Emit(OpCodes.Shr_Un); + + EmitDataLoadRn(context); + + context.EmitLdc_I4(context.CurrOp.GetBitsCount()); + + EmitDataLoadShift(context); + + context.Emit(OpCodes.Sub); + context.Emit(OpCodes.Shl); + context.Emit(OpCodes.Or); + + EmitDataStore(context); + } + + public static void Sdiv(ILEmitterCtx context) => EmitDiv(context, OpCodes.Div); + public static void Udiv(ILEmitterCtx context) => EmitDiv(context, OpCodes.Div_Un); + + private static void EmitDiv(ILEmitterCtx context, OpCode ilOp) + { + //If Rm == 0, Rd = 0 (division by zero). 
+ context.EmitLdc_I(0); + + EmitDataLoadRm(context); + + context.EmitLdc_I(0); + + ILLabel badDiv = new ILLabel(); + + context.Emit(OpCodes.Beq_S, badDiv); + context.Emit(OpCodes.Pop); + + if (ilOp == OpCodes.Div) + { + //If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + long intMin = 1L << (context.CurrOp.GetBitsCount() - 1); + + context.EmitLdc_I(intMin); + + EmitDataLoadRn(context); + + context.EmitLdc_I(intMin); + + context.Emit(OpCodes.Ceq); + + EmitDataLoadRm(context); + + context.EmitLdc_I(-1); + + context.Emit(OpCodes.Ceq); + context.Emit(OpCodes.And); + context.Emit(OpCodes.Brtrue_S, badDiv); + context.Emit(OpCodes.Pop); + } + + EmitDataLoadRn(context); + EmitDataLoadRm(context); + + context.Emit(ilOp); + + context.MarkLabel(badDiv); + + EmitDataStore(context); + } + + private static void EmitDataOp(ILEmitterCtx context, OpCode ilOp) + { + EmitDataLoadOpers(context); + + context.Emit(ilOp); + + EmitDataStore(context); + } + + private static void EmitDataOpShift(ILEmitterCtx context, OpCode ilOp) + { + EmitDataLoadRn(context); + EmitDataLoadShift(context); + + context.Emit(ilOp); + + EmitDataStore(context); + } + + private static void EmitDataLoadShift(ILEmitterCtx context) + { + EmitDataLoadRm(context); + + context.EmitLdc_I(context.CurrOp.GetBitsCount() - 1); + + context.Emit(OpCodes.And); + + //Note: Only 32-bits shift values are valid, so when the value is 64-bits + //we need to cast it to a 32-bits integer. This is fine because we + //AND the value and only keep the lower 5 or 6 bits anyway -- it + //could very well fit on a byte. 
+ if (context.CurrOp.RegisterSize != RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_I4); + } + } + + private static void EmitZeroCvFlags(ILEmitterCtx context) + { + context.EmitLdc_I4(0); + + context.EmitStflg((int)PState.VBit); + + context.EmitLdc_I4(0); + + context.EmitStflg((int)PState.CBit); + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitAluHelper.cs b/ChocolArm64/Instructions/InstEmitAluHelper.cs new file mode 100644 index 00000000..613dd234 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitAluHelper.cs @@ -0,0 +1,212 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System.Reflection.Emit; + +namespace ChocolArm64.Instructions +{ + static class InstEmitAluHelper + { + public static void EmitAdcsCCheck(ILEmitterCtx context) + { + //C = (Rd == Rn && CIn) || Rd < Rn + context.EmitSttmp(); + context.EmitLdtmp(); + context.EmitLdtmp(); + + EmitDataLoadRn(context); + + context.Emit(OpCodes.Ceq); + + context.EmitLdflg((int)PState.CBit); + + context.Emit(OpCodes.And); + + context.EmitLdtmp(); + + EmitDataLoadRn(context); + + context.Emit(OpCodes.Clt_Un); + context.Emit(OpCodes.Or); + + context.EmitStflg((int)PState.CBit); + } + + public static void EmitAddsCCheck(ILEmitterCtx context) + { + //C = Rd < Rn + context.Emit(OpCodes.Dup); + + EmitDataLoadRn(context); + + context.Emit(OpCodes.Clt_Un); + + context.EmitStflg((int)PState.CBit); + } + + public static void EmitAddsVCheck(ILEmitterCtx context) + { + //V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0 + context.Emit(OpCodes.Dup); + + EmitDataLoadRn(context); + + context.Emit(OpCodes.Xor); + + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Xor); + context.Emit(OpCodes.Not); + context.Emit(OpCodes.And); + + context.EmitLdc_I(0); + + context.Emit(OpCodes.Clt); + + context.EmitStflg((int)PState.VBit); + } + + public static void EmitSbcsCCheck(ILEmitterCtx context) + { + //C = (Rn == Rm && CIn) || Rn > Rm + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Ceq); + 
+ context.EmitLdflg((int)PState.CBit); + + context.Emit(OpCodes.And); + + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Cgt_Un); + context.Emit(OpCodes.Or); + + context.EmitStflg((int)PState.CBit); + } + + public static void EmitSubsCCheck(ILEmitterCtx context) + { + //C = Rn == Rm || Rn > Rm = !(Rn < Rm) + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Clt_Un); + + context.EmitLdc_I4(1); + + context.Emit(OpCodes.Xor); + + context.EmitStflg((int)PState.CBit); + } + + public static void EmitSubsVCheck(ILEmitterCtx context) + { + //V = (Rd ^ Rn) & (Rn ^ Rm) < 0 + context.Emit(OpCodes.Dup); + + EmitDataLoadRn(context); + + context.Emit(OpCodes.Xor); + + EmitDataLoadOpers(context); + + context.Emit(OpCodes.Xor); + context.Emit(OpCodes.And); + + context.EmitLdc_I(0); + + context.Emit(OpCodes.Clt); + + context.EmitStflg((int)PState.VBit); + } + + public static void EmitDataLoadRm(ILEmitterCtx context) + { + context.EmitLdintzr(((IOpCodeAluRs64)context.CurrOp).Rm); + } + + public static void EmitDataLoadOpers(ILEmitterCtx context) + { + EmitDataLoadRn(context); + EmitDataLoadOper2(context); + } + + public static void EmitDataLoadRn(ILEmitterCtx context) + { + IOpCodeAlu64 op = (IOpCodeAlu64)context.CurrOp; + + if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs64) + { + context.EmitLdintzr(op.Rn); + } + else + { + context.EmitLdint(op.Rn); + } + } + + public static void EmitDataLoadOper2(ILEmitterCtx context) + { + switch (context.CurrOp) + { + case IOpCodeAluImm64 op: + context.EmitLdc_I(op.Imm); + break; + + case IOpCodeAluRs64 op: + context.EmitLdintzr(op.Rm); + + switch (op.ShiftType) + { + case ShiftType.Lsl: context.EmitLsl(op.Shift); break; + case ShiftType.Lsr: context.EmitLsr(op.Shift); break; + case ShiftType.Asr: context.EmitAsr(op.Shift); break; + case ShiftType.Ror: context.EmitRor(op.Shift); break; + } + break; + + case IOpCodeAluRx64 op: + context.EmitLdintzr(op.Rm); + context.EmitCast(op.IntType); + context.EmitLsl(op.Shift); + break; + } + 
} + + public static void EmitDataStore(ILEmitterCtx context) => EmitDataStore(context, false); + public static void EmitDataStoreS(ILEmitterCtx context) => EmitDataStore(context, true); + + public static void EmitDataStore(ILEmitterCtx context, bool setFlags) + { + IOpCodeAlu64 op = (IOpCodeAlu64)context.CurrOp; + + if (setFlags || op is IOpCodeAluRs64) + { + context.EmitStintzr(op.Rd); + } + else + { + context.EmitStint(op.Rd); + } + } + + public static void EmitSetNzcv(ILEmitterCtx context, int nzcv) + { + context.EmitLdc_I4((nzcv >> 0) & 1); + + context.EmitStflg((int)PState.VBit); + + context.EmitLdc_I4((nzcv >> 1) & 1); + + context.EmitStflg((int)PState.CBit); + + context.EmitLdc_I4((nzcv >> 2) & 1); + + context.EmitStflg((int)PState.ZBit); + + context.EmitLdc_I4((nzcv >> 3) & 1); + + context.EmitStflg((int)PState.NBit); + } + } +}
// File: ChocolArm64/Instructions/InstEmitBfm.cs
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //BFM: insert the rotated, masked source field into Rd, keeping the
        //bits of Rd that fall outside the target field.
        public static void Bfm(ILEmitterCtx context)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            EmitBfmLoadRn(context);

            context.EmitLdintzr(op.Rd);
            context.EmitLdc_I(~op.WMask & op.TMask);
            context.Emit(OpCodes.And);
            context.Emit(OpCodes.Or);

            context.EmitLdintzr(op.Rd);
            context.EmitLdc_I(~op.TMask);
            context.Emit(OpCodes.And);
            context.Emit(OpCodes.Or);

            context.EmitStintzr(op.Rd);
        }

        //SBFM: signed bitfield move, specialized into plain shifts, field
        //inserts and sign-extending casts where the encoding allows it.
        public static void Sbfm(ILEmitterCtx context)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            int bitsCount = op.GetBitsCount();

            if (op.Pos + 1 == bitsCount)
            {
                EmitSbfmShift(context); //ASR alias.
            }
            else if (op.Pos < op.Shift)
            {
                EmitSbfiz(context); //SBFIZ alias.
            }
            else if (op.Pos == 7 && op.Shift == 0)
            {
                EmitSbfmCast(context, OpCodes.Conv_I1); //SXTB alias.
            }
            else if (op.Pos == 15 && op.Shift == 0)
            {
                EmitSbfmCast(context, OpCodes.Conv_I2); //SXTH alias.
            }
            else if (op.Pos == 31 && op.Shift == 0)
            {
                EmitSbfmCast(context, OpCodes.Conv_I4); //SXTW alias.
            }
            else
            {
                //General case: combine the rotated field with the sign
                //extension of the source's top field bit.
                EmitBfmLoadRn(context);

                context.EmitLdintzr(op.Rn);

                context.EmitLsl(bitsCount - 1 - op.Pos);
                context.EmitAsr(bitsCount - 1);

                context.EmitLdc_I(~op.TMask);
                context.Emit(OpCodes.And);
                context.Emit(OpCodes.Or);

                context.EmitStintzr(op.Rd);
            }
        }

        //UBFM: unsigned bitfield move, specialized like Sbfm above.
        public static void Ubfm(ILEmitterCtx context)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            if (op.Pos + 1 == op.GetBitsCount())
            {
                EmitUbfmShift(context); //LSR alias.
            }
            else if (op.Pos < op.Shift)
            {
                EmitUbfiz(context); //UBFIZ alias.
            }
            else if (op.Pos + 1 == op.Shift)
            {
                EmitBfmLsl(context); //LSL alias.
            }
            else if (op.Pos == 7 && op.Shift == 0)
            {
                EmitUbfmCast(context, OpCodes.Conv_U1); //UXTB alias.
            }
            else if (op.Pos == 15 && op.Shift == 0)
            {
                EmitUbfmCast(context, OpCodes.Conv_U2); //UXTH alias.
            }
            else
            {
                //General case: the masked, rotated field is already the result.
                EmitBfmLoadRn(context);

                context.EmitStintzr(op.Rd);
            }
        }

        private static void EmitSbfiz(ILEmitterCtx context) => EmitBfiz(context, true);
        private static void EmitUbfiz(ILEmitterCtx context) => EmitBfiz(context, false);

        //(S|U)BFIZ: move the low Pos+1 bits of Rn to the encoded position,
        //sign or zero extending the result.
        private static void EmitBfiz(ILEmitterCtx context, bool signed)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            int width = op.Pos + 1;

            context.EmitLdintzr(op.Rn);

            //Shift all the way left, then back right so that only the field
            //remains, extended with the requested signedness.
            context.EmitLsl(op.GetBitsCount() - width);

            if (signed)
            {
                context.EmitAsr(op.Shift - width);
            }
            else
            {
                context.EmitLsr(op.Shift - width);
            }

            context.EmitStintzr(op.Rd);
        }

        private static void EmitSbfmCast(ILEmitterCtx context, OpCode ilOp) => EmitBfmCast(context, ilOp, true);
        private static void EmitUbfmCast(ILEmitterCtx context, OpCode ilOp) => EmitBfmCast(context, ilOp, false);

        //Extension aliases (SXTB/H/W, UXTB/H): a single IL conversion.
        private static void EmitBfmCast(ILEmitterCtx context, OpCode ilOp, bool signed)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            context.Emit(ilOp);

            if (op.RegisterSize != RegisterSize.Int32)
            {
                //Widen back to 64-bits after the narrowing conversion.
                context.Emit(signed ? OpCodes.Conv_I8 : OpCodes.Conv_U8);
            }

            context.EmitStintzr(op.Rd);
        }

        private static void EmitSbfmShift(ILEmitterCtx context) => EmitBfmShift(context, true);
        private static void EmitUbfmShift(ILEmitterCtx context) => EmitBfmShift(context, false);

        //ASR/LSR aliases: shift right by the encoded amount.
        private static void EmitBfmShift(ILEmitterCtx context, bool signed)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            context.EmitLdintzr(op.Rn);
            context.EmitLdc_I4(op.Shift);

            context.Emit(signed ? OpCodes.Shr : OpCodes.Shr_Un);

            context.EmitStintzr(op.Rd);
        }

        //LSL alias: shift left by (bits count - Shift).
        private static void EmitBfmLsl(ILEmitterCtx context)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            context.EmitLsl(op.GetBitsCount() - op.Shift);

            context.EmitStintzr(op.Rd);
        }

        //Loads ROR(Rn, Shift) & WMask & TMask, the source field used by all
        //the BFM forms above.
        private static void EmitBfmLoadRn(ILEmitterCtx context)
        {
            OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

            context.EmitLdintzr(op.Rn);

            context.EmitRor(op.Shift);

            context.EmitLdc_I(op.WMask & op.TMask);

            context.Emit(OpCodes.And);
        }
    }
}
// File: ChocolArm64/Instructions/InstEmitCcmp.cs
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;

using static ChocolArm64.Instructions.InstEmitAluHelper;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Distinguishes the two conditional compare flavors handled by EmitCcmp.
        private enum CcmpOp
        {
            Cmp,
            Cmn
        }

        public static void Ccmn(ILEmitterCtx context) => EmitCcmp(context, CcmpOp.Cmn);
        public static void Ccmp(ILEmitterCtx context) => EmitCcmp(context, CcmpOp.Cmp);

        //CCMP/CCMN: if the condition holds, performs the compare (subtract or
        //add) and updates NZCV from the result; otherwise loads the immediate
        //NZCV value encoded in the instruction.
        private static void EmitCcmp(ILEmitterCtx context, CcmpOp cmpOp)
        {
            OpCodeCcmp64 op = (OpCodeCcmp64)context.CurrOp;

            ILLabel lblTrue = new ILLabel();
            ILLabel lblEnd  = new ILLabel();

            context.EmitCondBranch(lblTrue, op.Cond);

            //Condition false: set the flags directly from the NZCV immediate.
            context.EmitLdc_I4((op.Nzcv >> 0) & 1);

            context.EmitStflg((int)PState.VBit);

            context.EmitLdc_I4((op.Nzcv >> 1) & 1);

            context.EmitStflg((int)PState.CBit);

            context.EmitLdc_I4((op.Nzcv >> 2) & 1);

            context.EmitStflg((int)PState.ZBit);

            context.EmitLdc_I4((op.Nzcv >> 3) & 1);

            context.EmitStflg((int)PState.NBit);

            context.Emit(OpCodes.Br_S, lblEnd);

            context.MarkLabel(lblTrue);

            //Condition true: do the actual compare and derive the flags.
            EmitDataLoadOpers(context);

            if (cmpOp == CcmpOp.Cmp)
            {
                context.Emit(OpCodes.Sub);

                context.EmitZnFlagCheck();

                EmitSubsCCheck(context);
                EmitSubsVCheck(context);
            }
            else if (cmpOp == CcmpOp.Cmn)
            {
                context.Emit(OpCodes.Add);

                context.EmitZnFlagCheck();

                EmitAddsCCheck(context);
                EmitAddsVCheck(context);
            }
            else
            {
                //Fix: the original passed nameof(cmpOp) as the single-string
                //ctor argument, which is the exception *message*, not the
                //parameter name. Use the (message, paramName) overload.
                throw new ArgumentException($"Invalid compare operation: {cmpOp}", nameof(cmpOp));
            }

            //Discard the compare result; only the flags matter here.
            context.Emit(OpCodes.Pop);

            context.MarkLabel(lblEnd);
        }
    }
}
// File: ChocolArm64/Instructions/InstEmitCsel.cs
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Transformation applied to Rm when the condition does not hold.
        private enum CselOperation
        {
            None,
            Increment,
            Invert,
            Negate
        }

        public static void Csel (ILEmitterCtx context) => EmitCsel(context, CselOperation.None);
        public static void Csinc(ILEmitterCtx context) => EmitCsel(context, CselOperation.Increment);
        public static void Csinv(ILEmitterCtx context) => EmitCsel(context, CselOperation.Invert);
        public static void Csneg(ILEmitterCtx context) => EmitCsel(context, CselOperation.Negate);

        //CSEL/CSINC/CSINV/CSNEG: Rd = cond ? Rn : f(Rm), where f is identity,
        //increment, bitwise-not or negation respectively.
        private static void EmitCsel(ILEmitterCtx context, CselOperation cselOp)
        {
            OpCodeCsel64 op = (OpCodeCsel64)context.CurrOp;

            ILLabel lblCondTrue = new ILLabel();
            ILLabel lblDone     = new ILLabel();

            context.EmitCondBranch(lblCondTrue, op.Cond);
            context.EmitLdintzr(op.Rm);

            //Condition false path: transform Rm as the variant requires.
            switch (cselOp)
            {
                case CselOperation.Increment:
                    context.EmitLdc_I(1);
                    context.Emit(OpCodes.Add);
                    break;

                case CselOperation.Invert:
                    context.Emit(OpCodes.Not);
                    break;

                case CselOperation.Negate:
                    context.Emit(OpCodes.Neg);
                    break;
            }

            context.Emit(OpCodes.Br_S, lblDone);

            context.MarkLabel(lblCondTrue);

            //Condition true path: the result is simply Rn.
            context.EmitLdintzr(op.Rn);

            context.MarkLabel(lblDone);

            context.EmitStintzr(op.Rd);
        }
    }
}
// File: ChocolArm64/Instructions/InstEmitException.cs
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //BRK: software breakpoint, surfaced through CpuThreadState.OnBreak.
        public static void Brk(ILEmitterCtx context) => EmitExceptionCall(context, nameof(CpuThreadState.OnBreak));

        //SVC: supervisor call, surfaced through CpuThreadState.OnSvcCall.
        public static void Svc(ILEmitterCtx context) => EmitExceptionCall(context, nameof(CpuThreadState.OnSvcCall));

        //Emits the common "call into managed handler" sequence: save state,
        //invoke the named CpuThreadState method with (position, id), then
        //either continue into the next block or return to the dispatcher.
        private static void EmitExceptionCall(ILEmitterCtx context, string mthdName)
        {
            OpCodeException64 exOp = (OpCodeException64)context.CurrOp;

            context.EmitStoreState();

            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitLdc_I8(exOp.Position);
            context.EmitLdc_I4(exOp.Id);

            context.EmitPrivateCall(typeof(CpuThreadState), mthdName);

            //If the handler stopped the thread we return 0 to force a return
            //to the dispatcher, which then exits the thread.
            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Running));

            ILLabel lblStillRunning = new ILLabel();

            context.Emit(OpCodes.Brtrue_S, lblStillRunning);

            context.EmitLdc_I8(0);

            context.Emit(OpCodes.Ret);

            context.MarkLabel(lblStillRunning);

            if (context.CurrBlock.Next == null)
            {
                //No translated successor: hand the next PC to the dispatcher.
                context.EmitLdc_I8(exOp.Position + 4);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                context.EmitLoadState(context.CurrBlock.Next);
            }
        }

        //Undefined instruction: hands PC and the raw opcode to OnUndefined.
        public static void Und(ILEmitterCtx context)
        {
            OpCode64 curOp = context.CurrOp;

            context.EmitStoreState();

            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitLdc_I8(curOp.Position);
            context.EmitLdc_I4(curOp.RawOpCode);

            context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.OnUndefined));

            if (context.CurrBlock.Next == null)
            {
                //No translated successor: hand the next PC to the dispatcher.
                context.EmitLdc_I8(curOp.Position + 4);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                context.EmitLoadState(context.CurrBlock.Next);
            }
        }
    }
}
// File: ChocolArm64/Instructions/InstEmitFlow.cs
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //B: unconditional branch to a PC-relative immediate.
        public static void B(ILEmitterCtx context)
        {
            OpCodeBImmAl64 op = (OpCodeBImmAl64)context.CurrOp;

            if (context.CurrBlock.Branch == null)
            {
                //Target block was not translated; return its address so the
                //dispatcher can continue there.
                context.EmitStoreState();
                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                context.Emit(OpCodes.Br, context.GetLabel(op.Imm));
            }
        }

        //B.cond: conditional branch on the NZCV condition.
        public static void B_Cond(ILEmitterCtx context)
        {
            OpCodeBImmCond64 op = (OpCodeBImmCond64)context.CurrOp;

            EmitBranch(context, op.Cond);
        }

        //BL: branch with link; the return address is written to LR first.
        public static void Bl(ILEmitterCtx context)
        {
            OpCodeBImmAl64 op = (OpCodeBImmAl64)context.CurrOp;

            context.EmitLdc_I(op.Position + 4);
            context.EmitStint(CpuThreadState.LrIndex);
            context.EmitStoreState();

            if (!context.TryOptEmitSubroutineCall())
            {
                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                //The called subroutine leaves its return address (an Int64)
                //on the stack; if it doesn't match ours we keep returning
                //until we reach the dispatcher.
                context.Emit(OpCodes.Dup);

                context.EmitLdc_I8(op.Position + 4);

                ILLabel lblMatch = new ILLabel();

                context.Emit(OpCodes.Beq_S, lblMatch);
                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblMatch);

                context.Emit(OpCodes.Pop);

                context.EmitLoadState(context.CurrBlock.Next);
            }
        }

        //BLR: branch with link to the address held on Rn.
        public static void Blr(ILEmitterCtx context)
        {
            OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;

            context.EmitLdc_I(op.Position + 4);
            context.EmitStint(CpuThreadState.LrIndex);
            context.EmitStoreState();
            context.EmitLdintzr(op.Rn);

            context.Emit(OpCodes.Ret);
        }

        //BR: branch to the address held on Rn.
        public static void Br(ILEmitterCtx context)
        {
            OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;

            context.EmitStoreState();
            context.EmitLdintzr(op.Rn);

            context.Emit(OpCodes.Ret);
        }

        public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un);
        public static void Cbz (ILEmitterCtx context) => EmitCb(context, OpCodes.Beq);

        //CBZ/CBNZ: compare Rt against zero and branch.
        private static void EmitCb(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImmCmp64 op = (OpCodeBImmCmp64)context.CurrOp;

            context.EmitLdintzr(op.Rt);
            context.EmitLdc_I(0);

            EmitBranch(context, ilOp);
        }

        //RET: return to the address held on LR.
        public static void Ret(ILEmitterCtx context)
        {
            context.EmitStoreState();
            context.EmitLdint(CpuThreadState.LrIndex);

            context.Emit(OpCodes.Ret);
        }

        public static void Tbnz(ILEmitterCtx context) => EmitTb(context, OpCodes.Bne_Un);
        public static void Tbz (ILEmitterCtx context) => EmitTb(context, OpCodes.Beq);

        //TBZ/TBNZ: test a single bit of Rt and branch.
        private static void EmitTb(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImmTest64 op = (OpCodeBImmTest64)context.CurrOp;

            context.EmitLdintzr(op.Rt);
            context.EmitLdc_I(1L << op.Pos);

            context.Emit(OpCodes.And);

            context.EmitLdc_I(0);

            EmitBranch(context, ilOp);
        }

        //Conditional branch on an ARM condition code.
        private static void EmitBranch(ILEmitterCtx context, Cond cond)
        {
            OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;

            bool hasLabels = context.CurrBlock.Next   != null &&
                             context.CurrBlock.Branch != null;

            if (hasLabels)
            {
                context.EmitCondBranch(context.GetLabel(op.Imm), cond);
            }
            else
            {
                //One of the successors was not translated; return either the
                //branch target or the fall-through address to the dispatcher.
                context.EmitStoreState();

                ILLabel lblTaken = new ILLabel();

                context.EmitCondBranch(lblTaken, cond);

                context.EmitLdc_I8(op.Position + 4);

                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblTaken);

                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }

        //Conditional branch on an IL comparison already set up on the stack.
        private static void EmitBranch(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;

            bool hasLabels = context.CurrBlock.Next   != null &&
                             context.CurrBlock.Branch != null;

            if (hasLabels)
            {
                context.Emit(ilOp, context.GetLabel(op.Imm));
            }
            else
            {
                context.EmitStoreState();

                ILLabel lblTaken = new ILLabel();

                context.Emit(ilOp, lblTaken);

                context.EmitLdc_I8(op.Position + 4);

                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblTaken);

                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }
    }
}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitHash.cs b/ChocolArm64/Instructions/InstEmitHash.cs new file mode 100644 index 00000000..7e21a886 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitHash.cs @@ -0,0 +1,115 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Crc32b(ILEmitterCtx context) + { + EmitCrc32(context, nameof(SoftFallback.Crc32B)); + } + + public static void Crc32h(ILEmitterCtx context) + { + EmitCrc32(context, nameof(SoftFallback.Crc32H)); + } + + public static void Crc32w(ILEmitterCtx context) + { + EmitCrc32(context, nameof(SoftFallback.Crc32W)); + } + + public static void Crc32x(ILEmitterCtx context) + { + EmitCrc32(context, nameof(SoftFallback.Crc32X)); + } + + public static void Crc32cb(ILEmitterCtx context) + { + if (Optimizations.UseSse42) + { + EmitSse42Crc32(context, typeof(uint), typeof(byte)); + } + else + { + EmitCrc32(context, nameof(SoftFallback.Crc32Cb)); + } + } + + public static void Crc32ch(ILEmitterCtx context) + { + if (Optimizations.UseSse42) + { + EmitSse42Crc32(context, typeof(uint), typeof(ushort)); + } + else + { + EmitCrc32(context, nameof(SoftFallback.Crc32Ch)); + } + } + + public static void Crc32cw(ILEmitterCtx context) + { + if (Optimizations.UseSse42) + { + EmitSse42Crc32(context, typeof(uint), typeof(uint)); + } + else + { + EmitCrc32(context, nameof(SoftFallback.Crc32Cw)); + } + } + + public static void Crc32cx(ILEmitterCtx context) + { + if (Optimizations.UseSse42) + { + EmitSse42Crc32(context, typeof(ulong), typeof(ulong)); + } + else + { + EmitCrc32(context, nameof(SoftFallback.Crc32Cx)); + } + } + + private static void EmitSse42Crc32(ILEmitterCtx context, Type tCrc, Type tData) + { + OpCodeAluRs64 op = (OpCodeAluRs64)context.CurrOp; + + context.EmitLdintzr(op.Rn); 
+ context.EmitLdintzr(op.Rm); + + context.EmitCall(typeof(Sse42).GetMethod(nameof(Sse42.Crc32), new Type[] { tCrc, tData })); + + context.EmitStintzr(op.Rd); + } + + private static void EmitCrc32(ILEmitterCtx context, string name) + { + OpCodeAluRs64 op = (OpCodeAluRs64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (op.RegisterSize != RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U4); + } + + context.EmitLdintzr(op.Rm); + + SoftFallback.EmitCall(context, name); + + if (op.RegisterSize != RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitStintzr(op.Rd); + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitMemory.cs b/ChocolArm64/Instructions/InstEmitMemory.cs new file mode 100644 index 00000000..96e45b3f --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitMemory.cs @@ -0,0 +1,252 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Translation; +using System.Reflection.Emit; + +using static ChocolArm64.Instructions.InstEmitMemoryHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Adr(ILEmitterCtx context) + { + OpCodeAdr64 op = (OpCodeAdr64)context.CurrOp; + + context.EmitLdc_I(op.Position + op.Imm); + context.EmitStintzr(op.Rd); + } + + public static void Adrp(ILEmitterCtx context) + { + OpCodeAdr64 op = (OpCodeAdr64)context.CurrOp; + + context.EmitLdc_I((op.Position & ~0xfffL) + (op.Imm << 12)); + context.EmitStintzr(op.Rd); + } + + public static void Ldr(ILEmitterCtx context) => EmitLdr(context, false); + public static void Ldrs(ILEmitterCtx context) => EmitLdr(context, true); + + private static void EmitLdr(ILEmitterCtx context, bool signed) + { + OpCodeMem64 op = (OpCodeMem64)context.CurrOp; + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + EmitLoadAddress(context); + + if (signed && op.Extend64) + { + EmitReadSx64Call(context, op.Size); + } + else if (signed) + { + EmitReadSx32Call(context, op.Size); + } + else + { + EmitReadZxCall(context, op.Size); + } + + 
if (op is IOpCodeSimd64) + { + context.EmitStvec(op.Rt); + } + else + { + context.EmitStintzr(op.Rt); + } + + EmitWBackIfNeeded(context); + } + + public static void LdrLit(ILEmitterCtx context) + { + IOpCodeLit64 op = (IOpCodeLit64)context.CurrOp; + + if (op.Prefetch) + { + return; + } + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdc_I8(op.Imm); + + if (op.Signed) + { + EmitReadSx64Call(context, op.Size); + } + else + { + EmitReadZxCall(context, op.Size); + } + + if (op is IOpCodeSimd64) + { + context.EmitStvec(op.Rt); + } + else + { + context.EmitStint(op.Rt); + } + } + + public static void Ldp(ILEmitterCtx context) + { + OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp; + + void EmitReadAndStore(int rt) + { + if (op.Extend64) + { + EmitReadSx64Call(context, op.Size); + } + else + { + EmitReadZxCall(context, op.Size); + } + + if (op is IOpCodeSimd64) + { + context.EmitStvec(rt); + } + else + { + context.EmitStintzr(rt); + } + } + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + EmitLoadAddress(context); + + EmitReadAndStore(op.Rt); + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdtmp(); + context.EmitLdc_I8(1 << op.Size); + + context.Emit(OpCodes.Add); + + EmitReadAndStore(op.Rt2); + + EmitWBackIfNeeded(context); + } + + public static void Str(ILEmitterCtx context) + { + OpCodeMem64 op = (OpCodeMem64)context.CurrOp; + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + EmitLoadAddress(context); + + if (op is IOpCodeSimd64) + { + context.EmitLdvec(op.Rt); + } + else + { + context.EmitLdintzr(op.Rt); + } + + EmitWriteCall(context, op.Size); + + EmitWBackIfNeeded(context); + } + + public static void Stp(ILEmitterCtx context) + { + OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp; + + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + EmitLoadAddress(context); + + if (op is IOpCodeSimd64) + { + context.EmitLdvec(op.Rt); + } + else + { + context.EmitLdintzr(op.Rt); + } + + EmitWriteCall(context, op.Size); + + 
context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdtmp(); + context.EmitLdc_I8(1 << op.Size); + + context.Emit(OpCodes.Add); + + if (op is IOpCodeSimd64) + { + context.EmitLdvec(op.Rt2); + } + else + { + context.EmitLdintzr(op.Rt2); + } + + EmitWriteCall(context, op.Size); + + EmitWBackIfNeeded(context); + } + + private static void EmitLoadAddress(ILEmitterCtx context) + { + switch (context.CurrOp) + { + case OpCodeMemImm64 op: + context.EmitLdint(op.Rn); + + if (!op.PostIdx) + { + //Pre-indexing. + context.EmitLdc_I(op.Imm); + + context.Emit(OpCodes.Add); + } + break; + + case OpCodeMemReg64 op: + context.EmitLdint(op.Rn); + context.EmitLdintzr(op.Rm); + context.EmitCast(op.IntType); + + if (op.Shift) + { + context.EmitLsl(op.Size); + } + + context.Emit(OpCodes.Add); + break; + } + + //Save address to Scratch var since the register value may change. + context.Emit(OpCodes.Dup); + + context.EmitSttmp(); + } + + private static void EmitWBackIfNeeded(ILEmitterCtx context) + { + //Check whenever the current OpCode has post-indexed write back, if so write it. + //Note: AOpCodeMemPair inherits from AOpCodeMemImm, so this works for both. + if (context.CurrOp is OpCodeMemImm64 op && op.WBack) + { + context.EmitLdtmp(); + + if (op.PostIdx) + { + context.EmitLdc_I(op.Imm); + + context.Emit(OpCodes.Add); + } + + context.EmitStint(op.Rn); + } + } + } +}
// File: ChocolArm64/Instructions/InstEmitMemoryEx.cs
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Threading;

using static ChocolArm64.Instructions.InstEmitMemoryHelper;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Ordering/exclusivity traits of the load or store being emitted.
        [Flags]
        private enum AccessType
        {
            None      = 0,
            Ordered   = 1,
            Exclusive = 2,
            OrderedEx = Ordered | Exclusive
        }

        //CLREX: drop the exclusive monitor held by this core.
        public static void Clrex(ILEmitterCtx context)
        {
            EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive));
        }

        public static void Dmb(ILEmitterCtx context) => EmitBarrier(context);
        public static void Dsb(ILEmitterCtx context) => EmitBarrier(context);

        public static void Ldar (ILEmitterCtx context) => EmitLdr(context, AccessType.Ordered);
        public static void Ldaxr(ILEmitterCtx context) => EmitLdr(context, AccessType.OrderedEx);
        public static void Ldxr (ILEmitterCtx context) => EmitLdr(context, AccessType.Exclusive);
        public static void Ldxp (ILEmitterCtx context) => EmitLdp(context, AccessType.Exclusive);
        public static void Ldaxp(ILEmitterCtx context) => EmitLdp(context, AccessType.OrderedEx);

        private static void EmitLdr(ILEmitterCtx context, AccessType accType) => EmitLoad(context, accType, false);
        private static void EmitLdp(ILEmitterCtx context, AccessType accType) => EmitLoad(context, accType, true);

        //Shared implementation of the acquire/exclusive loads; pair selects
        //between the single register and register pair forms.
        private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
        {
            OpCodeMemEx64 op = (OpCodeMemEx64)context.CurrOp;

            bool isOrdered   = (accType & AccessType.Ordered)   != 0;
            bool isExclusive = (accType & AccessType.Exclusive) != 0;

            if (isOrdered)
            {
                EmitBarrier(context);
            }

            if (isExclusive)
            {
                //Claim the monitor for the address held on Rn.
                EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn);
            }

            context.EmitLdint(op.Rn);
            context.EmitSttmp();

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdtmp();

            EmitReadZxCall(context, op.Size);

            context.EmitStintzr(op.Rt);

            if (pair)
            {
                //The second register of the pair lives Size bytes further.
                context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                context.EmitLdtmp();
                context.EmitLdc_I8(1 << op.Size);

                context.Emit(OpCodes.Add);

                EmitReadZxCall(context, op.Size);

                context.EmitStintzr(op.Rt2);
            }
        }

        public static void Pfrm(ILEmitterCtx context)
        {
            //Memory Prefetch, execute as no-op.
        }

        public static void Stlr (ILEmitterCtx context) => EmitStr(context, AccessType.Ordered);
        public static void Stlxr(ILEmitterCtx context) => EmitStr(context, AccessType.OrderedEx);
        public static void Stxr (ILEmitterCtx context) => EmitStr(context, AccessType.Exclusive);
        public static void Stxp (ILEmitterCtx context) => EmitStp(context, AccessType.Exclusive);
        public static void Stlxp(ILEmitterCtx context) => EmitStp(context, AccessType.OrderedEx);

        private static void EmitStr(ILEmitterCtx context, AccessType accType) => EmitStore(context, accType, false);
        private static void EmitStp(ILEmitterCtx context, AccessType accType) => EmitStore(context, accType, true);

        //Shared implementation of the release/exclusive stores. For the
        //exclusive forms Rs receives 0 on success and 1 on failure.
        private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
        {
            OpCodeMemEx64 op = (OpCodeMemEx64)context.CurrOp;

            bool isOrdered   = (accType & AccessType.Ordered)   != 0;
            bool isExclusive = (accType & AccessType.Exclusive) != 0;

            if (isOrdered)
            {
                EmitBarrier(context);
            }

            ILLabel lblHasLock = new ILLabel();
            ILLabel lblDone    = new ILLabel();

            if (isExclusive)
            {
                EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn);

                context.Emit(OpCodes.Brtrue_S, lblHasLock);

                //Monitor lost: report failure on Rs and skip the store.
                context.EmitLdc_I8(1);
                context.EmitStintzr(op.Rs);

                context.Emit(OpCodes.Br_S, lblDone);
            }

            context.MarkLabel(lblHasLock);

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdint(op.Rn);
            context.EmitLdintzr(op.Rt);

            EmitWriteCall(context, op.Size);

            if (pair)
            {
                //The second register of the pair lives Size bytes further.
                context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                context.EmitLdint(op.Rn);
                context.EmitLdc_I8(1 << op.Size);

                context.Emit(OpCodes.Add);

                context.EmitLdintzr(op.Rt2);

                EmitWriteCall(context, op.Size);
            }

            if (isExclusive)
            {
                //Success: write 0 to Rs and release the monitor.
                context.EmitLdc_I8(0);
                context.EmitStintzr(op.Rs);

                EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
            }

            context.MarkLabel(lblDone);
        }

        //Pushes the memory object, the current core id and optionally the Rn
        //register value, then calls the named MemoryManager method.
        private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
        {
            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));

            if (rn != -1)
            {
                context.EmitLdint(rn);
            }

            context.EmitCall(typeof(MemoryManager), name);
        }

        //Note: This barrier is most likely not necessary, and probably
        //doesn't make any difference since we need to do a ton of stuff
        //(software MMU emulation) to read or write anything anyway.
        private static void EmitBarrier(ILEmitterCtx context)
        {
            context.EmitCall(typeof(Thread), nameof(Thread.MemoryBarrier));
        }
    }
}
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    //Helpers that emit the IL calls used to read from or write to guest
    //memory. Callers are expected to have already pushed the MemoryManager
    //instance and the address (and, for writes, the value) onto the stack.
    static class InstEmitMemoryHelper
    {
        //How an integer value read from memory is extended to 64 bits.
        private enum Extension
        {
            Zx,   //Zero extend.
            Sx32, //Sign extend within the low 32 bits, upper bits zero.
            Sx64  //Sign extend all the way to 64 bits.
        }

        public static void EmitReadZxCall(ILEmitterCtx context, int size)
        {
            EmitReadCall(context, Extension.Zx, size);
        }

        public static void EmitReadSx32Call(ILEmitterCtx context, int size)
        {
            EmitReadCall(context, Extension.Sx32, size);
        }

        public static void EmitReadSx64Call(ILEmitterCtx context, int size)
        {
            EmitReadCall(context, Extension.Sx64, size);
        }

        //Emits the MemoryManager read call for the given access size
        //(log2 of the byte count), then the conversions needed to extend
        //the result according to ext. SIMD reads need no extension.
        private static void EmitReadCall(ILEmitterCtx context, Extension ext, int size)
        {
            bool isSimd = GetIsSimd(context);

            //Validates size and resolves the MemoryManager method to call.
            string name = GetReadName(isSimd, size);

            context.EmitCall(typeof(MemoryManager), name);

            if (!isSimd)
            {
                if (ext != Extension.Zx)
                {
                    //Truncate to the access size with sign, so the widening
                    //below propagates the sign bit.
                    if (size == 0)
                    {
                        context.Emit(OpCodes.Conv_I1);
                    }
                    else if (size == 1)
                    {
                        context.Emit(OpCodes.Conv_I2);
                    }
                    else if (size == 2)
                    {
                        context.Emit(OpCodes.Conv_I4);
                    }
                }

                if (size < 3)
                {
                    //Sx64 sign extends to the full 64 bits; Zx and Sx32 leave
                    //the upper half zero.
                    OpCode widenOp = ext == Extension.Sx64
                        ? OpCodes.Conv_I8
                        : OpCodes.Conv_U8;

                    context.Emit(widenOp);
                }
            }
        }

        //Emits the MemoryManager write call for the given access size.
        //The value to store must already be on the stack.
        public static void EmitWriteCall(ILEmitterCtx context, int size)
        {
            bool isSimd = GetIsSimd(context);

            //Validates size and resolves the MemoryManager method to call.
            string name = GetWriteName(isSimd, size);

            //Integer stores smaller than 64 bits take a 32 bits value.
            if (!isSimd && size < 3)
            {
                context.Emit(OpCodes.Conv_I4);
            }

            context.EmitCall(typeof(MemoryManager), name);
        }

        //Maps (isSimd, size) to the matching MemoryManager read method.
        //Throws when size is outside the valid range for the access kind.
        private static string GetReadName(bool isSimd, int size)
        {
            if (size < 0 || size > (isSimd ? 4 : 3))
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if (isSimd)
            {
                switch (size)
                {
                    case 0:  return nameof(MemoryManager.ReadVector8);
                    case 1:  return nameof(MemoryManager.ReadVector16);
                    case 2:  return nameof(MemoryManager.ReadVector32);
                    case 3:  return nameof(MemoryManager.ReadVector64);
                    default: return nameof(MemoryManager.ReadVector128);
                }
            }

            switch (size)
            {
                case 0:  return nameof(MemoryManager.ReadByte);
                case 1:  return nameof(MemoryManager.ReadUInt16);
                case 2:  return nameof(MemoryManager.ReadUInt32);
                default: return nameof(MemoryManager.ReadUInt64);
            }
        }

        //Maps (isSimd, size) to the matching MemoryManager write method.
        //Throws when size is outside the valid range for the access kind.
        private static string GetWriteName(bool isSimd, int size)
        {
            if (size < 0 || size > (isSimd ? 4 : 3))
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if (isSimd)
            {
                switch (size)
                {
                    case 0:  return nameof(MemoryManager.WriteVector8);
                    case 1:  return nameof(MemoryManager.WriteVector16);
                    case 2:  return nameof(MemoryManager.WriteVector32);
                    case 3:  return nameof(MemoryManager.WriteVector64);
                    default: return nameof(MemoryManager.WriteVector128);
                }
            }

            switch (size)
            {
                case 0:  return nameof(MemoryManager.WriteByte);
                case 1:  return nameof(MemoryManager.WriteUInt16);
                case 2:  return nameof(MemoryManager.WriteUInt32);
                default: return nameof(MemoryManager.WriteUInt64);
            }
        }

        //A memory access is treated as SIMD when the opcode is a SIMD one,
        //except for the multiple/single structure forms, which move elements
        //through the integer path.
        private static bool GetIsSimd(ILEmitterCtx context)
        {
            if (!(context.CurrOp is IOpCodeSimd64))
            {
                return false;
            }

            return !(context.CurrOp is OpCodeSimdMemMs64) &&
                   !(context.CurrOp is OpCodeSimdMemSs64);
        }
    }
}
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    //Emitters for the move wide instructions (MOVK, MOVN, MOVZ).
    //In all three cases the decoder already shifted the 16-bit immediate
    //into position: Movk ORs op.Imm directly after masking the field at
    //op.Pos, so op.Imm must carry the shift.
    static partial class InstEmit
    {
        public static void Movk(ILEmitterCtx context)
        {
            OpCodeMov64 op = (OpCodeMov64)context.CurrOp;

            //Keep every bit of Rd except the 16-bit field at Pos, then
            //insert the immediate into that field.
            long fieldMask = 0xffffL << op.Pos;

            context.EmitLdintzr(op.Rd);
            context.EmitLdc_I(~fieldMask);

            context.Emit(OpCodes.And);

            context.EmitLdc_I(op.Imm);

            context.Emit(OpCodes.Or);

            context.EmitStintzr(op.Rd);
        }

        public static void Movn(ILEmitterCtx context)
        {
            OpCodeMov64 op = (OpCodeMov64)context.CurrOp;

            //Write the bitwise NOT of the shifted immediate to Rd.
            context.EmitLdc_I(~op.Imm);
            context.EmitStintzr(op.Rd);
        }

        public static void Movz(ILEmitterCtx context)
        {
            OpCodeMov64 op = (OpCodeMov64)context.CurrOp;

            //Write the shifted immediate to Rd, zeroing the other bits.
            context.EmitLdc_I(op.Imm);
            context.EmitStintzr(op.Rd);
        }
    }
}
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;

namespace ChocolArm64.Instructions
{
    //Emitters for the integer multiply instructions.
    static partial class InstEmit
    {
        public static void Madd(ILEmitterCtx context) => EmitMul(context, OpCodes.Add);
        public static void Msub(ILEmitterCtx context) => EmitMul(context, OpCodes.Sub);

        //Rd = Ra <accOp> (Rn * Rm), where accOp is Add (MADD) or Sub (MSUB).
        private static void EmitMul(ILEmitterCtx context, OpCode accOp)
        {
            OpCodeMul64 op = (OpCodeMul64)context.CurrOp;

            context.EmitLdintzr(op.Ra);
            context.EmitLdintzr(op.Rn);
            context.EmitLdintzr(op.Rm);

            context.Emit(OpCodes.Mul);
            context.Emit(accOp);

            context.EmitStintzr(op.Rd);
        }

        public static void Smaddl(ILEmitterCtx context) => EmitMull(context, OpCodes.Add, true);
        public static void Smsubl(ILEmitterCtx context) => EmitMull(context, OpCodes.Sub, true);
        public static void Umaddl(ILEmitterCtx context) => EmitMull(context, OpCodes.Add, false);
        public static void Umsubl(ILEmitterCtx context) => EmitMull(context, OpCodes.Sub, false);

        //Widening multiply-accumulate:
        //Rd = Ra <addSubOp> (extend32(Rn) * extend32(Rm)), where the 32-bit
        //source operands are sign or zero extended depending on signed.
        private static void EmitMull(ILEmitterCtx context, OpCode addSubOp, bool signed)
        {
            OpCodeMul64 op = (OpCodeMul64)context.CurrOp;

            OpCode widenOp;

            if (signed)
            {
                widenOp = OpCodes.Conv_I8;
            }
            else
            {
                widenOp = OpCodes.Conv_U8;
            }

            context.EmitLdintzr(op.Ra);

            EmitMullOperand(context, op.Rn, widenOp);
            EmitMullOperand(context, op.Rm, widenOp);

            context.Emit(OpCodes.Mul);

            context.Emit(addSubOp);

            context.EmitStintzr(op.Rd);
        }

        //Loads a register, truncates it to its low 32 bits, then widens it
        //back to 64 bits with the given conversion (Conv_I8 or Conv_U8).
        private static void EmitMullOperand(ILEmitterCtx context, int reg, OpCode widenOp)
        {
            context.EmitLdintzr(reg);

            context.Emit(OpCodes.Conv_I4);
            context.Emit(widenOp);
        }

        public static void Smulh(ILEmitterCtx context) => EmitMulHi(context, nameof(SoftFallback.SMulHi128));
        public static void Umulh(ILEmitterCtx context) => EmitMulHi(context, nameof(SoftFallback.UMulHi128));

        //Rd = high half of the 128-bit product Rn * Rm, computed by the
        //named SoftFallback helper (signed or unsigned variant).
        private static void EmitMulHi(ILEmitterCtx context, string name)
        {
            OpCodeMul64 op = (OpCodeMul64)context.CurrOp;

            context.EmitLdintzr(op.Rn);
            context.EmitLdintzr(op.Rm);

            SoftFallback.EmitCall(context, name);

            context.EmitStintzr(op.Rd);
        }
    }
}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs new file mode 100644 index 00000000..9217de5f --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -0,0 +1,2387 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection; +using System.Reflection.Emit; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Abs_S(ILEmitterCtx context) + { + EmitScalarUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Abs_V(ILEmitterCtx context) + { + EmitVectorUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Add_S(ILEmitterCtx context) + { + EmitScalarBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Add_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Add)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + } + + public static void Addhn_V(ILEmitterCtx context) + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Add), round: false); + } + + public static void Addp_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); + EmitVectorExtractZx(context, op.Rn, 1, op.Size); + + context.Emit(OpCodes.Add); + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void Addp_V(ILEmitterCtx context) + { + EmitVectorPairwiseOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Addv_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int 
elems = bytes >> op.Size; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + for (int index = 1; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.Emit(OpCodes.Add); + } + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void Cls_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.EmitLdc_I4(eSize); + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingSigns)); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Clz_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + if (Lzcnt.IsSupported && eSize == 32) + { + context.Emit(OpCodes.Conv_U4); + + context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) })); + + context.Emit(OpCodes.Conv_U8); + } + else + { + context.EmitLdc_I4(eSize); + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingZeros)); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Cnt_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, 0); + + if (Popcnt.IsSupported) + { + context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) })); + } + else + { + SoftFallback.EmitCall(context, nameof(SoftFallback.CountSetBits8)); + } + + EmitVectorInsert(context, op.Rd, index, 0); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Fabd_S(ILEmitterCtx context) + { + EmitScalarBinaryOpF(context, () => + { + context.Emit(OpCodes.Sub); + + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fabs_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fabs_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fadd_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.AddScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd)); + }); + } + } + + public static void Fadd_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Add)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd)); + }); + } + } + + public static void Faddp_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + EmitVectorExtractF(context, op.Rn, 1, sizeF); + + context.Emit(OpCodes.Add); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void Faddp_V(ILEmitterCtx context) + { + 
EmitVectorPairwiseOpF(context, () => context.Emit(OpCodes.Add)); + } + + public static void Fdiv_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.DivideScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv)); + }); + } + } + + public static void Fdiv_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Divide)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv)); + }); + } + } + + public static void Fmadd_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Ra); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulAdd)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AddScalar), typesMulAdd)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (Op.Size == 1) */ + { + Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Ra); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarTernaryRaOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } + } + + 
public static void Fmax_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.MaxScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax)); + }); + } + } + + public static void Fmax_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Max)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax)); + }); + } + } + + public static void Fmaxnm_S(ILEmitterCtx context) + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum)); + }); + } + + public static void Fmaxnm_V(ILEmitterCtx context) + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum)); + }); + } + + public static void Fmaxp_V(ILEmitterCtx context) + { + EmitVectorPairwiseOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax)); + }); + } + + public static void Fmin_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.MinScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin)); + }); + } + } + + public static void Fmin_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Min)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin)); + }); + } + } + + public static void Fminnm_S(ILEmitterCtx context) + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum)); + }); + } + + public static void Fminnm_V(ILEmitterCtx 
context) + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum)); + }); + } + + public static void Fminp_V(ILEmitterCtx context) + { + EmitVectorPairwiseOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin)); + }); + } + + public static void Fmla_Se(ILEmitterCtx context) + { + EmitScalarTernaryOpByElemF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + + public static void Fmla_V(ILEmitterCtx context) + { + EmitVectorTernaryOpF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + + public static void Fmla_Ve(ILEmitterCtx context) + { + EmitVectorTernaryOpByElemF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + + public static void Fmls_Se(ILEmitterCtx context) + { + EmitScalarTernaryOpByElemF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + + public static void Fmls_V(ILEmitterCtx context) + { + EmitVectorTernaryOpF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + + public static void Fmls_Ve(ILEmitterCtx context) + { + EmitVectorTernaryOpByElemF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + + public static void Fmsub_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Ra); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (Op.Size == 1) */ + { + Type[] typesMulSub = new 
Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Ra); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarTernaryRaOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } + } + + public static void Fmul_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.MultiplyScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul)); + }); + } + } + + public static void Fmul_Se(ILEmitterCtx context) + { + EmitScalarBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Fmul_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Multiply)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul)); + }); + } + } + + public static void Fmul_Ve(ILEmitterCtx context) + { + EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Fmulx_S(ILEmitterCtx context) + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX)); + }); + } + + public static void Fmulx_Se(ILEmitterCtx context) + { + EmitScalarBinaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX)); + }); + } + + public static void Fmulx_V(ILEmitterCtx context) + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, 
nameof(SoftFloat32.FPMulX)); + }); + } + + public static void Fmulx_Ve(ILEmitterCtx context) + { + EmitVectorBinaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX)); + }); + } + + public static void Fneg_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Fneg_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Fnmadd_S(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + + context.Emit(OpCodes.Neg); + + EmitVectorExtractF(context, op.Rm, 0, sizeF); + + context.Emit(OpCodes.Mul); + + EmitVectorExtractF(context, op.Ra, 0, sizeF); + + context.Emit(OpCodes.Sub); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void Fnmsub_S(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + EmitVectorExtractF(context, op.Rm, 0, sizeF); + + context.Emit(OpCodes.Mul); + + EmitVectorExtractF(context, op.Ra, 0, sizeF); + + context.Emit(OpCodes.Sub); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void Fnmul_S(ILEmitterCtx context) + { + EmitScalarBinaryOpF(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Neg); + }); + } + + public static void Frecpe_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate)); + }); + } + + public static void Frecpe_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate)); + }); + } + + public static void Frecps_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = 
op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(2f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (SizeF == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(2d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused)); + }); + } + } + + public static void Frecps_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(2f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + 
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (SizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(2d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused)); + }); + } + } + + public static void Frecpx_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX)); + }); + } + + public static void Frinta_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + EmitRoundMathCall(context, MidpointRounding.AwayFromZero); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Frinta_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitRoundMathCall(context, MidpointRounding.AwayFromZero); + }); + } + + public static void Frinti_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitScalarUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (op.Size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else if (op.Size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + else + { + throw new InvalidOperationException(); + } + }); + } + 
+ public static void Frinti_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else if (sizeF == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + else + { + throw new InvalidOperationException(); + } + }); + } + + public static void Frintm_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Floor)); + }); + } + + public static void Frintm_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Floor)); + }); + } + + public static void Frintn_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + EmitRoundMathCall(context, MidpointRounding.ToEven); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Frintn_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitRoundMathCall(context, MidpointRounding.ToEven); + }); + } + + public static void Frintp_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Ceiling)); + }); + } + + public static void Frintp_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Ceiling)); + }); + } + + public static void Frintx_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitScalarUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (op.Size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else if (op.Size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + else + { + throw new InvalidOperationException(); + } + }); + } + + 
public static void Frintx_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (op.Size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else if (op.Size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + else + { + throw new InvalidOperationException(); + } + }); + } + + public static void Frsqrte_S(ILEmitterCtx context) + { + EmitScalarUnaryOpF(context, () => + { + EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate)); + }); + } + + public static void Frsqrte_V(ILEmitterCtx context) + { + EmitVectorUnaryOpF(context, () => + { + EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate)); + }); + } + + public static void Frsqrts_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(0.5f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdc_R4(3f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (SizeF == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(0.5d); + 
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + context.EmitLdc_R8(3d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FprSqrtStepFused)); + }); + } + } + + public static void Frsqrts_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(0.5f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdc_R4(3f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (SizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(0.5d); 
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitLdc_R8(3d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FprSqrtStepFused)); + }); + } + } + + public static void Fsqrt_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.SqrtScalar)); + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt)); + }); + } + } + + public static void Fsqrt_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Sqrt)); + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt)); + }); + } + } + + public static void Fsub_S(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.SubtractScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub)); + }); + } + } + + public static void Fsub_V(ILEmitterCtx context) + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Subtract)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, 
nameof(SoftFloat32.FPSub)); + }); + } + } + + public static void Mla_V(ILEmitterCtx context) + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + + public static void Mla_Ve(ILEmitterCtx context) + { + EmitVectorTernaryOpByElemZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + + public static void Mls_V(ILEmitterCtx context) + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + + public static void Mls_Ve(ILEmitterCtx context) + { + EmitVectorTernaryOpByElemZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + + public static void Mul_V(ILEmitterCtx context) + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Mul_Ve(ILEmitterCtx context) + { + EmitVectorBinaryOpByElemZx(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Neg_S(ILEmitterCtx context) + { + EmitScalarUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Neg_V(ILEmitterCtx context) + { + EmitVectorUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Raddhn_V(ILEmitterCtx context) + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Add), round: true); + } + + public static void Rsubhn_V(ILEmitterCtx context) + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Sub), round: true); + } + + public static void Saba_V(ILEmitterCtx context) + { + EmitVectorTernaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + context.Emit(OpCodes.Add); + }); + } + + public static void Sabal_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmTernaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + context.Emit(OpCodes.Add); + }); + } + + public static void Sabd_V(ILEmitterCtx context) + { + EmitVectorBinaryOpSx(context, () => + { + 
context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Sabdl_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Sadalp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } + + public static void Saddl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + } + } + + public static void Saddlp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } + + public static void Saddw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpSx(context, () => 
context.Emit(OpCodes.Add)); + } + + public static void Shadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); + + context.EmitLdc_I4(1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpSx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + }); + } + } + + public static void Shsub_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Type[] typesSav = new Type[] { IntTypesPerSizeLog2[op.Size] }; + Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] }; + Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + context.EmitLdc_I4(op.Size == 0 ? 
sbyte.MinValue : short.MinValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitStvectmp(); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub)); + + context.Emit(OpCodes.Dup); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAddSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + }); + } + } + + public static void Smax_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Smaxp_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorPairwiseOpSx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Smin_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Sminp_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorPairwiseOpSx(context, () => 
context.EmitCall(mthdInfo)); + } + + public static void Smlal_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + } + + public static void Smlsl_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulSub = new Type[] { 
VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + } + + public static void Smull_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Sqabs_S(ILEmitterCtx context) + { + EmitScalarSaturatingUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Sqabs_V(ILEmitterCtx context) + { + EmitVectorSaturatingUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Sqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqdmulh_S(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => 
EmitDoublingMultiplyHighHalf(context, round: false), SaturatingFlags.ScalarSx); + } + + public static void Sqdmulh_V(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: false), SaturatingFlags.VectorSx); + } + + public static void Sqneg_S(ILEmitterCtx context) + { + EmitScalarSaturatingUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Sqneg_V(ILEmitterCtx context) + { + EmitVectorSaturatingUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Sqrdmulh_S(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: true), SaturatingFlags.ScalarSx); + } + + public static void Sqrdmulh_V(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: true), SaturatingFlags.VectorSx); + } + + public static void Sqsub_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqsub_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqxtn_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqxtn_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqxtun_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqxtun_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } + + public static void Srhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Type[] typesSav = new Type[] { IntTypesPerSizeLog2[op.Size] }; + Type[] typesSubAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], 
VectorIntTypesPerSizeLog2 [op.Size] }; + Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + context.EmitLdc_I4(op.Size == 0 ? sbyte.MinValue : short.MinValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpSx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + }); + } + } + + public static void Ssubl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Ssubw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } + + public static void Sub_S(ILEmitterCtx context) + { + EmitScalarBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + + public static void Sub_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Subhn_V(ILEmitterCtx context) + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Sub), round: false); + } + + public static void Suqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Suqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Uaba_V(ILEmitterCtx context) + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + context.Emit(OpCodes.Add); + }); + } + + public static void Uabal_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + 
context.Emit(OpCodes.Add); + }); + } + + public static void Uabd_V(ILEmitterCtx context) + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Uabdl_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Uadalp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + + public static void Uaddl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + } + + public static void Uaddlp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + + public static void Uaddlv_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + for (int index = 1; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.Emit(OpCodes.Add); + } + + EmitScalarSet(context, op.Rd, op.Size + 1); + } + + public static void Uaddw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Uhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + 
context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); + + context.EmitLdc_I4(1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Uhsub_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.Emit(OpCodes.Dup); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAvgSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Umax_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umaxp_V(ILEmitterCtx context) + 
{ + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorPairwiseOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umin_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Uminp_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorPairwiseOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umlal_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], + VectorIntTypesPerSizeLog2 [op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + } + + public static void Umlsl_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], + VectorIntTypesPerSizeLog2 [op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + } + + public static void Umull_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Uqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqsub_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqsub_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqxtn_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqxtn_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + + public static void Urhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + 
Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Usqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usubl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Usubw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + + private static void EmitAbs(ILEmitterCtx context) + { + ILLabel lblTrue = new ILLabel(); + + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_0); + context.Emit(OpCodes.Bge_S, lblTrue); + + context.Emit(OpCodes.Neg); + + context.MarkLabel(lblTrue); + } + + private static void EmitAddLongPairwise(ILEmitterCtx context, bool signed, bool accumulate) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtract(context, op.Rn, idx, op.Size, signed); + EmitVectorExtract(context, op.Rn, idx + 1, op.Size, signed); + + context.Emit(OpCodes.Add); + + if (accumulate) + { + EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void 
EmitDoublingMultiplyHighHalf(ILEmitterCtx context, bool round) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int eSize = 8 << op.Size; + + context.Emit(OpCodes.Mul); + + if (!round) + { + context.EmitAsr(eSize - 1); + } + else + { + long roundConst = 1L << (eSize - 1); + + ILLabel lblTrue = new ILLabel(); + + context.EmitLsl(1); + + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + + context.EmitAsr(eSize); + + context.Emit(OpCodes.Dup); + context.EmitLdc_I8((long)int.MinValue); + context.Emit(OpCodes.Bne_Un_S, lblTrue); + + context.Emit(OpCodes.Neg); + + context.MarkLabel(lblTrue); + } + } + + private static void EmitHighNarrow(ILEmitterCtx context, Action emit, bool round) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int elems = 8 >> op.Size; + + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + long roundConst = 1L << (eSize - 1); + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + emit(); + + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLsr(eSize); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs new file mode 100644 index 00000000..c473c0ae --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs @@ -0,0 +1,526 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitAluHelper; +using static 
ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Cmeq_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Beq_S, scalar: true); + } + + public static void Cmeq_V(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 op) + { + if (op.Size < 3 && Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.CompareEqual)); + } + else if (op.Size == 3 && Optimizations.UseSse41) + { + EmitSse41Op(context, nameof(Sse41.CompareEqual)); + } + else + { + EmitCmp(context, OpCodes.Beq_S, scalar: false); + } + } + else + { + EmitCmp(context, OpCodes.Beq_S, scalar: false); + } + } + + public static void Cmge_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bge_S, scalar: true); + } + + public static void Cmge_V(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bge_S, scalar: false); + } + + public static void Cmgt_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bgt_S, scalar: true); + } + + public static void Cmgt_V(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 op) + { + if (op.Size < 3 && Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.CompareGreaterThan)); + } + else if (op.Size == 3 && Optimizations.UseSse42) + { + EmitSse42Op(context, nameof(Sse42.CompareGreaterThan)); + } + else + { + EmitCmp(context, OpCodes.Bgt_S, scalar: false); + } + } + else + { + EmitCmp(context, OpCodes.Bgt_S, scalar: false); + } + } + + public static void Cmhi_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bgt_Un_S, scalar: true); + } + + public static void Cmhi_V(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bgt_Un_S, scalar: false); + } + + public static void Cmhs_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bge_Un_S, scalar: true); + } + + public static void Cmhs_V(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Bge_Un_S, scalar: false); + } + + public static void Cmle_S(ILEmitterCtx context) + { + 
EmitCmp(context, OpCodes.Ble_S, scalar: true); + } + + public static void Cmle_V(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Ble_S, scalar: false); + } + + public static void Cmlt_S(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Blt_S, scalar: true); + } + + public static void Cmlt_V(ILEmitterCtx context) + { + EmitCmp(context, OpCodes.Blt_S, scalar: false); + } + + public static void Cmtst_S(ILEmitterCtx context) + { + EmitCmtst(context, scalar: true); + } + + public static void Cmtst_V(ILEmitterCtx context) + { + EmitCmtst(context, scalar: false); + } + + public static void Fccmp_S(ILEmitterCtx context) + { + OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp; + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitCondBranch(lblTrue, op.Cond); + + EmitSetNzcv(context, op.Nzcv); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblTrue); + + Fcmp_S(context); + + context.MarkLabel(lblEnd); + } + + public static void Fccmpe_S(ILEmitterCtx context) + { + Fccmp_S(context); + } + + public static void Fcmeq_S(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar)); + } + else + { + EmitScalarFcmp(context, OpCodes.Beq_S); + } + } + + public static void Fcmeq_V(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual)); + } + else + { + EmitVectorFcmp(context, OpCodes.Beq_S); + } + } + + public static void Fcmge_S(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar)); + } + else + { + EmitScalarFcmp(context, OpCodes.Bge_S); + } + } + + public static void Fcmge_V(ILEmitterCtx context) + { + if (context.CurrOp is 
OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual)); + } + else + { + EmitVectorFcmp(context, OpCodes.Bge_S); + } + } + + public static void Fcmgt_S(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar)); + } + else + { + EmitScalarFcmp(context, OpCodes.Bgt_S); + } + } + + public static void Fcmgt_V(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan)); + } + else + { + EmitVectorFcmp(context, OpCodes.Bgt_S); + } + } + + public static void Fcmle_S(ILEmitterCtx context) + { + EmitScalarFcmp(context, OpCodes.Ble_S); + } + + public static void Fcmle_V(ILEmitterCtx context) + { + EmitVectorFcmp(context, OpCodes.Ble_S); + } + + public static void Fcmlt_S(ILEmitterCtx context) + { + EmitScalarFcmp(context, OpCodes.Blt_S); + } + + public static void Fcmlt_V(ILEmitterCtx context) + { + EmitVectorFcmp(context, OpCodes.Blt_S); + } + + public static void Fcmp_S(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false; + + //Handle NaN case. + //If any number is NaN, then NZCV = 0011. 
+ if (cmpWithZero) + { + EmitNaNCheck(context, op.Rn); + } + else + { + EmitNaNCheck(context, op.Rn); + EmitNaNCheck(context, op.Rm); + + context.Emit(OpCodes.Or); + } + + ILLabel lblNaN = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.Brtrue_S, lblNaN); + + void EmitLoadOpers() + { + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + if (cmpWithZero) + { + if (op.Size == 0) + { + context.EmitLdc_R4(0f); + } + else /* if (Op.Size == 1) */ + { + context.EmitLdc_R8(0d); + } + } + else + { + EmitVectorExtractF(context, op.Rm, 0, op.Size); + } + } + + //Z = Rn == Rm + EmitLoadOpers(); + + context.Emit(OpCodes.Ceq); + context.Emit(OpCodes.Dup); + + context.EmitStflg((int)PState.ZBit); + + //C = Rn >= Rm + EmitLoadOpers(); + + context.Emit(OpCodes.Cgt); + context.Emit(OpCodes.Or); + + context.EmitStflg((int)PState.CBit); + + //N = Rn < Rm + EmitLoadOpers(); + + context.Emit(OpCodes.Clt); + + context.EmitStflg((int)PState.NBit); + + //V = 0 + context.EmitLdc_I4(0); + + context.EmitStflg((int)PState.VBit); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblNaN); + + EmitSetNzcv(context, 0b0011); + + context.MarkLabel(lblEnd); + } + + public static void Fcmpe_S(ILEmitterCtx context) + { + Fcmp_S(context); + } + + private static void EmitNaNCheck(ILEmitterCtx context, int reg) + { + IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; + + EmitVectorExtractF(context, reg, 0, op.Size); + + if (op.Size == 0) + { + context.EmitCall(typeof(float), nameof(float.IsNaN)); + } + else if (op.Size == 1) + { + context.EmitCall(typeof(double), nameof(double.IsNaN)); + } + else + { + throw new InvalidOperationException(); + } + } + + private static void EmitCmp(ILEmitterCtx context, OpCode ilOp, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? 
bytes >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractSx(context, op.Rn, index, op.Size); + + if (op is OpCodeSimdReg64 binOp) + { + EmitVectorExtractSx(context, binOp.Rm, index, op.Size); + } + else + { + context.EmitLdc_I8(0L); + } + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(ilOp, lblTrue); + + EmitVectorInsert(context, op.Rd, index, op.Size, 0); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblTrue); + + EmitVectorInsert(context, op.Rd, index, op.Size, (long)szMask); + + context.MarkLabel(lblEnd); + } + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitCmtst(ILEmitterCtx context, bool scalar) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? bytes >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + EmitVectorExtractZx(context, op.Rm, index, op.Size); + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.And); + + context.EmitLdc_I8(0L); + + context.Emit(OpCodes.Bne_Un_S, lblTrue); + + EmitVectorInsert(context, op.Rd, index, op.Size, 0); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblTrue); + + EmitVectorInsert(context, op.Rd, index, op.Size, (long)szMask); + + context.MarkLabel(lblEnd); + } + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitScalarFcmp(ILEmitterCtx context, OpCode ilOp) + { + EmitFcmp(context, ilOp, 0, scalar: true); + } + + private static void EmitVectorFcmp(ILEmitterCtx context, OpCode ilOp) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int 
sizeF = op.Size & 1; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + EmitFcmp(context, ilOp, index, scalar: false); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitFcmp(ILEmitterCtx context, OpCode ilOp, int index, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + ulong szMask = ulong.MaxValue >> (64 - (32 << sizeF)); + + EmitVectorExtractF(context, op.Rn, index, sizeF); + + if (op is OpCodeSimdReg64 binOp) + { + EmitVectorExtractF(context, binOp.Rm, index, sizeF); + } + else if (sizeF == 0) + { + context.EmitLdc_R4(0f); + } + else /* if (SizeF == 1) */ + { + context.EmitLdc_R8(0d); + } + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(ilOp, lblTrue); + + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + else + { + EmitVectorInsert(context, op.Rd, index, sizeF + 2, 0); + } + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblTrue); + + if (scalar) + { + EmitVectorInsert(context, op.Rd, index, 3, (long)szMask); + + EmitVectorZeroUpper(context, op.Rd); + } + else + { + EmitVectorInsert(context, op.Rd, index, sizeF + 2, (long)szMask); + } + + context.MarkLabel(lblEnd); + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdCrypto.cs b/ChocolArm64/Instructions/InstEmitSimdCrypto.cs new file mode 100644 index 00000000..33c81aab --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdCrypto.cs @@ -0,0 +1,54 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Translation; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Aesd_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Decrypt)); + + 
context.EmitStvec(op.Rd); + } + + public static void Aese_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Encrypt)); + + context.EmitStvec(op.Rd); + } + + public static void Aesimc_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.InverseMixColumns)); + + context.EmitStvec(op.Rd); + } + + public static void Aesmc_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.MixColumns)); + + context.EmitStvec(op.Rd); + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs new file mode 100644 index 00000000..fa17c09d --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs @@ -0,0 +1,697 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Fcvt_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (Optimizations.UseSse2) + { + if (op.Size == 1 && op.Opc == 0) + { + //Double -> Single. + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), types)); + + context.EmitStvec(op.Rd); + } + else if (op.Size == 0 && op.Opc == 1) + { + //Single -> Double. 
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + + context.EmitLdvec(op.Rn); + + Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + else + { + //Invalid encoding. + throw new InvalidOperationException(); + } + } + else + { + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + EmitFloatCast(context, op.Opc); + + EmitScalarSetF(context, op.Rd, op.Opc); + } + } + + public static void Fcvtas_Gp(ILEmitterCtx context) + { + EmitFcvt_s_Gp(context, () => EmitRoundMathCall(context, MidpointRounding.AwayFromZero)); + } + + public static void Fcvtau_Gp(ILEmitterCtx context) + { + EmitFcvt_u_Gp(context, () => EmitRoundMathCall(context, MidpointRounding.AwayFromZero)); + } + + public static void Fcvtl_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + EmitVectorExtractZx(context, op.Rn, part + index, 1); + context.Emit(OpCodes.Conv_U2); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat1632), nameof(SoftFloat1632.FPConvert)); + } + else /* if (SizeF == 1) */ + { + EmitVectorExtractF(context, op.Rn, part + index, 0); + + context.Emit(OpCodes.Conv_R8); + } + + EmitVectorInsertTmpF(context, index, sizeF); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } + + public static void Fcvtms_Gp(ILEmitterCtx context) + { + EmitFcvt_s_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Floor))); + } + + public static void Fcvtmu_Gp(ILEmitterCtx context) + { + EmitFcvt_u_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Floor))); + } + + public static void Fcvtn_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractF(context, op.Rn, index, sizeF); + + if (sizeF == 0) + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat3216), nameof(SoftFloat3216.FPConvert)); + + context.Emit(OpCodes.Conv_U8); + EmitVectorInsertTmp(context, part + index, 1); + } + else /* if (SizeF == 1) */ + { + context.Emit(OpCodes.Conv_R4); + + EmitVectorInsertTmpF(context, part + index, 0); + } + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Fcvtns_S(ILEmitterCtx context) + { + EmitFcvtn(context, signed: true, scalar: true); + } + + public static void Fcvtns_V(ILEmitterCtx context) + { + EmitFcvtn(context, signed: true, scalar: false); + } + + public static void Fcvtnu_S(ILEmitterCtx context) + { + EmitFcvtn(context, signed: false, scalar: true); + } + + public static void Fcvtnu_V(ILEmitterCtx context) + { + EmitFcvtn(context, signed: false, scalar: false); + } + + public static void Fcvtps_Gp(ILEmitterCtx context) + { + EmitFcvt_s_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Ceiling))); + } + + public static void Fcvtpu_Gp(ILEmitterCtx context) + { + EmitFcvt_u_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Ceiling))); + } + + public static void Fcvtzs_Gp(ILEmitterCtx context) + { + EmitFcvt_s_Gp(context, () => { }); + } + + public static void Fcvtzs_Gp_Fix(ILEmitterCtx context) + { + EmitFcvtzs_Gp_Fix(context); + } + + public static void Fcvtzs_S(ILEmitterCtx context) + { + EmitScalarFcvtzs(context); + } + + public static void Fcvtzs_V(ILEmitterCtx context) + { + EmitVectorFcvtzs(context); + } + + public static void Fcvtzu_Gp(ILEmitterCtx context) + { + EmitFcvt_u_Gp(context, () => { }); + } + + public static void Fcvtzu_Gp_Fix(ILEmitterCtx context) + { + EmitFcvtzu_Gp_Fix(context); 
+ } + + public static void Fcvtzu_S(ILEmitterCtx context) + { + EmitScalarFcvtzu(context); + } + + public static void Fcvtzu_V(ILEmitterCtx context) + { + EmitVectorFcvtzu(context); + } + + public static void Scvtf_Gp(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U4); + } + + EmitFloatCast(context, op.Size); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Scvtf_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractSx(context, op.Rn, 0, op.Size + 2); + + EmitFloatCast(context, op.Size); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Scvtf_V(ILEmitterCtx context) + { + EmitVectorCvtf(context, signed: true); + } + + public static void Ucvtf_Gp(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U4); + } + + context.Emit(OpCodes.Conv_R_Un); + + EmitFloatCast(context, op.Size); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Ucvtf_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2); + + context.Emit(OpCodes.Conv_R_Un); + + EmitFloatCast(context, op.Size); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Ucvtf_V(ILEmitterCtx context) + { + EmitVectorCvtf(context, signed: false); + } + + private static int GetFBits(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdShImm64 op) + { + return GetImmShr(op); + } + + return 0; + } + + private static void EmitFloatCast(ILEmitterCtx context, int size) + { + if (size == 0) + { + context.Emit(OpCodes.Conv_R4); + } + else if (size == 1) + { + context.Emit(OpCodes.Conv_R8); + } + else + { + throw 
new ArgumentOutOfRangeException(nameof(size)); + } + } + + private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? bytes >> sizeI : 1; + + if (scalar && (sizeF == 0)) + { + EmitVectorZeroLowerTmp(context); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractF(context, op.Rn, index, sizeF); + + EmitRoundMathCall(context, MidpointRounding.ToEven); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF32ToS32) + : nameof(VectorHelper.SatF32ToU32)); + + context.Emit(OpCodes.Conv_U8); + } + else /* if (SizeF == 1) */ + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF64ToS64) + : nameof(VectorHelper.SatF64ToU64)); + } + + EmitVectorInsertTmp(context, index, sizeI); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitFcvt_s_Gp(ILEmitterCtx context, Action emit) + { + EmitFcvt___Gp(context, emit, true); + } + + private static void EmitFcvt_u_Gp(ILEmitterCtx context, Action emit) + { + EmitFcvt___Gp(context, emit, false); + } + + private static void EmitFcvt___Gp(ILEmitterCtx context, Action emit, bool signed) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + emit(); + + if (signed) + { + EmitScalarFcvts(context, op.Size, 0); + } + else + { + EmitScalarFcvtu(context, op.Size, 0); + } + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitStintzr(op.Rd); + } + + private static void EmitFcvtzs_Gp_Fix(ILEmitterCtx context) + { + EmitFcvtz__Gp_Fix(context, true); + } + + private static void EmitFcvtzu_Gp_Fix(ILEmitterCtx context) + { + 
EmitFcvtz__Gp_Fix(context, false); + } + + private static void EmitFcvtz__Gp_Fix(ILEmitterCtx context, bool signed) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + if (signed) + { + EmitScalarFcvts(context, op.Size, op.FBits); + } + else + { + EmitScalarFcvtu(context, op.Size, op.FBits); + } + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitStintzr(op.Rd); + } + + private static void EmitVectorScvtf(ILEmitterCtx context) + { + EmitVectorCvtf(context, true); + } + + private static void EmitVectorUcvtf(ILEmitterCtx context) + { + EmitVectorCvtf(context, false); + } + + private static void EmitVectorCvtf(ILEmitterCtx context, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> sizeI; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, sizeI, signed); + + if (!signed) + { + context.Emit(OpCodes.Conv_R_Un); + } + + context.Emit(sizeF == 0 + ? 
OpCodes.Conv_R4 + : OpCodes.Conv_R8); + + EmitI2fFBitsMul(context, sizeF, fBits); + + EmitVectorInsertF(context, op.Rd, index, sizeF); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitScalarFcvtzs(ILEmitterCtx context) + { + EmitScalarFcvtz(context, true); + } + + private static void EmitScalarFcvtzu(ILEmitterCtx context) + { + EmitScalarFcvtz(context, false); + } + + private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + + EmitF2iFBitsMul(context, sizeF, fBits); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF32ToS32) + : nameof(VectorHelper.SatF32ToU32)); + } + else /* if (SizeF == 1) */ + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF64ToS64) + : nameof(VectorHelper.SatF64ToU64)); + } + + if (sizeF == 0) + { + context.Emit(OpCodes.Conv_U8); + } + + EmitScalarSet(context, op.Rd, sizeI); + } + + private static void EmitVectorFcvtzs(ILEmitterCtx context) + { + EmitVectorFcvtz(context, true); + } + + private static void EmitVectorFcvtzu(ILEmitterCtx context) + { + EmitVectorFcvtz(context, false); + } + + private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> sizeI; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractF(context, op.Rn, index, sizeF); + + EmitF2iFBitsMul(context, sizeF, fBits); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, signed + ? 
nameof(VectorHelper.SatF32ToS32) + : nameof(VectorHelper.SatF32ToU32)); + } + else /* if (SizeF == 1) */ + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF64ToS64) + : nameof(VectorHelper.SatF64ToU64)); + } + + if (sizeF == 0) + { + context.Emit(OpCodes.Conv_U8); + } + + EmitVectorInsert(context, op.Rd, index, sizeI); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitScalarFcvts(ILEmitterCtx context, int size, int fBits) + { + if (size < 0 || size > 1) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + EmitF2iFBitsMul(context, size, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF32ToS32)); + } + else /* if (Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF64ToS32)); + } + } + else + { + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF32ToS64)); + } + else /* if (Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF64ToS64)); + } + } + } + + private static void EmitScalarFcvtu(ILEmitterCtx context, int size, int fBits) + { + if (size < 0 || size > 1) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + EmitF2iFBitsMul(context, size, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF32ToU32)); + } + else /* if (Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF64ToU32)); + } + } + else + { + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF32ToU64)); + } + else /* if (Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.SatF64ToU64)); + } + } + } + + private static void EmitF2iFBitsMul(ILEmitterCtx context, int size, int fBits) + { + if (fBits != 0) + { + if (size == 0) + { + 
context.EmitLdc_R4(MathF.Pow(2f, fBits)); + } + else if (size == 1) + { + context.EmitLdc_R8(Math.Pow(2d, fBits)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.Emit(OpCodes.Mul); + } + } + + private static void EmitI2fFBitsMul(ILEmitterCtx context, int size, int fBits) + { + if (fBits != 0) + { + if (size == 0) + { + context.EmitLdc_R4(1f / MathF.Pow(2f, fBits)); + } + else if (size == 1) + { + context.EmitLdc_R8(1d / Math.Pow(2d, fBits)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.Emit(OpCodes.Mul); + } + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdHash.cs b/ChocolArm64/Instructions/InstEmitSimdHash.cs new file mode 100644 index 00000000..bb767fec --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdHash.cs @@ -0,0 +1,140 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Translation; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { +#region "Sha1" + public static void Sha1c_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + EmitVectorExtractZx(context, op.Rn, 0, 2); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.HashChoose)); + + context.EmitStvec(op.Rd); + } + + public static void Sha1h_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 0, 2); + + SoftFallback.EmitCall(context, nameof(SoftFallback.FixedRotate)); + + EmitScalarSet(context, op.Rd, 2); + } + + public static void Sha1m_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + EmitVectorExtractZx(context, op.Rn, 0, 2); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.HashMajority)); + + context.EmitStvec(op.Rd); + } + + public static void 
Sha1p_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + EmitVectorExtractZx(context, op.Rn, 0, 2); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.HashParity)); + + context.EmitStvec(op.Rd); + } + + public static void Sha1su0_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Sha1SchedulePart1)); + + context.EmitStvec(op.Rd); + } + + public static void Sha1su1_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Sha1SchedulePart2)); + + context.EmitStvec(op.Rd); + } +#endregion + +#region "Sha256" + public static void Sha256h_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.HashLower)); + + context.EmitStvec(op.Rd); + } + + public static void Sha256h2_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.HashUpper)); + + context.EmitStvec(op.Rd); + } + + public static void Sha256su0_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Sha256SchedulePart1)); + + context.EmitStvec(op.Rd); + } + + public static void Sha256su1_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + 
context.EmitLdvec(op.Rm); + + SoftFallback.EmitCall(context, nameof(SoftFallback.Sha256SchedulePart2)); + + context.EmitStvec(op.Rd); + } +#endregion + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 00000000..fad51510 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,1495 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection; +using System.Reflection.Emit; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace ChocolArm64.Instructions +{ + static class InstEmitSimdHelper + { + public static readonly Type[] IntTypesPerSizeLog2 = new Type[] + { + typeof(sbyte), + typeof(short), + typeof(int), + typeof(long) + }; + + public static readonly Type[] UIntTypesPerSizeLog2 = new Type[] + { + typeof(byte), + typeof(ushort), + typeof(uint), + typeof(ulong) + }; + + public static readonly Type[] VectorIntTypesPerSizeLog2 = new Type[] + { + typeof(Vector128<sbyte>), + typeof(Vector128<short>), + typeof(Vector128<int>), + typeof(Vector128<long>) + }; + + public static readonly Type[] VectorUIntTypesPerSizeLog2 = new Type[] + { + typeof(Vector128<byte>), + typeof(Vector128<ushort>), + typeof(Vector128<uint>), + typeof(Vector128<ulong>) + }; + + [Flags] + public enum OperFlags + { + Rd = 1 << 0, + Rn = 1 << 1, + Rm = 1 << 2, + Ra = 1 << 3, + + RnRm = Rn | Rm, + RdRn = Rd | Rn, + RaRnRm = Ra | Rn | Rm, + RdRnRm = Rd | Rn | Rm + } + + public static int GetImmShl(OpCodeSimdShImm64 op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm64 op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static void EmitSse2Op(ILEmitterCtx context, string name) + { + EmitSseOp(context, name, typeof(Sse2)); + } + + public static void EmitSse41Op(ILEmitterCtx context, string name) + { + EmitSseOp(context, name, typeof(Sse41)); + 
} + + public static void EmitSse42Op(ILEmitterCtx context, string name) + { + EmitSseOp(context, name, typeof(Sse42)); + } + + private static void EmitSseOp(ILEmitterCtx context, string name, Type type) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + Type baseType = VectorIntTypesPerSizeLog2[op.Size]; + + if (op is OpCodeSimdReg64 binOp) + { + EmitLdvecWithSignedCast(context, binOp.Rm, op.Size); + + context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType })); + } + else + { + context.EmitCall(type.GetMethod(name, new Type[] { baseType })); + } + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitLdvecWithSignedCast(ILEmitterCtx context, int reg, int size) + { + context.EmitLdvec(reg); + + switch (size) + { + case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToSByte)); break; + case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt16)); break; + case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt32)); break; + case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt64)); break; + + default: throw new ArgumentOutOfRangeException(nameof(size)); + } + } + + public static void EmitLdvecWithCastToDouble(ILEmitterCtx context, int reg) + { + context.EmitLdvec(reg); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); + } + + public static void EmitStvecWithCastFromDouble(ILEmitterCtx context, int reg) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); + + context.EmitStvec(reg); + } + + public static void EmitLdvecWithUnsignedCast(ILEmitterCtx context, int reg, int size) + { + context.EmitLdvec(reg); + + switch (size) + { + case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToByte)); break; + case 1: 
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt16)); break; + case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt32)); break; + case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt64)); break; + + default: throw new ArgumentOutOfRangeException(nameof(size)); + } + } + + public static void EmitStvecWithSignedCast(ILEmitterCtx context, int reg, int size) + { + switch (size) + { + case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSByteToSingle)); break; + case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16ToSingle)); break; + case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32ToSingle)); break; + case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64ToSingle)); break; + + default: throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.EmitStvec(reg); + } + + public static void EmitStvecWithUnsignedCast(ILEmitterCtx context, int reg, int size) + { + switch (size) + { + case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorByteToSingle)); break; + case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt16ToSingle)); break; + case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt32ToSingle)); break; + case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt64ToSingle)); break; + + default: throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.EmitStvec(reg); + } + + public static void EmitScalarSseOrSse2OpF(ILEmitterCtx context, string name) + { + EmitSseOrSse2OpF(context, name, true); + } + + public static void EmitVectorSseOrSse2OpF(ILEmitterCtx context, string name) + { + EmitSseOrSse2OpF(context, name, false); + } + + public static void EmitSseOrSse2OpF(ILEmitterCtx context, string name, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + void Ldvec(int reg) + { + context.EmitLdvec(reg); + + if (sizeF 
== 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); + } + } + + Ldvec(op.Rn); + + Type type; + Type baseType; + + if (sizeF == 0) + { + type = typeof(Sse); + baseType = typeof(Vector128<float>); + } + else /* if (SizeF == 1) */ + { + type = typeof(Sse2); + baseType = typeof(Vector128<double>); + } + + if (op is OpCodeSimdReg64 binOp) + { + Ldvec(binOp.Rm); + + context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType })); + } + else + { + context.EmitCall(type.GetMethod(name, new Type[] { baseType })); + } + + if (sizeF == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); + } + + context.EmitStvec(op.Rd); + + if (scalar) + { + if (sizeF == 0) + { + EmitVectorZero32_128(context, op.Rd); + } + else /* if (SizeF == 1) */ + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitUnaryMathCall(ILEmitterCtx context, string name) + { + IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo mthdInfo; + + if (sizeF == 0) + { + mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float) }); + } + else /* if (SizeF == 1) */ + { + mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double) }); + } + + context.EmitCall(mthdInfo); + } + + public static void EmitBinaryMathCall(ILEmitterCtx context, string name) + { + IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo mthdInfo; + + if (sizeF == 0) + { + mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (SizeF == 1) */ + { + mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) }); + } + + context.EmitCall(mthdInfo); + } + + public static void EmitRoundMathCall(ILEmitterCtx context, MidpointRounding roundMode) + { + IOpCodeSimd64 op = 
(IOpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo mthdInfo; + + if (sizeF == 0) + { + mthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); + } + else /* if (SizeF == 1) */ + { + mthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); + } + + context.EmitLdc_I4((int)roundMode); + + context.EmitCall(mthdInfo); + } + + public static void EmitUnarySoftFloatCall(ILEmitterCtx context, string name) + { + IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo mthdInfo; + + if (sizeF == 0) + { + mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) }); + } + else /* if (SizeF == 1) */ + { + mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) }); + } + + context.EmitCall(mthdInfo); + } + + public static void EmitSoftFloatCall(ILEmitterCtx context, string name) + { + IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; + + Type type = (op.Size & 1) == 0 + ? 
typeof(SoftFloat32) + : typeof(SoftFloat64); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(type, name); + } + + public static void EmitScalarBinaryOpByElemF(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + EmitScalarOpByElemF(context, emit, op.Index, ternary: false); + } + + public static void EmitScalarTernaryOpByElemF(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + EmitScalarOpByElemF(context, emit, op.Index, ternary: true); + } + + public static void EmitScalarOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (ternary) + { + EmitVectorExtractF(context, op.Rd, 0, sizeF); + } + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + EmitVectorExtractF(context, op.Rm, elem, sizeF); + + emit(); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void EmitScalarUnaryOpSx(ILEmitterCtx context, Action emit) + { + EmitScalarOp(context, emit, OperFlags.Rn, true); + } + + public static void EmitScalarBinaryOpSx(ILEmitterCtx context, Action emit) + { + EmitScalarOp(context, emit, OperFlags.RnRm, true); + } + + public static void EmitScalarUnaryOpZx(ILEmitterCtx context, Action emit) + { + EmitScalarOp(context, emit, OperFlags.Rn, false); + } + + public static void EmitScalarBinaryOpZx(ILEmitterCtx context, Action emit) + { + EmitScalarOp(context, emit, OperFlags.RnRm, false); + } + + public static void EmitScalarTernaryOpZx(ILEmitterCtx context, Action emit) + { + EmitScalarOp(context, emit, OperFlags.RdRnRm, false); + } + + public static void EmitScalarOp(ILEmitterCtx context, Action emit, OperFlags opers, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + bool rd = (opers & OperFlags.Rd) != 0; + bool rn = (opers & OperFlags.Rn) != 0; + bool rm = (opers & OperFlags.Rm) != 0; + + if (rd) + 
{ + EmitVectorExtract(context, op.Rd, 0, op.Size, signed); + } + + if (rn) + { + EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + } + + if (rm) + { + EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, 0, op.Size, signed); + } + + emit(); + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void EmitScalarUnaryOpF(ILEmitterCtx context, Action emit) + { + EmitScalarOpF(context, emit, OperFlags.Rn); + } + + public static void EmitScalarBinaryOpF(ILEmitterCtx context, Action emit) + { + EmitScalarOpF(context, emit, OperFlags.RnRm); + } + + public static void EmitScalarTernaryRaOpF(ILEmitterCtx context, Action emit) + { + EmitScalarOpF(context, emit, OperFlags.RaRnRm); + } + + public static void EmitScalarOpF(ILEmitterCtx context, Action emit, OperFlags opers) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + bool ra = (opers & OperFlags.Ra) != 0; + bool rn = (opers & OperFlags.Rn) != 0; + bool rm = (opers & OperFlags.Rm) != 0; + + if (ra) + { + EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Ra, 0, sizeF); + } + + if (rn) + { + EmitVectorExtractF(context, op.Rn, 0, sizeF); + } + + if (rm) + { + EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Rm, 0, sizeF); + } + + emit(); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void EmitVectorUnaryOpF(ILEmitterCtx context, Action emit) + { + EmitVectorOpF(context, emit, OperFlags.Rn); + } + + public static void EmitVectorBinaryOpF(ILEmitterCtx context, Action emit) + { + EmitVectorOpF(context, emit, OperFlags.RnRm); + } + + public static void EmitVectorTernaryOpF(ILEmitterCtx context, Action emit) + { + EmitVectorOpF(context, emit, OperFlags.RdRnRm); + } + + public static void EmitVectorOpF(ILEmitterCtx context, Action emit, OperFlags opers) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> sizeF + 2; + + bool rd = (opers & OperFlags.Rd) != 0; + bool rn 
= (opers & OperFlags.Rn) != 0; + bool rm = (opers & OperFlags.Rm) != 0; + + for (int index = 0; index < elems; index++) + { + if (rd) + { + EmitVectorExtractF(context, op.Rd, index, sizeF); + } + + if (rn) + { + EmitVectorExtractF(context, op.Rn, index, sizeF); + } + + if (rm) + { + EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Rm, index, sizeF); + } + + emit(); + + EmitVectorInsertF(context, op.Rd, index, sizeF); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorBinaryOpByElemF(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + EmitVectorOpByElemF(context, emit, op.Index, ternary: false); + } + + public static void EmitVectorTernaryOpByElemF(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + EmitVectorOpByElemF(context, emit, op.Index, ternary: true); + } + + public static void EmitVectorOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + if (ternary) + { + EmitVectorExtractF(context, op.Rd, index, sizeF); + } + + EmitVectorExtractF(context, op.Rn, index, sizeF); + EmitVectorExtractF(context, op.Rm, elem, sizeF); + + emit(); + + EmitVectorInsertTmpF(context, index, sizeF); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorUnaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.Rn, true); + } + + public static void EmitVectorBinaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.RnRm, true); + } + + public static void 
EmitVectorTernaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.RdRnRm, true); + } + + public static void EmitVectorUnaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.Rn, false); + } + + public static void EmitVectorBinaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.RnRm, false); + } + + public static void EmitVectorTernaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorOp(context, emit, OperFlags.RdRnRm, false); + } + + public static void EmitVectorOp(ILEmitterCtx context, Action emit, OperFlags opers, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + bool rd = (opers & OperFlags.Rd) != 0; + bool rn = (opers & OperFlags.Rn) != 0; + bool rm = (opers & OperFlags.Rm) != 0; + + for (int index = 0; index < elems; index++) + { + if (rd) + { + EmitVectorExtract(context, op.Rd, index, op.Size, signed); + } + + if (rn) + { + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + } + + if (rm) + { + EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); + } + + emit(); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorBinaryOpByElemSx(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp; + + EmitVectorOpByElem(context, emit, op.Index, false, true); + } + + public static void EmitVectorBinaryOpByElemZx(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp; + + EmitVectorOpByElem(context, emit, op.Index, false, false); + } + + public static void EmitVectorTernaryOpByElemZx(ILEmitterCtx context, Action emit) + { + OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp; + + 
EmitVectorOpByElem(context, emit, op.Index, true, false); + } + + public static void EmitVectorOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + EmitVectorExtract(context, op.Rm, elem, op.Size, signed); + context.EmitSttmp(); + + for (int index = 0; index < elems; index++) + { + if (ternary) + { + EmitVectorExtract(context, op.Rd, index, op.Size, signed); + } + + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + context.EmitLdtmp(); + + emit(); + + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorImmUnaryOp(ILEmitterCtx context, Action emit) + { + EmitVectorImmOp(context, emit, false); + } + + public static void EmitVectorImmBinaryOp(ILEmitterCtx context, Action emit) + { + EmitVectorImmOp(context, emit, true); + } + + public static void EmitVectorImmOp(ILEmitterCtx context, Action emit, bool binary) + { + OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) + { + if (binary) + { + EmitVectorExtractZx(context, op.Rd, index, op.Size); + } + + context.EmitLdc_I8(op.Imm); + + emit(); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorWidenRmBinaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRmBinaryOp(context, emit, true); + } + + public static void EmitVectorWidenRmBinaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRmBinaryOp(context, emit, false); + } + + public static void EmitVectorWidenRmBinaryOp(ILEmitterCtx 
context, Action emit, bool signed) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + emit(); + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRnRmOp(context, emit, false, true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRnRmOp(context, emit, false, false); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRnRmOp(context, emit, true, true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorWidenRnRmOp(context, emit, true, false); + } + + public static void EmitVectorWidenRnRmOp(ILEmitterCtx context, Action emit, bool ternary, bool signed) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + if (ternary) + { + EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + } + + EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + emit(); + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } + + public static void EmitVectorPairwiseOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorPairwiseOp(context, emit, true); + } + + public static void EmitVectorPairwiseOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorPairwiseOp(context, emit, false); + } + + public static void EmitVectorPairwiseOp(ILEmitterCtx context, Action emit, bool signed) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtract(context, op.Rn, idx, op.Size, signed); + EmitVectorExtract(context, op.Rn, idx + 1, op.Size, signed); + + emit(); + + EmitVectorExtract(context, op.Rm, idx, op.Size, signed); + EmitVectorExtract(context, op.Rm, idx + 1, op.Size, signed); + + emit(); + + EmitVectorInsertTmp(context, pairs + index, op.Size); + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitVectorPairwiseOpF(ILEmitterCtx context, Action emit) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + int words = op.GetBitsCount() >> 4; + int pairs = words >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractF(context, op.Rn, idx, sizeF); + EmitVectorExtractF(context, op.Rn, idx + 1, sizeF); + + emit(); + + EmitVectorExtractF(context, 
op.Rm, idx, sizeF); + EmitVectorExtractF(context, op.Rm, idx + 1, sizeF); + + emit(); + + EmitVectorInsertTmpF(context, pairs + index, sizeF); + EmitVectorInsertTmpF(context, index, sizeF); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + [Flags] + public enum SaturatingFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Add = 1 << 2, + Sub = 1 << 3, + + Accumulate = 1 << 4, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + public static void EmitScalarSaturatingUnaryOpSx(ILEmitterCtx context, Action emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx); + } + + public static void EmitVectorSaturatingUnaryOpSx(ILEmitterCtx context, Action emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx); + } + + public static void EmitSaturatingUnaryOpSx(ILEmitterCtx context, Action emit, SaturatingFlags flags) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? 
bytes >> op.Size : 1; + + if (scalar) + { + EmitVectorZeroLowerTmp(context); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractSx(context, op.Rn, index, op.Size); + + emit(); + + if (op.Size <= 2) + { + EmitSatQ(context, op.Size, true, true); + } + else /* if (Op.Size == 3) */ + { + EmitUnarySignedSatQAbsOrNeg(context); + } + + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void EmitScalarSaturatingBinaryOpSx(ILEmitterCtx context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.ScalarSx | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(ILEmitterCtx context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.ScalarZx | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(ILEmitterCtx context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.VectorSx | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(ILEmitterCtx context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.VectorZx | flags); + } + + public static void EmitSaturatingBinaryOp(ILEmitterCtx context, Action emit, SaturatingFlags flags) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? 
bytes >> op.Size : 1; + + if (scalar) + { + EmitVectorZeroLowerTmp(context); + } + + if (add || sub) + { + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + context.Emit(add ? OpCodes.Add : OpCodes.Sub); + + EmitSatQ(context, op.Size, true, signed); + } + else /* if (Op.Size == 3) */ + { + if (add) + { + EmitBinarySatQAdd(context, signed); + } + else /* if (Sub) */ + { + EmitBinarySatQSub(context, signed); + } + } + + EmitVectorInsertTmp(context, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + context.Emit(OpCodes.Add); + + EmitSatQ(context, op.Size, true, signed); + } + else /* if (Op.Size == 3) */ + { + EmitBinarySatQAccumulate(context, signed); + } + + EmitVectorInsertTmp(context, index, op.Size); + } + } + else + { + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); + + emit(); + + EmitSatQ(context, op.Size, true, signed); + + EmitVectorInsertTmp(context, index, op.Size); + } + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(ILEmitterCtx context, SaturatingNarrowFlags flags) + { + OpCodeSimd64 
op = (OpCodeSimd64)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + if (scalar) + { + EmitVectorZeroLowerTmp(context); + } + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + EmitSatQ(context, op.Size, signedSrc, signedDst); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). + public static void EmitSatQ( + ILEmitterCtx context, + int sizeDst, + bool signedSrc, + bool signedDst) + { + if (sizeDst > 2) + { + throw new ArgumentOutOfRangeException(nameof(sizeDst)); + } + + context.EmitLdc_I4(sizeDst); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (signedSrc) + { + SoftFallback.EmitCall(context, signedDst + ? nameof(SoftFallback.SignedSrcSignedDstSatQ) + : nameof(SoftFallback.SignedSrcUnsignedDstSatQ)); + } + else + { + SoftFallback.EmitCall(context, signedDst + ? nameof(SoftFallback.UnsignedSrcSignedDstSatQ) + : nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ)); + } + } + + // TSrc (64bit) == TDst (64bit); signed. + public static void EmitUnarySignedSatQAbsOrNeg(ILEmitterCtx context) + { + if (((OpCodeSimd64)context.CurrOp).Size < 3) + { + throw new InvalidOperationException(); + } + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + SoftFallback.EmitCall(context, nameof(SoftFallback.UnarySignedSatQAbsOrNeg)); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. 
+ public static void EmitBinarySatQAdd(ILEmitterCtx context, bool signed) + { + if (((OpCodeSimdReg64)context.CurrOp).Size < 3) + { + throw new InvalidOperationException(); + } + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + SoftFallback.EmitCall(context, signed + ? nameof(SoftFallback.BinarySignedSatQAdd) + : nameof(SoftFallback.BinaryUnsignedSatQAdd)); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static void EmitBinarySatQSub(ILEmitterCtx context, bool signed) + { + if (((OpCodeSimdReg64)context.CurrOp).Size < 3) + { + throw new InvalidOperationException(); + } + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + SoftFallback.EmitCall(context, signed + ? nameof(SoftFallback.BinarySignedSatQSub) + : nameof(SoftFallback.BinaryUnsignedSatQSub)); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static void EmitBinarySatQAccumulate(ILEmitterCtx context, bool signed) + { + if (((OpCodeSimd64)context.CurrOp).Size < 3) + { + throw new InvalidOperationException(); + } + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + SoftFallback.EmitCall(context, signed + ? 
nameof(SoftFallback.BinarySignedSatQAcc) + : nameof(SoftFallback.BinaryUnsignedSatQAcc)); + } + + public static void EmitScalarSet(ILEmitterCtx context, int reg, int size) + { + EmitVectorZeroAll(context, reg); + EmitVectorInsert(context, reg, 0, size); + } + + public static void EmitScalarSetF(ILEmitterCtx context, int reg, int size) + { + if (Optimizations.UseSse41 && size == 0) + { + //If the type is float, we can perform insertion and + //zero the upper bits with a single instruction (INSERTPS); + context.EmitLdvec(reg); + + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorInsertScalarSingle)); + + context.EmitStvec(reg); + } + else + { + EmitVectorZeroAll(context, reg); + EmitVectorInsertF(context, reg, 0, size); + } + } + + public static void EmitVectorExtractSx(ILEmitterCtx context, int reg, int index, int size) + { + EmitVectorExtract(context, reg, index, size, true); + } + + public static void EmitVectorExtractZx(ILEmitterCtx context, int reg, int index, int size) + { + EmitVectorExtract(context, reg, index, size, false); + } + + public static void EmitVectorExtract(ILEmitterCtx context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + context.EmitLdvec(reg); + context.EmitLdc_I4(index); + context.EmitLdc_I4(size); + + VectorHelper.EmitCall(context, signed + ? 
nameof(VectorHelper.VectorExtractIntSx) + : nameof(VectorHelper.VectorExtractIntZx)); + } + + public static void EmitVectorExtractF(ILEmitterCtx context, int reg, int index, int size) + { + ThrowIfInvalidF(index, size); + + context.EmitLdvec(reg); + context.EmitLdc_I4(index); + + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractSingle)); + } + else if (size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractDouble)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + } + + public static void EmitVectorZeroAll(ILEmitterCtx context, int rd) + { + if (Optimizations.UseSse2) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitStvec(rd); + } + else + { + EmitVectorZeroLower(context, rd); + EmitVectorZeroUpper(context, rd); + } + } + + public static void EmitVectorZeroLower(ILEmitterCtx context, int rd) + { + EmitVectorInsert(context, rd, 0, 3, 0); + } + + public static void EmitVectorZeroLowerTmp(ILEmitterCtx context) + { + EmitVectorInsertTmp(context, 0, 3, 0); + } + + public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg) + { + if (Optimizations.UseSse2) + { + //TODO: Use MoveScalar once it is fixed, as of the + //time of writing it just crashes the JIT. 
+ EmitLdvecWithUnsignedCast(context, reg, 3); + + Type[] types = new Type[] { typeof(Vector128<ulong>), typeof(byte) }; + + //Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types)); + + context.EmitLdc_I4(8); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), types)); + + context.EmitLdc_I4(8); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), types)); + + EmitStvecWithUnsignedCast(context, reg, 3); + } + else + { + EmitVectorInsert(context, reg, 1, 3, 0); + } + } + + public static void EmitVectorZero32_128(ILEmitterCtx context, int reg) + { + context.EmitLdvec(reg); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorZero32_128)); + + context.EmitStvec(reg); + } + + public static void EmitVectorInsert(ILEmitterCtx context, int reg, int index, int size) + { + ThrowIfInvalid(index, size); + + context.EmitLdvec(reg); + context.EmitLdc_I4(index); + context.EmitLdc_I4(size); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt)); + + context.EmitStvec(reg); + } + + public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size) + { + ThrowIfInvalid(index, size); + + context.EmitLdvectmp(); + context.EmitLdc_I4(index); + context.EmitLdc_I4(size); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt)); + + context.EmitStvectmp(); + } + + public static void EmitVectorInsert(ILEmitterCtx context, int reg, int index, int size, long value) + { + ThrowIfInvalid(index, size); + + context.EmitLdc_I8(value); + context.EmitLdvec(reg); + context.EmitLdc_I4(index); + context.EmitLdc_I4(size); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt)); + + context.EmitStvec(reg); + } + + public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size, long value) + { + ThrowIfInvalid(index, size); + + context.EmitLdc_I8(value); + context.EmitLdvectmp(); + context.EmitLdc_I4(index); + 
context.EmitLdc_I4(size); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt)); + + context.EmitStvectmp(); + } + + public static void EmitVectorInsertF(ILEmitterCtx context, int reg, int index, int size) + { + ThrowIfInvalidF(index, size); + + context.EmitLdvec(reg); + context.EmitLdc_I4(index); + + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle)); + } + else if (size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.EmitStvec(reg); + } + + public static void EmitVectorInsertTmpF(ILEmitterCtx context, int index, int size) + { + ThrowIfInvalidF(index, size); + + context.EmitLdvectmp(); + context.EmitLdc_I4(index); + + if (size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle)); + } + else if (size == 1) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + context.EmitStvectmp(); + } + + private static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + + private static void ThrowIfInvalidF(int index, int size) + { + if ((uint)size > 1u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 4u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs new file mode 100644 index 00000000..f51568eb --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs @@ -0,0 +1,311 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using 
System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void And_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.And)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.And)); + } + } + + public static void Bic_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + Type[] types = new Type[] + { + VectorUIntTypesPerSizeLog2[op.Size], + VectorUIntTypesPerSizeLog2[op.Size] + }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), types)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Not); + context.Emit(OpCodes.And); + }); + } + } + + public static void Bic_Vi(ILEmitterCtx context) + { + EmitVectorImmBinaryOp(context, () => + { + context.Emit(OpCodes.Not); + context.Emit(OpCodes.And); + }); + } + + public static void Bif_V(ILEmitterCtx context) + { + EmitBitBif(context, true); + } + + public static void Bit_V(ILEmitterCtx context) + { + EmitBitBif(context, false); + } + + private static void EmitBitBif(ILEmitterCtx context, bool notRm) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2) + { + Type[] types = new Type[] + { + VectorUIntTypesPerSizeLog2[op.Size], + VectorUIntTypesPerSizeLog2[op.Size] + }; + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + 
+ string name = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And); + + context.EmitCall(typeof(Sse2).GetMethod(name, types)); + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rd, index, op.Size); + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.Emit(OpCodes.Xor); + + EmitVectorExtractZx(context, op.Rm, index, op.Size); + + if (notRm) + { + context.Emit(OpCodes.Not); + } + + context.Emit(OpCodes.And); + + EmitVectorExtractZx(context, op.Rd, index, op.Size); + + context.Emit(OpCodes.Xor); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } + + public static void Bsl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] types = new Type[] + { + VectorUIntTypesPerSizeLog2[op.Size], + VectorUIntTypesPerSizeLog2[op.Size] + }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpZx(context, () => + { + context.EmitSttmp(); + 
context.EmitLdtmp(); + + context.Emit(OpCodes.Xor); + context.Emit(OpCodes.And); + + context.EmitLdtmp(); + + context.Emit(OpCodes.Xor); + }); + } + } + + public static void Eor_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Xor)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Xor)); + } + } + + public static void Not_V(ILEmitterCtx context) + { + EmitVectorUnaryOpZx(context, () => context.Emit(OpCodes.Not)); + } + + public static void Orn_V(ILEmitterCtx context) + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Not); + context.Emit(OpCodes.Or); + }); + } + + public static void Orr_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Or)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Or)); + } + } + + public static void Orr_Vi(ILEmitterCtx context) + { + EmitVectorImmBinaryOp(context, () => context.Emit(OpCodes.Or)); + } + + public static void Rbit_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, 0); + + context.Emit(OpCodes.Conv_U4); + + SoftFallback.EmitCall(context, nameof(SoftFallback.ReverseBits8)); + + context.Emit(OpCodes.Conv_U8); + + EmitVectorInsert(context, op.Rd, index, 0); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Rev16_V(ILEmitterCtx context) + { + EmitRev_V(context, containerSize: 1); + } + + public static void Rev32_V(ILEmitterCtx context) + { + EmitRev_V(context, containerSize: 2); + } + + public static void Rev64_V(ILEmitterCtx context) + { + EmitRev_V(context, containerSize: 3); + } + + private static void EmitRev_V(ILEmitterCtx context, int containerSize) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size >= containerSize) + { + throw new InvalidOperationException(); + } + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int containerMask = (1 << (containerSize - op.Size)) - 1; + + for (int index = 0; index < elems; index++) + { + int revIndex = index ^ containerMask; + + EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); + + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdMemory.cs b/ChocolArm64/Instructions/InstEmitSimdMemory.cs new file mode 100644 index 00000000..eb053257 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdMemory.cs @@ -0,0 +1,185 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; + +using static ChocolArm64.Instructions.InstEmitMemoryHelper; +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public 
static void Ld__Vms(ILEmitterCtx context) + { + EmitSimdMemMs(context, isLoad: true); + } + + public static void Ld__Vss(ILEmitterCtx context) + { + EmitSimdMemSs(context, isLoad: true); + } + + public static void St__Vms(ILEmitterCtx context) + { + EmitSimdMemMs(context, isLoad: false); + } + + public static void St__Vss(ILEmitterCtx context) + { + EmitSimdMemSs(context, isLoad: false); + } + + private static void EmitSimdMemMs(ILEmitterCtx context, bool isLoad) + { + OpCodeSimdMemMs64 op = (OpCodeSimdMemMs64)context.CurrOp; + + int offset = 0; + + for (int rep = 0; rep < op.Reps; rep++) + for (int elem = 0; elem < op.Elems; elem++) + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rtt = (op.Rt + rep + sElem) & 0x1f; + + if (isLoad) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(op.Rn); + context.EmitLdc_I8(offset); + + context.Emit(OpCodes.Add); + + EmitReadZxCall(context, op.Size); + + EmitVectorInsert(context, rtt, elem, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1) + { + EmitVectorZeroUpper(context, rtt); + } + } + else + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(op.Rn); + context.EmitLdc_I8(offset); + + context.Emit(OpCodes.Add); + + EmitVectorExtractZx(context, rtt, elem, op.Size); + + EmitWriteCall(context, op.Size); + } + + offset += 1 << op.Size; + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemSs(ILEmitterCtx context, bool isLoad) + { + OpCodeSimdMemSs64 op = (OpCodeSimdMemSs64)context.CurrOp; + + int offset = 0; + + void EmitMemAddress() + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(op.Rn); + context.EmitLdc_I8(offset); + + context.Emit(OpCodes.Add); + } + + if (op.Replicate) + { + //Only loads uses the replicate mode. 
+ if (!isLoad) + { + throw new InvalidOperationException(); + } + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + for (int index = 0; index < elems; index++) + { + EmitMemAddress(); + + EmitReadZxCall(context, op.Size); + + EmitVectorInsert(context, rt, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, rt); + } + + offset += 1 << op.Size; + } + } + else + { + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + if (isLoad) + { + EmitMemAddress(); + + EmitReadZxCall(context, op.Size); + + EmitVectorInsert(context, rt, op.Index, op.Size); + } + else + { + EmitMemAddress(); + + EmitVectorExtractZx(context, rt, op.Index, op.Size); + + EmitWriteCall(context, op.Size); + } + + offset += 1 << op.Size; + } + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemWBack(ILEmitterCtx context, int offset) + { + OpCodeMemReg64 op = (OpCodeMemReg64)context.CurrOp; + + context.EmitLdint(op.Rn); + + if (op.Rm != CpuThreadState.ZrIndex) + { + context.EmitLdint(op.Rm); + } + else + { + context.EmitLdc_I8(offset); + } + + context.Emit(OpCodes.Add); + + context.EmitStint(op.Rn); + } + } +}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs new file mode 100644 index 00000000..3f539b8a --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -0,0 +1,562 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Dup_Gp(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + if (Optimizations.UseSse2) + { + context.EmitLdintzr(op.Rn); + + switch (op.Size) + { + case 0: context.Emit(OpCodes.Conv_U1); break; + case 1: context.Emit(OpCodes.Conv_U2); break; + case 2: context.Emit(OpCodes.Conv_U4); break; + } + + Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) + { + context.EmitLdintzr(op.Rn); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } + + public static void Dup_S(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void Dup_V(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) 
+ { + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Ext_V(ILEmitterCtx context) + { + OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; + + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + + int bytes = op.GetBitsCount() >> 3; + + int position = op.Imm4; + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + if (position == bytes) + { + position = 0; + } + + EmitVectorExtractZx(context, reg, position++, 0); + EmitVectorInsertTmp(context, index, 0); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Fcsel_S(ILEmitterCtx context) + { + OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp; + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitCondBranch(lblTrue, op.Cond); + + EmitVectorExtractF(context, op.Rm, 0, op.Size); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblTrue); + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + context.MarkLabel(lblEnd); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Fmov_Ftoi(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 0, 3); + + EmitIntZeroUpperIfNeeded(context); + + context.EmitStintzr(op.Rd); + } + + public static void Fmov_Ftoi1(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 1, 3); + + EmitIntZeroUpperIfNeeded(context); + + context.EmitStintzr(op.Rd); + } + + public static void Fmov_Itof(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + 
EmitIntZeroUpperIfNeeded(context); + + EmitScalarSet(context, op.Rd, 3); + } + + public static void Fmov_Itof1(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + EmitIntZeroUpperIfNeeded(context); + + EmitVectorInsert(context, op.Rd, 1, 3); + } + + public static void Fmov_S(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractF(context, op.Rn, 0, op.Size); + + EmitScalarSetF(context, op.Rd, op.Size); + } + + public static void Fmov_Si(ILEmitterCtx context) + { + OpCodeSimdFmov64 op = (OpCodeSimdFmov64)context.CurrOp; + + context.EmitLdc_I8(op.Imm); + + EmitScalarSet(context, op.Rd, op.Size + 2); + } + + public static void Fmov_V(ILEmitterCtx context) + { + OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp; + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2; + + for (int index = 0; index < (elems >> op.Size); index++) + { + context.EmitLdc_I8(op.Imm); + + EmitVectorInsert(context, op.Rd, index, op.Size + 2); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Ins_Gp(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + EmitVectorInsert(context, op.Rd, op.DstIndex, op.Size); + } + + public static void Ins_V(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size); + + EmitVectorInsert(context, op.Rd, op.DstIndex, op.Size); + } + + public static void Movi_V(ILEmitterCtx context) + { + EmitVectorImmUnaryOp(context, () => { }); + } + + public static void Mvni_V(ILEmitterCtx context) + { + EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not)); + } + + public static void Smov_S(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + EmitVectorExtractSx(context, op.Rn, op.DstIndex, 
op.Size); + + EmitIntZeroUpperIfNeeded(context); + + context.EmitStintzr(op.Rd); + } + + public static void Tbl_V(ILEmitterCtx context) + { + OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp; + + context.EmitLdvec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + context.EmitLdvec((op.Rn + index) & 0x1f); + } + + switch (op.Size) + { + case 1: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl1_V64), + nameof(VectorHelper.Tbl1_V128)); break; + + case 2: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl2_V64), + nameof(VectorHelper.Tbl2_V128)); break; + + case 3: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl3_V64), + nameof(VectorHelper.Tbl3_V128)); break; + + case 4: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl4_V64), + nameof(VectorHelper.Tbl4_V128)); break; + + default: throw new InvalidOperationException(); + } + + context.EmitStvec(op.Rd); + } + + public static void Trn1_V(ILEmitterCtx context) + { + EmitVectorTranspose(context, part: 0); + } + + public static void Trn2_V(ILEmitterCtx context) + { + EmitVectorTranspose(context, part: 1); + } + + public static void Umov_S(ILEmitterCtx context) + { + OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + context.EmitStintzr(op.Rd); + } + + public static void Uzp1_V(ILEmitterCtx context) + { + EmitVectorUnzip(context, part: 0); + } + + public static void Uzp2_V(ILEmitterCtx context) + { + EmitVectorUnzip(context, part: 1); + } + + public static void Xtn_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + if (Optimizations.UseSse41 && op.Size < 2) + { + void EmitZeroVector() + { + switch (op.Size) + { + case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero)); break; + case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero)); break; + } + } + + //For XTN, first operand is source, second operand is 0. + //For XTN2, first operand is 0, second operand is source. + if (part != 0) + { + EmitZeroVector(); + } + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + //Set mask to discard the upper half of the wide elements. + switch (op.Size) + { + case 0: context.EmitLdc_I4(0x00ff); break; + case 1: context.EmitLdc_I4(0x0000ffff); break; + } + + Type wideType = IntTypesPerSizeLog2[op.Size + 1]; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideType })); + + wideType = VectorIntTypesPerSizeLog2[op.Size + 1]; + + Type[] wideTypes = new Type[] { wideType, wideType }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideTypes)); + + if (part == 0) + { + EmitZeroVector(); + } + + //Pack values with signed saturation, the signed saturation shouldn't + //saturate anything since the upper bits were masked off. + Type sseType = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + context.EmitCall(sseType.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideTypes)); + + if (part != 0) + { + //For XTN2, we additionally need to discard the upper bits + //of the target register and OR the result with it. 
+ EmitVectorZeroUpper(context, op.Rd); + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + + Type narrowType = VectorUIntTypesPerSizeLog2[op.Size]; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowType, narrowType })); + } + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + } + else + { + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } + + public static void Zip1_V(ILEmitterCtx context) + { + EmitVectorZip(context, part: 0); + } + + public static void Zip2_V(ILEmitterCtx context) + { + EmitVectorZip(context, part: 1); + } + + private static void EmitIntZeroUpperIfNeeded(ILEmitterCtx context) + { + if (context.CurrOp.RegisterSize == RegisterSize.Int32 || + context.CurrOp.RegisterSize == RegisterSize.Simd64) + { + context.Emit(OpCodes.Conv_U4); + context.Emit(OpCodes.Conv_U8); + } + } + + private static void EmitVectorTranspose(ILEmitterCtx context, int part) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, idx + 1, op.Size); + EmitVectorInsertTmp(context, idx, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitVectorUnzip(ILEmitterCtx context, int part) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int words = op.GetBitsCount() >> 
4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, pairs + index, op.Size); + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitVectorZip(ILEmitterCtx context, int part) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2) + { + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + Type[] types = new Type[] + { + VectorUIntTypesPerSizeLog2[op.Size], + VectorUIntTypesPerSizeLog2[op.Size] + }; + + string name = part == 0 || (part != 0 && op.RegisterSize == RegisterSize.Simd64) + ? nameof(Sse2.UnpackLow) + : nameof(Sse2.UnpackHigh); + + context.EmitCall(typeof(Sse2).GetMethod(name, types)); + + if (op.RegisterSize == RegisterSize.Simd64 && part != 0) + { + context.EmitLdc_I4(8); + + Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes)); + } + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64 && part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + int Base = part != 0 ? 
pairs : 0; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, Base + index, op.Size); + EmitVectorExtractZx(context, op.Rm, Base + index, op.Size); + + EmitVectorInsertTmp(context, idx + 1, op.Size); + EmitVectorInsertTmp(context, idx, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs new file mode 100644 index 00000000..3c24ff23 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs @@ -0,0 +1,865 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Rshrn_V(ILEmitterCtx context) + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + + public static void Shl_S(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitScalarUnaryOpZx(context, () => + { + context.EmitLdc_I4(GetImmShl(op)); + + context.Emit(OpCodes.Shl); + }); + } + + public static void Shl_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShl(op)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + 
EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(context, () => + { + context.EmitLdc_I4(GetImmShl(op)); + + context.Emit(OpCodes.Shl); + }); + } + } + + public static void Shll_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int shift = 8 << op.Size; + + EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift); + } + + public static void Shrn_V(ILEmitterCtx context) + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } + + public static void Sli_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.EmitLdc_I4(shift); + + context.Emit(OpCodes.Shl); + + EmitVectorExtractZx(context, op.Rd, index, op.Size); + + context.EmitLdc_I8((long)mask); + + context.Emit(OpCodes.And); + context.Emit(OpCodes.Or); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Sqrshrn_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqrshrn_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqrshrun_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqrshrun_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Sqshrn_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, 
ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqshrn_V(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqshrun_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqshrun_V(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Srshr_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } + + public static void Srshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srsra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void 
Srsra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Sshl_V(ILEmitterCtx context) + { + EmitVectorShl(context, signed: true); + } + + public static void Sshll_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); + } + + public static void Sshr_S(ILEmitterCtx context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } + + public static void Sshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesSra = new Type[] { 
VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorSx); + } + } + + public static void Ssra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } + + public static void Ssra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + public static void Uqrshrn_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqrshrn_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Uqshrn_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqshrn_V(ILEmitterCtx context) + { + 
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Urshr_S(ILEmitterCtx context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } + + public static void Urshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round); + } + } + + public static void Ursra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Ursra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + 
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Ushl_V(ILEmitterCtx context) + { + EmitVectorShl(context, signed: false); + } + + public static void Ushll_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); + } + + public static void Ushr_S(ILEmitterCtx context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } + + public static void Ushr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitShrImmOp(context, 
ShrImmFlags.VectorZx); + } + } + + public static void Usra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + } + + public static void Usra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + private static void EmitVectorShl(ILEmitterCtx context, bool signed) + { + //This instruction shifts the value on vector A by the number of bits + //specified on the signed, lower 8 bits of vector B. If the shift value + //is greater or equal to the data size of each lane, then the result is zero. + //Additionally, negative shifts produces right shifts by the negated shift value. 
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int maxShift = 8 << op.Size; + + Action emit = () => + { + ILLabel lblShl = new ILLabel(); + ILLabel lblZero = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + void EmitShift(OpCode ilOp) + { + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(maxShift); + + context.Emit(OpCodes.Bge_S, lblZero); + context.Emit(ilOp); + context.Emit(OpCodes.Br_S, lblEnd); + } + + context.Emit(OpCodes.Conv_I1); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(0); + + context.Emit(OpCodes.Bge_S, lblShl); + context.Emit(OpCodes.Neg); + + EmitShift(signed + ? OpCodes.Shr + : OpCodes.Shr_Un); + + context.MarkLabel(lblShl); + + EmitShift(OpCodes.Shl); + + context.MarkLabel(lblZero); + + context.Emit(OpCodes.Pop); + context.Emit(OpCodes.Pop); + + context.EmitLdc_I8(0); + + context.MarkLabel(lblEnd); + }; + + if (signed) + { + EmitVectorBinaryOpSx(context, emit); + } + else + { + EmitVectorBinaryOpZx(context, emit); + } + } + + [Flags] + private enum ShrImmFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Round = 1 << 2, + Accumulate = 1 << 3, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags); + } + + private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags); + } + + private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorSx | flags); + } + + private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorZx | flags); + } + + private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + bool scalar = (flags & ShrImmFlags.Scalar) != 0; + bool signed = 
(flags & ShrImmFlags.Signed) != 0; + bool round = (flags & ShrImmFlags.Round) != 0; + bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? bytes >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (op.Size <= 2) + { + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(signed ? OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (Op.Size == 3) */ + { + EmitShrImm_64(context, signed, round ? roundConst : 0L, shift); + } + + if (accumulate) + { + EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(OpCodes.Shr_Un); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? 
elems : 0; + + if (scalar) + { + EmitVectorZeroLowerTmp(context); + } + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + if (op.Size <= 1 || !round) + { + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(signedSrc ? OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (Op.Size == 2 && Round) */ + { + EmitShrImm_64(context, signedSrc, roundConst, shift); // Shift <= 32 + } + + EmitSatQ(context, op.Size, signedSrc, signedDst); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift; + private static void EmitShrImm_64( + ILEmitterCtx context, + bool signed, + long roundConst, + int shift) + { + context.EmitLdc_I8(roundConst); + context.EmitLdc_I4(shift); + + SoftFallback.EmitCall(context, signed + ? nameof(SoftFallback.SignedShrImm_64) + : nameof(SoftFallback.UnsignedShrImm_64)); + } + + private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, true); + } + + private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, false); + } + + private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + context.EmitLdc_I4(imm); + + emit(); + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs new file mode 100644 index 00000000..0e61d5bd --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitSystem.cs @@ -0,0 +1,138 @@ +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection; +using System.Reflection.Emit; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Hint(ILEmitterCtx context) + { + //Execute as no-op. + } + + public static void Isb(ILEmitterCtx context) + { + //Execute as no-op. + } + + public static void Mrs(ILEmitterCtx context) + { + OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp; + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + string propName; + + switch (GetPackedId(op)) + { + case 0b11_011_0000_0000_001: propName = nameof(CpuThreadState.CtrEl0); break; + case 0b11_011_0000_0000_111: propName = nameof(CpuThreadState.DczidEl0); break; + case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break; + case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break; + case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break; + case 0b11_011_1101_0000_011: propName = nameof(CpuThreadState.Tpidr); break; + case 0b11_011_1110_0000_000: propName = nameof(CpuThreadState.CntfrqEl0); break; + case 0b11_011_1110_0000_001: propName = nameof(CpuThreadState.CntpctEl0); break; + + default: throw new NotImplementedException($"Unknown MRS at {op.Position:x16}"); + } + + context.EmitCallPropGet(typeof(CpuThreadState), propName); + + PropertyInfo propInfo = typeof(CpuThreadState).GetProperty(propName); 
+ + if (propInfo.PropertyType != typeof(long) && + propInfo.PropertyType != typeof(ulong)) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitStintzr(op.Rt); + } + + public static void Msr(ILEmitterCtx context) + { + OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp; + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdintzr(op.Rt); + + string propName; + + switch (GetPackedId(op)) + { + case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break; + case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break; + case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break; + + default: throw new NotImplementedException($"Unknown MSR at {op.Position:x16}"); + } + + PropertyInfo propInfo = typeof(CpuThreadState).GetProperty(propName); + + if (propInfo.PropertyType != typeof(long) && + propInfo.PropertyType != typeof(ulong)) + { + context.Emit(OpCodes.Conv_U4); + } + + context.EmitCallPropSet(typeof(CpuThreadState), propName); + } + + public static void Nop(ILEmitterCtx context) + { + //Do nothing. + } + + public static void Sys(ILEmitterCtx context) + { + //This instruction is used to do some operations on the CPU like cache invalidation, + //address translation and the like. + //We treat it as no-op here since we don't have any cache being emulated anyway. 
+ OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp; + + switch (GetPackedId(op)) + { + case 0b11_011_0111_0100_001: + { + //DC ZVA + for (int offs = 0; offs < (4 << CpuThreadState.DczSizeLog2); offs += 8) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdintzr(op.Rt); + context.EmitLdc_I(offs); + + context.Emit(OpCodes.Add); + + context.EmitLdc_I8(0); + + InstEmitMemoryHelper.EmitWriteCall(context, 3); + } + + break; + } + + //No-op + case 0b11_011_0111_1110_001: //DC CIVAC + break; + } + } + + private static int GetPackedId(OpCodeSystem64 op) + { + int id; + + id = op.Op2 << 0; + id |= op.CRm << 3; + id |= op.CRn << 7; + id |= op.Op1 << 11; + id |= op.Op0 << 14; + + return id; + } + } +} diff --git a/ChocolArm64/Instructions/InstEmitter.cs b/ChocolArm64/Instructions/InstEmitter.cs new file mode 100644 index 00000000..db6e8604 --- /dev/null +++ b/ChocolArm64/Instructions/InstEmitter.cs @@ -0,0 +1,6 @@ +using ChocolArm64.Translation; + +namespace ChocolArm64.Instructions +{ + delegate void InstEmitter(ILEmitterCtx context); +}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/InstInterpreter.cs b/ChocolArm64/Instructions/InstInterpreter.cs new file mode 100644 index 00000000..e6354fd5 --- /dev/null +++ b/ChocolArm64/Instructions/InstInterpreter.cs @@ -0,0 +1,8 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Memory; +using ChocolArm64.State; + +namespace ChocolArm64.Instructions +{ + delegate void InstInterpreter(CpuThreadState state, MemoryManager memory, OpCode64 opCode); +}
\ No newline at end of file diff --git a/ChocolArm64/Instructions/SoftFallback.cs b/ChocolArm64/Instructions/SoftFallback.cs new file mode 100644 index 00000000..a31aa34c --- /dev/null +++ b/ChocolArm64/Instructions/SoftFallback.cs @@ -0,0 +1,922 @@ +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace ChocolArm64.Instructions +{ + using static VectorHelper; + + static class SoftFallback + { + public static void EmitCall(ILEmitterCtx context, string mthdName) + { + context.EmitCall(typeof(SoftFallback), mthdName); + } + +#region "ShrImm_64" + public static long SignedShrImm_64(long value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (Shift == 64) */ + { + if (value < 0L) + { + return -1L; + } + else + { + return 0L; + } + } + } + else /* if (RoundConst == 1L << (Shift - 1)) */ + { + if (shift <= 63) + { + long add = value + roundConst; + + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (Shift == 64) */ + { + return 0L; + } + } + } + + public static ulong UnsignedShrImm_64(ulong value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (Shift == 64) */ + { + return 0UL; + } + } + else /* if (RoundConst == 1L << (Shift - 1)) */ + { + ulong add = value + (ulong)roundConst; + + if ((add < value) && (add < (ulong)roundConst)) + { + if (shift <= 63) + { + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else /* if (Shift == 64) */ + { + return 1UL; + } + } + else + { + if (shift <= 63) + { + return add >> shift; + } + else /* if (Shift == 64) */ + { + return 0UL; + } + } + } + } +#endregion + +#region "Saturating" + public static long SignedSrcSignedDstSatQ(long op, int 
size, CpuThreadState state) + { + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + long tMinValue = -(1L << (eSize - 1)); + + if (op > tMaxValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMaxValue; + } + else if (op < tMinValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMinValue; + } + else + { + return op; + } + } + + public static ulong SignedSrcUnsignedDstSatQ(long op, int size, CpuThreadState state) + { + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + ulong tMinValue = 0UL; + + if (op > (long)tMaxValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMaxValue; + } + else if (op < (long)tMinValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMinValue; + } + else + { + return (ulong)op; + } + } + + public static long UnsignedSrcSignedDstSatQ(ulong op, int size, CpuThreadState state) + { + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + + if (op > (ulong)tMaxValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMaxValue; + } + else + { + return (long)op; + } + } + + public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size, CpuThreadState state) + { + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + + if (op > tMaxValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return tMaxValue; + } + else + { + return op; + } + } + + public static long UnarySignedSatQAbsOrNeg(long op, CpuThreadState state) + { + if (op == long.MinValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return long.MaxValue; + } + else + { + return op; + } + } + + public static long BinarySignedSatQAdd(long op1, long op2, CpuThreadState state) + { + long add = op1 + op2; + + if ((~(op1 ^ op2) & (op1 ^ add)) < 0L) + { + state.SetFpsrFlag(Fpsr.Qc); + + if (op1 < 0L) + { + return long.MinValue; + } + else + { + return long.MaxValue; + } + } + else + { + return add; + } + } + + public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, CpuThreadState state) + { + ulong add = op1 + op2; + + if ((add < op1) && 
(add < op2)) + { + state.SetFpsrFlag(Fpsr.Qc); + + return ulong.MaxValue; + } + else + { + return add; + } + } + + public static long BinarySignedSatQSub(long op1, long op2, CpuThreadState state) + { + long sub = op1 - op2; + + if (((op1 ^ op2) & (op1 ^ sub)) < 0L) + { + state.SetFpsrFlag(Fpsr.Qc); + + if (op1 < 0L) + { + return long.MinValue; + } + else + { + return long.MaxValue; + } + } + else + { + return sub; + } + } + + public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, CpuThreadState state) + { + ulong sub = op1 - op2; + + if (op1 < op2) + { + state.SetFpsrFlag(Fpsr.Qc); + + return ulong.MinValue; + } + else + { + return sub; + } + } + + public static long BinarySignedSatQAcc(ulong op1, long op2, CpuThreadState state) + { + if (op1 <= (ulong)long.MaxValue) + { + // Op1 from ulong.MinValue to (ulong)long.MaxValue + // Op2 from long.MinValue to long.MaxValue + + long add = (long)op1 + op2; + + if ((~op2 & add) < 0L) + { + state.SetFpsrFlag(Fpsr.Qc); + + return long.MaxValue; + } + else + { + return add; + } + } + else if (op2 >= 0L) + { + // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // Op2 from (long)ulong.MinValue to long.MaxValue + + state.SetFpsrFlag(Fpsr.Qc); + + return long.MaxValue; + } + else + { + // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // Op2 from long.MinValue to (long)ulong.MinValue - 1L + + ulong add = op1 + (ulong)op2; + + if (add > (ulong)long.MaxValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return long.MaxValue; + } + else + { + return (long)add; + } + } + } + + public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, CpuThreadState state) + { + if (op1 >= 0L) + { + // Op1 from (long)ulong.MinValue to long.MaxValue + // Op2 from ulong.MinValue to ulong.MaxValue + + ulong add = (ulong)op1 + op2; + + if ((add < (ulong)op1) && (add < op2)) + { + state.SetFpsrFlag(Fpsr.Qc); + + return ulong.MaxValue; + } + else + { + return add; + } + } + else if (op2 > (ulong)long.MaxValue) + { + // Op1 from 
long.MinValue to (long)ulong.MinValue - 1L + // Op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + + return (ulong)op1 + op2; + } + else + { + // Op1 from long.MinValue to (long)ulong.MinValue - 1L + // Op2 from ulong.MinValue to (ulong)long.MaxValue + + long add = op1 + (long)op2; + + if (add < (long)ulong.MinValue) + { + state.SetFpsrFlag(Fpsr.Qc); + + return ulong.MinValue; + } + else + { + return (ulong)add; + } + } + } +#endregion + +#region "Count" + public static ulong CountLeadingSigns(ulong value, int size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + value ^= value >> 1; + + int highBit = size - 2; + + for (int bit = highBit; bit >= 0; bit--) + { + if (((value >> bit) & 0b1) != 0) + { + return (ulong)(highBit - bit); + } + } + + return (ulong)(size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong value, int size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + if (value == 0ul) + { + return (ulong)size; + } + + int nibbleIdx = size; + int preCount, count = 0; + + do + { + nibbleIdx -= 4; + preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111]; + count += preCount; + } + while (preCount == 4); + + return (ulong)count; + } + + public static ulong CountSetBits8(ulong value) // "Size" is 8 (SIMD&FP Inst.). 
+ { + if (value == 0xfful) + { + return 8ul; + } + + value = ((value >> 1) & 0x55ul) + (value & 0x55ul); + value = ((value >> 2) & 0x33ul) + (value & 0x33ul); + + return (value >> 4) + (value & 0x0ful); + } +#endregion + +#region "Crc32" + private const uint Crc32RevPoly = 0xedb88320; + private const uint Crc32CRevPoly = 0x82f63b78; + + public static uint Crc32B(uint crc, byte val) => Crc32 (crc, Crc32RevPoly, val); + public static uint Crc32H(uint crc, ushort val) => Crc32H(crc, Crc32RevPoly, val); + public static uint Crc32W(uint crc, uint val) => Crc32W(crc, Crc32RevPoly, val); + public static uint Crc32X(uint crc, ulong val) => Crc32X(crc, Crc32RevPoly, val); + + public static uint Crc32Cb(uint crc, byte val) => Crc32 (crc, Crc32CRevPoly, val); + public static uint Crc32Ch(uint crc, ushort val) => Crc32H(crc, Crc32CRevPoly, val); + public static uint Crc32Cw(uint crc, uint val) => Crc32W(crc, Crc32CRevPoly, val); + public static uint Crc32Cx(uint crc, ulong val) => Crc32X(crc, Crc32CRevPoly, val); + + private static uint Crc32H(uint crc, uint poly, ushort val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + + return crc; + } + + private static uint Crc32W(uint crc, uint poly, uint val) + { + crc = Crc32(crc, poly, (byte)(val >> 0 )); + crc = Crc32(crc, poly, (byte)(val >> 8 )); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + + return crc; + } + + private static uint Crc32X(uint crc, uint poly, ulong val) + { + crc = Crc32(crc, poly, (byte)(val >> 0 )); + crc = Crc32(crc, poly, (byte)(val >> 8 )); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + crc = Crc32(crc, poly, (byte)(val >> 32)); + crc = Crc32(crc, poly, (byte)(val >> 40)); + crc = Crc32(crc, poly, (byte)(val >> 48)); + crc = Crc32(crc, poly, (byte)(val >> 56)); + + return crc; + } + + private static uint Crc32(uint crc, uint poly, byte val) + { + crc ^= val; + + for (int 
bit = 7; bit >= 0; bit--) + { + uint mask = (uint)(-(int)(crc & 1)); + + crc = (crc >> 1) ^ (poly & mask); + } + + return crc; + } +#endregion + +#region "Aes" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> InverseMixColumns(Vector128<float> value) + { + return CryptoHelper.AesInvMixColumns(value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> MixColumns(Vector128<float> value) + { + return CryptoHelper.AesMixColumns(value); + } +#endregion + +#region "Sha1" + public static Vector128<float> HashChoose(Vector128<float> hashAbcd, uint hashE, Vector128<float> wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaChoose((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2)); + + hashE += Rol((uint)VectorExtractIntZx(hashAbcd, (byte)0, 2), 5) + t; + hashE += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), 30); + hashAbcd = VectorInsertInt((ulong)t, hashAbcd, (byte)1, 2); + + Rol32_160(ref hashE, ref hashAbcd); + } + + return hashAbcd; + } + + public static uint FixedRotate(uint hashE) + { + return hashE.Rol(30); + } + + public static Vector128<float> HashMajority(Vector128<float> hashAbcd, uint hashE, 
Vector128<float> wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaMajority((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2)); + + hashE += Rol((uint)VectorExtractIntZx(hashAbcd, (byte)0, 2), 5) + t; + hashE += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), 30); + hashAbcd = VectorInsertInt((ulong)t, hashAbcd, (byte)1, 2); + + Rol32_160(ref hashE, ref hashAbcd); + } + + return hashAbcd; + } + + public static Vector128<float> HashParity(Vector128<float> hashAbcd, uint hashE, Vector128<float> wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaParity((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2), + (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2)); + + hashE += Rol((uint)VectorExtractIntZx(hashAbcd, (byte)0, 2), 5) + t; + hashE += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hashAbcd, (byte)1, 2), 30); + hashAbcd = VectorInsertInt((ulong)t, hashAbcd, (byte)1, 2); + + Rol32_160(ref hashE, ref hashAbcd); + } + + return hashAbcd; + } + + public static Vector128<float> Sha1SchedulePart1(Vector128<float> w03, Vector128<float> w47, Vector128<float> w811) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + Vector128<float> result = new Vector128<float>(); + + ulong t2 = VectorExtractIntZx(w47, (byte)0, 3); + ulong t1 = VectorExtractIntZx(w03, (byte)1, 3); + + result = VectorInsertInt((ulong)t1, result, (byte)0, 3); + result = VectorInsertInt((ulong)t2, result, (byte)1, 3); + + return Sse.Xor(result, Sse.Xor(w03, w811)); + } + + public static Vector128<float> Sha1SchedulePart2(Vector128<float> tw03, Vector128<float> w1215) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + Vector128<float> result = new Vector128<float>(); + + Vector128<float> t = Sse.Xor(tw03, 
Sse.StaticCast<uint, float>( + Sse2.ShiftRightLogical128BitLane(Sse.StaticCast<float, uint>(w1215), (byte)4))); + + uint tE0 = (uint)VectorExtractIntZx(t, (byte)0, 2); + uint tE1 = (uint)VectorExtractIntZx(t, (byte)1, 2); + uint tE2 = (uint)VectorExtractIntZx(t, (byte)2, 2); + uint tE3 = (uint)VectorExtractIntZx(t, (byte)3, 2); + + result = VectorInsertInt((ulong)tE0.Rol(1), result, (byte)0, 2); + result = VectorInsertInt((ulong)tE1.Rol(1), result, (byte)1, 2); + result = VectorInsertInt((ulong)tE2.Rol(1), result, (byte)2, 2); + + return VectorInsertInt((ulong)(tE3.Rol(1) ^ tE0.Rol(2)), result, (byte)3, 2); + } + + private static void Rol32_160(ref uint y, ref Vector128<float> x) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + uint xE3 = (uint)VectorExtractIntZx(x, (byte)3, 2); + + x = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4)); + x = VectorInsertInt((ulong)y, x, (byte)0, 2); + + y = xE3; + } + + private static uint ShaChoose(uint x, uint y, uint z) + { + return ((y ^ z) & x) ^ z; + } + + private static uint ShaMajority(uint x, uint y, uint z) + { + return (x & y) | ((x | y) & z); + } + + private static uint ShaParity(uint x, uint y, uint z) + { + return x ^ y ^ z; + } + + private static uint Rol(this uint value, int count) + { + return (value << count) | (value >> (32 - count)); + } +#endregion + +#region "Sha256" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> HashLower(Vector128<float> hashAbcd, Vector128<float> hashEfgh, Vector128<float> wk) + { + return Sha256Hash(hashAbcd, hashEfgh, wk, true); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> HashUpper(Vector128<float> hashEfgh, Vector128<float> hashAbcd, Vector128<float> wk) + { + return Sha256Hash(hashAbcd, hashEfgh, wk, false); + } + + public static Vector128<float> Sha256SchedulePart1(Vector128<float> w03, Vector128<float> w47) + { 
+ Vector128<float> result = new Vector128<float>(); + + for (int e = 0; e <= 3; e++) + { + uint elt = (uint)VectorExtractIntZx(e <= 2 ? w03 : w47, (byte)(e <= 2 ? e + 1 : 0), 2); + + elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3); + + elt += (uint)VectorExtractIntZx(w03, (byte)e, 2); + + result = VectorInsertInt((ulong)elt, result, (byte)e, 2); + } + + return result; + } + + public static Vector128<float> Sha256SchedulePart2(Vector128<float> w03, Vector128<float> w811, Vector128<float> w1215) + { + Vector128<float> result = new Vector128<float>(); + + ulong t1 = VectorExtractIntZx(w1215, (byte)1, 3); + + for (int e = 0; e <= 1; e++) + { + uint elt = t1.ULongPart(e); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += (uint)VectorExtractIntZx(w03, (byte)e, 2); + elt += (uint)VectorExtractIntZx(w811, (byte)(e + 1), 2); + + result = VectorInsertInt((ulong)elt, result, (byte)e, 2); + } + + t1 = VectorExtractIntZx(result, (byte)0, 3); + + for (int e = 2; e <= 3; e++) + { + uint elt = t1.ULongPart(e - 2); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += (uint)VectorExtractIntZx(w03, (byte)e, 2); + elt += (uint)VectorExtractIntZx(e == 2 ? w811 : w1215, (byte)(e == 2 ? 
3 : 0), 2); + + result = VectorInsertInt((ulong)elt, result, (byte)e, 2); + } + + return result; + } + + private static Vector128<float> Sha256Hash(Vector128<float> x, Vector128<float> y, Vector128<float> w, bool part1) + { + for (int e = 0; e <= 3; e++) + { + uint chs = ShaChoose((uint)VectorExtractIntZx(y, (byte)0, 2), + (uint)VectorExtractIntZx(y, (byte)1, 2), + (uint)VectorExtractIntZx(y, (byte)2, 2)); + + uint maj = ShaMajority((uint)VectorExtractIntZx(x, (byte)0, 2), + (uint)VectorExtractIntZx(x, (byte)1, 2), + (uint)VectorExtractIntZx(x, (byte)2, 2)); + + uint t1 = (uint)VectorExtractIntZx(y, (byte)3, 2); + t1 += ShaHashSigma1((uint)VectorExtractIntZx(y, (byte)0, 2)) + chs; + t1 += (uint)VectorExtractIntZx(w, (byte)e, 2); + + uint t2 = t1 + (uint)VectorExtractIntZx(x, (byte)3, 2); + x = VectorInsertInt((ulong)t2, x, (byte)3, 2); + t2 = t1 + ShaHashSigma0((uint)VectorExtractIntZx(x, (byte)0, 2)) + maj; + y = VectorInsertInt((ulong)t2, y, (byte)3, 2); + + Rol32_256(ref y, ref x); + } + + return part1 ? 
x : y; + } + + private static void Rol32_256(ref Vector128<float> y, ref Vector128<float> x) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + uint yE3 = (uint)VectorExtractIntZx(y, (byte)3, 2); + uint xE3 = (uint)VectorExtractIntZx(x, (byte)3, 2); + + y = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(y), (byte)4)); + x = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4)); + + y = VectorInsertInt((ulong)xE3, y, (byte)0, 2); + x = VectorInsertInt((ulong)yE3, x, (byte)0, 2); + } + + private static uint ShaHashSigma0(uint x) + { + return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22); + } + + private static uint ShaHashSigma1(uint x) + { + return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25); + } + + private static uint Ror(this uint value, int count) + { + return (value >> count) | (value << (32 - count)); + } + + private static uint Lsr(this uint value, int count) + { + return value >> count; + } + + private static uint ULongPart(this ulong value, int part) + { + return part == 0 + ? 
(uint)(value & 0xFFFFFFFFUL) + : (uint)(value >> 32); + } +#endregion + +#region "Reverse" + public static uint ReverseBits8(uint value) + { + value = ((value & 0xaa) >> 1) | ((value & 0x55) << 1); + value = ((value & 0xcc) >> 2) | ((value & 0x33) << 2); + + return (value >> 4) | ((value & 0x0f) << 4); + } + + public static uint ReverseBits32(uint value) + { + value = ((value & 0xaaaaaaaa) >> 1) | ((value & 0x55555555) << 1); + value = ((value & 0xcccccccc) >> 2) | ((value & 0x33333333) << 2); + value = ((value & 0xf0f0f0f0) >> 4) | ((value & 0x0f0f0f0f) << 4); + value = ((value & 0xff00ff00) >> 8) | ((value & 0x00ff00ff) << 8); + + return (value >> 16) | (value << 16); + } + + public static ulong ReverseBits64(ulong value) + { + value = ((value & 0xaaaaaaaaaaaaaaaa) >> 1 ) | ((value & 0x5555555555555555) << 1 ); + value = ((value & 0xcccccccccccccccc) >> 2 ) | ((value & 0x3333333333333333) << 2 ); + value = ((value & 0xf0f0f0f0f0f0f0f0) >> 4 ) | ((value & 0x0f0f0f0f0f0f0f0f) << 4 ); + value = ((value & 0xff00ff00ff00ff00) >> 8 ) | ((value & 0x00ff00ff00ff00ff) << 8 ); + value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16); + + return (value >> 32) | (value << 32); + } + + public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value); + public static uint ReverseBytes32_32(uint value) => (uint)ReverseBytes32_64(value); + + public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16); + public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32); + public static ulong ReverseBytes64(ulong value) => ReverseBytes(value, RevSize.Rev64); + + private enum RevSize + { + Rev16, + Rev32, + Rev64 + } + + private static ulong ReverseBytes(ulong value, RevSize size) + { + value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8); + + if (size == RevSize.Rev16) + { + return value; + } + + value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 
0x0000ffff0000ffff) << 16);

            if (size == RevSize.Rev32)
            {
                return value;
            }

            value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);

            if (size == RevSize.Rev64)
            {
                return value;
            }

            // BUG FIX: the single-argument ArgumentException overload treats its argument
            // as the *message*, so "new ArgumentException(nameof(size))" produced the
            // useless message "size" with no parameter name. Use the (message, paramName)
            // overload instead; the exception type callers may catch is unchanged.
            throw new ArgumentException($"Invalid size \"{size}\".", nameof(size));
        }
#endregion

#region "MultiplyHigh"
        // Returns the high 64 bits of the signed 128-bit product left * right (SMULH).
        public static long SMulHi128(long left, long right)
        {
            // Compute the unsigned high half, then correct for the signs: reinterpreting
            // a negative operand as unsigned adds 2^64 * (other operand) to the product,
            // which shows up as (other operand) in the high half.
            long result = (long)UMulHi128((ulong)left, (ulong)right);

            if (left < 0)
            {
                result -= right;
            }

            if (right < 0)
            {
                result -= left;
            }

            return result;
        }

        // Returns the high 64 bits of the unsigned 128-bit product left * right (UMULH),
        // built from 32x32 -> 64-bit partial products with carry propagation.
        public static ulong UMulHi128(ulong left, ulong right)
        {
            ulong lHigh = left >> 32;
            ulong lLow  = left & 0xFFFFFFFF;
            ulong rHigh = right >> 32;
            ulong rLow  = right & 0xFFFFFFFF;

            ulong z2 = lLow * rLow;
            ulong t  = lHigh * rLow + (z2 >> 32);
            ulong z1 = t & 0xFFFFFFFF;
            ulong z0 = t >> 32;

            z1 += lLow * rHigh;

            return lHigh * rHigh + z0 + (z1 >> 32);
        }
#endregion
    }
}
diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs
new file mode 100644
index 00000000..79dbe954
--- /dev/null
+++ b/ChocolArm64/Instructions/SoftFloat.cs
@@ -0,0 +1,2127 @@
using ChocolArm64.State;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace ChocolArm64.Instructions
{
    static class SoftFloat
    {
        static SoftFloat()
        {
            RecipEstimateTable   = BuildRecipEstimateTable();
            InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
        }

        // Estimate tables for FRECPE / FRSQRTE, built once at type initialization.
        private static readonly byte[] RecipEstimateTable;
        private static readonly byte[] InvSqrtEstimateTable;

        // Builds the 256-entry reciprocal estimate table: each entry holds the rounded
        // 8-bit fraction of 1/a for a 9-bit normalized mantissa prefix a in [1.0, 2.0).
        private static byte[] BuildRecipEstimateTable()
        {
            byte[] table = new byte[256];

            for (ulong index = 0; index < 256; index++)
            {
                ulong a = index | 0x100;

                a = (a << 1) + 1;
                ulong b = 0x80000 / a;
                b = (b + 1) >> 1;

                table[index] = (byte)(b & 0xFF);
            }

            return table;
        }

        // Builds the inverse-square-root estimate table; entries 128..511 cover the
        // normalized operand ranges used by the odd/even exponent cases of the estimate.
        private static byte[] BuildInvSqrtEstimateTable()
        {
            byte[] table = new byte[512];

            for (ulong index = 128; index <
512; index++) + { + ulong a = index; + if (a < 256) + { + a = (a << 1) + 1; + } + else + { + a = (a | 1) << 1; + } + + ulong b = 256; + while (a * (b + 1) * (b + 1) < (1ul << 28)) + { + b++; + } + b = (b + 1) >> 1; + + table[index] = (byte)(b & 0xFF); + } + return table; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float RecipEstimate(float x) + { + return (float)RecipEstimate((double)x); + } + + public static double RecipEstimate(double x) + { + ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong xSign = xBits & 0x8000000000000000; + ulong xExp = (xBits >> 52) & 0x7FF; + ulong scaled = xBits & ((1ul << 52) - 1); + + if (xExp >= 2045) + { + if (xExp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return BitConverter.Int64BitsToDouble((long)xSign); + } + + if (xExp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + xExp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong resultExp = (2045 - xExp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (resultExp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (resultExp == 0x7FF) + { + resultExp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = xSign | (resultExp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float InvSqrtEstimate(float x) + { + return (float)InvSqrtEstimate((double)x); + } + + public static double InvSqrtEstimate(double x) + { + ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong xSign = xBits & 0x8000000000000000; + long xExp = (long)((xBits >> 52) 
& 0x7FF);
            ulong scaled = xBits & ((1ul << 52) - 1);

            if (xExp == 0x7FF && scaled != 0)
            {
                // NaN: return it quietened (top fraction bit forced on).
                return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
            }

            if (xExp == 0)
            {
                if (scaled == 0)
                {
                    // Zero -> Infinity
                    return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
                }

                // Denormal: normalize the fraction until bit 51 is set.
                // BUG FIX: the original test used "1 << 51", an *int* shift; C# masks int
                // shift counts mod 32, so it actually tested bit 19 (1 << 19) and the loop
                // stopped at the wrong bit for denormal inputs. A ulong shift tests bit 51.
                while ((scaled & (1ul << 51)) == 0)
                {
                    scaled <<= 1;
                    xExp--;
                }
                scaled <<= 1;
            }

            if (xSign != 0)
            {
                // Negative -> default NaN
                return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
            }

            if (xExp == 0x7ff && scaled == 0)
            {
                // Infinity -> Zero
                return BitConverter.Int64BitsToDouble((long)xSign);
            }

            // Select the table index: odd exponents index the [0x80, 0xFF] half of the
            // table, even exponents the [0x100, 0x1FF] half.
            if (((ulong)xExp & 1) == 1)
            {
                scaled >>= 45;
                scaled &= 0xFF;
                scaled |= 0x80;
            }
            else
            {
                scaled >>= 44;
                scaled &= 0xFF;
                scaled |= 0x100;
            }

            ulong resultExp = ((ulong)(3068 - xExp) / 2) & 0x7FF;
            ulong estimate  = (ulong)InvSqrtEstimateTable[scaled];
            ulong fraction  = estimate << 44;

            ulong result = xSign | (resultExp << 52) | fraction;
            return BitConverter.Int64BitsToDouble((long)result);
        }
    }

    static class SoftFloat1632
    {
        // Converts an IEEE half-precision value (raw bits) to single precision,
        // honouring FPCR.DN (default NaN) and FPCR.AHP (alternative half precision),
        // and raising FPSR exception flags as required.
        public static float FPConvert(ushort valueBits, CpuThreadState state)
        {
            Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat16_32.FPConvert: State.Fpcr = 0x{state.Fpcr:X8}");

            double real = valueBits.FPUnpackCv(out FpType type, out bool sign, state);

            float result;

            if (type == FpType.SNaN || type == FpType.QNaN)
            {
                if (state.GetFpcrFlag(Fpcr.Dn))
                {
                    result = FPDefaultNaN();
                }
                else
                {
                    result = FPConvertNaN(valueBits);
                }

                // Signaling NaNs additionally raise Invalid Operation.
                if (type == FpType.SNaN)
                {
                    FPProcessException(FpExc.InvalidOp, state);
                }
            }
            else if (type == FpType.Infinity)
            {
                result = FPInfinity(sign);
            }
            else if (type == FpType.Zero)
            {
                result = FPZero(sign);
            }
            else
            {
                result = FPRoundCv(real, state);
            }

            return result;
        }

        // ARM default NaN for single precision has sign 0 and the quiet bit set
        // (0x7FC00000); negating .NET's float.NaN (0xFFC00000) clears the sign bit.
        private static float FPDefaultNaN()
        {
            return -float.NaN;
        }

        private static float
FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static double FPUnpackCv(this ushort valueBits, out FpType type, out bool sign, CpuThreadState state) + { + sign = (~(uint)valueBits & 0x8000u) == 0u; + + uint exp16 = ((uint)valueBits & 0x7C00u) >> 10; + uint frac16 = (uint)valueBits & 0x03FFu; + + double real; + + if (exp16 == 0u) + { + if (frac16 == 0u) + { + type = FpType.Zero; + real = 0d; + } + else + { + type = FpType.Nonzero; // Subnormal. + real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10)); + } + } + else if (exp16 == 0x1Fu && !state.GetFpcrFlag(Fpcr.Ahp)) + { + if (frac16 == 0u) + { + type = FpType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac16 & 0x0200u) == 0u ? FpType.QNaN : FpType.SNaN; + real = 0d; + } + } + else + { + type = FpType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10)); + } + + return sign ? 
-real : real; + } + + private static float FPRoundCv(double real, CpuThreadState state) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if (state.GetFpcrFlag(Fpcr.Fz) && exponent < minimumExp) + { + state.SetFpsrFlag(Fpsr.Ufc); + + return FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || state.GetFpcrFlag(Fpcr.Ufe))) + { + FPProcessException(FpExc.Underflow, state); + } + + bool overflowToInf; + bool roundUp; + + switch (state.FPRoundingMode()) + { + default: + case RoundMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case RoundMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case RoundMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case RoundMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == (uint)Math.Pow(2d, f)) + { + biasedExp = 1u; + } + + if (intMant == (uint)Math.Pow(2d, f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + float result; + + if (biasedExp >= (uint)Math.Pow(2d, e) - 1u) + { + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FpExc.Overflow, state); + + error = 1d; + } + else + { + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 
1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
            }

            if (error != 0d)
            {
                FPProcessException(FpExc.Inexact, state);
            }

            return result;
        }

        // Widens a half-precision NaN into a single-precision quiet NaN, preserving
        // the sign and the top 9 payload bits.
        private static float FPConvertNaN(ushort valueBits)
        {
            uint sign    = ((uint)valueBits & 0x8000u) << 16;
            uint payload = ((uint)valueBits & 0x01FFu) << 13;

            return BitConverter.Int32BitsToSingle((int)(sign | 0x7FC00000u | payload));
        }

        // Sets the cumulative FPSR flag for exc, unless the matching FPCR trap-enable
        // bit (flag index + 8) is set, in which case trapping would be required.
        private static void FPProcessException(FpExc exc, CpuThreadState state)
        {
            int enable = (int)exc + 8;

            if ((state.Fpcr & (1 << enable)) != 0)
            {
                throw new NotImplementedException("floating-point trap handling");
            }

            state.Fpsr |= 1 << (int)exc;
        }
    }

    static class SoftFloat3216
    {
        // Converts a single-precision value to half-precision bits, honouring
        // FPCR.AHP (alternative half precision, which has no NaNs or infinities)
        // and FPCR.DN, raising FPSR exception flags as required.
        public static ushort FPConvert(float value, CpuThreadState state)
        {
            Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat32_16.FPConvert: State.Fpcr = 0x{state.Fpcr:X8}");

            double real = value.FPUnpackCv(out FpType type, out bool sign, state, out uint valueBits);

            bool altHp = state.GetFpcrFlag(Fpcr.Ahp);

            ushort resultBits;

            if (type == FpType.SNaN || type == FpType.QNaN)
            {
                // AHP format cannot represent NaN: produce zero and signal invalid below.
                if (altHp)
                {
                    resultBits = FPZero(sign);
                }
                else if (state.GetFpcrFlag(Fpcr.Dn))
                {
                    resultBits = FPDefaultNaN();
                }
                else
                {
                    resultBits = FPConvertNaN(valueBits);
                }

                if (type == FpType.SNaN || altHp)
                {
                    FPProcessException(FpExc.InvalidOp, state);
                }
            }
            else if (type == FpType.Infinity)
            {
                // AHP format cannot represent infinity: saturate to the all-ones pattern.
                if (altHp)
                {
                    resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);

                    FPProcessException(FpExc.InvalidOp, state);
                }
                else
                {
                    resultBits = FPInfinity(sign);
                }
            }
            else if (type == FpType.Zero)
            {
                resultBits = FPZero(sign);
            }
            else
            {
                resultBits = FPRoundCv(real, state);
            }

            return resultBits;
        }

        // ARM default NaN for half precision: sign 0, quiet bit set.
        private static ushort FPDefaultNaN()
        {
            return (ushort)0x7E00u;
        }

        private static ushort FPInfinity(bool sign)
        {
            return sign ? (ushort)0xFC00u : (ushort)0x7C00u;
        }

        private static ushort FPZero(bool sign)
        {
            return sign ?
(ushort)0x8000u : (ushort)0x0000u; + } + + private static ushort FPMaxNormal(bool sign) + { + return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; + } + + private static double FPUnpackCv(this float value, out FpType type, out bool sign, CpuThreadState state, out uint valueBits) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + uint exp32 = (valueBits & 0x7F800000u) >> 23; + uint frac32 = valueBits & 0x007FFFFFu; + + double real; + + if (exp32 == 0u) + { + if (frac32 == 0u || state.GetFpcrFlag(Fpcr.Fz)) + { + type = FpType.Zero; + real = 0d; + + if (frac32 != 0u) FPProcessException(FpExc.InputDenorm, state); + } + else + { + type = FpType.Nonzero; // Subnormal. + real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); + } + } + else if (exp32 == 0xFFu) + { + if (frac32 == 0u) + { + type = FpType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac32 & 0x00400000u) == 0u ? FpType.QNaN : FpType.SNaN; + real = 0d; + } + } + else + { + type = FpType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); + } + + return sign ? 
-real : real; + } + + private static ushort FPRoundCv(double real, CpuThreadState state) + { + const int minimumExp = -14; + + const int e = 5; + const int f = 10; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || state.GetFpcrFlag(Fpcr.Ufe))) + { + FPProcessException(FpExc.Underflow, state); + } + + bool overflowToInf; + bool roundUp; + + switch (state.FPRoundingMode()) + { + default: + case RoundMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case RoundMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case RoundMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case RoundMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == (uint)Math.Pow(2d, f)) + { + biasedExp = 1u; + } + + if (intMant == (uint)Math.Pow(2d, f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + ushort resultBits; + + if (!state.GetFpcrFlag(Fpcr.Ahp)) + { + if (biasedExp >= (uint)Math.Pow(2d, e) - 1u) + { + resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FpExc.Overflow, state); + + error = 1d; + } + else + { + resultBits = (ushort)((sign ? 
1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + else + { + if (biasedExp >= (uint)Math.Pow(2d, e)) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + FPProcessException(FpExc.InvalidOp, state); + + error = 0d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + + if (error != 0d) + { + FPProcessException(FpExc.Inexact, state); + } + + return resultBits; + } + + private static ushort FPConvertNaN(uint valueBits) + { + return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); + } + + private static void FPProcessException(FpExc exc, CpuThreadState state) + { + int enable = (int)exc + 8; + + if ((state.Fpcr & (1 << enable)) != 0) + { + throw new NotImplementedException("floating-point trap handling"); + } + else + { + state.Fpsr |= 1 << (int)exc; + } + } + } + + static class SoftFloat32 + { + public static float FPAdd(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPAdd: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + } + } + + return result; + } + + public static float 
FPDiv(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPDiv: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) FPProcessException(FpExc.DivideByZero, state); + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + } + } + + return result; + } + + public static float FPMax(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPMax: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FpType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FpType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FpType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FpType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + } + } + } + + return result; + } + + public static float FPMaxNum(float value1, float value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); + + value1.FPUnpack(out FpType 
type1, out _, out _); + value2.FPUnpack(out FpType type2, out _, out _); + + if (type1 == FpType.QNaN && type2 != FpType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FpType.QNaN && type2 == FpType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2, state); + } + + public static float FPMin(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPMin: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FpType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FpType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FpType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FpType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + } + } + } + + return result; + } + + public static float FPMinNum(float value1, float value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); + + value1.FPUnpack(out FpType type1, out _, out _); + value2.FPUnpack(out FpType type2, out _, out _); + + if (type1 == FpType.QNaN && type2 != FpType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FpType.QNaN && type2 == FpType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2, state); + } + + public static float FPMul(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPMul: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float 
result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + } + } + + return result; + } + + public static float FPMulAdd(float valueA, float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPMulAdd: State.Fpcr = 0x{state.Fpcr:X8}"); + + valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out uint addend); + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, state, out bool done); + + if (typeA == FpType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + + if (!done) + { + bool infA = typeA == FpType.Infinity; bool zeroA = typeA == FpType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = 
FPZero(signA); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + } + } + + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float FPMulSub(float valueA, float value1, float value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_32.FPMulSub: "); + + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2, state); + } + + public static float FPMulX(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPMulX: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + } + } + + return result; + } + + public static float FPRecipStepFused(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPRecipStepFused: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; 
bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2f + (value1 * value2); + } + } + + return result; + } + + public static float FPRecpX(float value, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPRecpX: State.Fpcr = 0x{state.Fpcr:X8}"); + + value.FPUnpack(out FpType type, out bool sign, out uint op); + + float result; + + if (type == FpType.SNaN || type == FpType.QNaN) + { + result = FPProcessNaN(type, op, state); + } + else + { + uint notExp = (~op >> 23) & 0xFFu; + uint maxExp = 0xFEu; + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23)); + } + + return result; + } + + public static float FprSqrtStepFused(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPRSqrtStepFused: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3f + (value1 * value2)) / 2f; + } + } + + return result; + } + + public static float FPSqrt(float value, 
CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPSqrt: State.Fpcr = 0x{state.Fpcr:X8}"); + + value = value.FPUnpack(out FpType type, out bool sign, out uint op); + + float result; + + if (type == FpType.SNaN || type == FpType.QNaN) + { + result = FPProcessNaN(type, op, state); + } + else if (type == FpType.Zero) + { + result = FPZero(sign); + } + else if (type == FpType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else + { + result = MathF.Sqrt(value); + } + + return result; + } + + public static float FPSub(float value1, float value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_32.FPSub: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2); + + float result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + } + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPTwo(bool sign) + { + return sign ? 
-2f : +2f; + } + + private static float FPOnePointFive(bool sign) + { + return sign ? -1.5f : +1.5f; + } + + private static float FPNeg(this float value) + { + return -value; + } + + private static float FPUnpack(this float value, out FpType type, out bool sign, out uint valueBits) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + if ((valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FpType.Zero; + } + else + { + type = FpType.Nonzero; + } + } + else if ((~valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FpType.Infinity; + } + else + { + type = (~valueBits & 0x00400000u) == 0u + ? FpType.QNaN + : FpType.SNaN; + + return FPZero(sign); + } + } + else + { + type = FpType.Nonzero; + } + + return value; + } + + private static float FPProcessNaNs( + FpType type1, + FpType type2, + uint op1, + uint op2, + CpuThreadState state, + out bool done) + { + done = true; + + if (type1 == FpType.SNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.SNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type1 == FpType.QNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.QNaN) + { + return FPProcessNaN(type2, op2, state); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FpType type1, + FpType type2, + FpType type3, + uint op1, + uint op2, + uint op3, + CpuThreadState state, + out bool done) + { + done = true; + + if (type1 == FpType.SNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.SNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type3 == FpType.SNaN) + { + return FPProcessNaN(type3, op3, state); + } + else if (type1 == FpType.QNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.QNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type3 
== FpType.QNaN) + { + return FPProcessNaN(type3, op3, state); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FpType type, uint op, CpuThreadState state) + { + if (type == FpType.SNaN) + { + op |= 1u << 22; + + FPProcessException(FpExc.InvalidOp, state); + } + + if (state.GetFpcrFlag(Fpcr.Dn)) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)op); + } + + private static void FPProcessException(FpExc exc, CpuThreadState state) + { + int enable = (int)exc + 8; + + if ((state.Fpcr & (1 << enable)) != 0) + { + throw new NotImplementedException("floating-point trap handling"); + } + else + { + state.Fpsr |= 1 << (int)exc; + } + } + } + + static class SoftFloat64 + { + public static double FPAdd(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPAdd: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + } + } + + return result; + } + + public static double FPDiv(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPDiv: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, 
out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) FPProcessException(FpExc.DivideByZero, state); + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + } + } + + return result; + } + + public static double FPMax(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPMax: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FpType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FpType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FpType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FpType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + } + } + } + + return result; + } + + public static double FPMaxNum(double value1, double value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); + + value1.FPUnpack(out FpType type1, out _, out _); + value2.FPUnpack(out FpType type2, out _, out _); + + if (type1 == FpType.QNaN && type2 != FpType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != 
FpType.QNaN && type2 == FpType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2, state); + } + + public static double FPMin(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPMin: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FpType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FpType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FpType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FpType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + } + } + } + + return result; + } + + public static double FPMinNum(double value1, double value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); + + value1.FPUnpack(out FpType type1, out _, out _); + value2.FPUnpack(out FpType type2, out _, out _); + + if (type1 == FpType.QNaN && type2 != FpType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FpType.QNaN && type2 == FpType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2, state); + } + + public static double FPMul(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPMul: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool 
inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + } + } + + return result; + } + + public static double FPMulAdd(double valueA, double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPMulAdd: State.Fpcr = 0x{state.Fpcr:X8}"); + + valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out ulong addend); + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, state, out bool done); + + if (typeA == FpType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + + if (!done) + { + bool infA = typeA == FpType.Infinity; bool zeroA = typeA == FpType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + 
(value1 * value2); + } + } + + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static double FPMulSub(double valueA, double value1, double value2, CpuThreadState state) + { + Debug.WriteIf(state.Fpcr != 0, "ASoftFloat_64.FPMulSub: "); + + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2, state); + } + + public static double FPMulX(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPMulX: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + } + } + + return result; + } + + public static double FPRecipStepFused(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPRecipStepFused: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = 
FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2d + (value1 * value2); + } + } + + return result; + } + + public static double FPRecpX(double value, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPRecpX: State.Fpcr = 0x{state.Fpcr:X8}"); + + value.FPUnpack(out FpType type, out bool sign, out ulong op); + + double result; + + if (type == FpType.SNaN || type == FpType.QNaN) + { + result = FPProcessNaN(type, op, state); + } + else + { + ulong notExp = (~op >> 52) & 0x7FFul; + ulong maxExp = 0x7FEul; + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52)); + } + + return result; + } + + public static double FprSqrtStepFused(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPRSqrtStepFused: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3d + (value1 * value2)) / 2d; + } + } + + return result; + } + + public static double FPSqrt(double value, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPSqrt: State.Fpcr = 0x{state.Fpcr:X8}"); + + value = 
value.FPUnpack(out FpType type, out bool sign, out ulong op); + + double result; + + if (type == FpType.SNaN || type == FpType.QNaN) + { + result = FPProcessNaN(type, op, state); + } + else if (type == FpType.Zero) + { + result = FPZero(sign); + } + else if (type == FpType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else + { + result = Math.Sqrt(value); + } + + return result; + } + + public static double FPSub(double value1, double value2, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"ASoftFloat_64.FPSub: State.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2); + + double result = FPProcessNaNs(type1, type2, op1, op2, state, out bool done); + + if (!done) + { + bool inf1 = type1 == FpType.Infinity; bool zero1 = type1 == FpType.Zero; + bool inf2 = type2 == FpType.Infinity; bool zero2 = type2 == FpType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FpExc.InvalidOp, state); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + } + } + + return result; + } + + private static double FPDefaultNaN() + { + return -double.NaN; + } + + private static double FPInfinity(bool sign) + { + return sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + private static double FPZero(bool sign) + { + return sign ? -0d : +0d; + } + + private static double FPTwo(bool sign) + { + return sign ? -2d : +2d; + } + + private static double FPOnePointFive(bool sign) + { + return sign ? 
-1.5d : +1.5d; + } + + private static double FPNeg(this double value) + { + return -value; + } + + private static double FPUnpack(this double value, out FpType type, out bool sign, out ulong valueBits) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0ul; + + if ((valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FpType.Zero; + } + else + { + type = FpType.Nonzero; + } + } + else if ((~valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FpType.Infinity; + } + else + { + type = (~valueBits & 0x0008000000000000ul) == 0ul + ? FpType.QNaN + : FpType.SNaN; + + return FPZero(sign); + } + } + else + { + type = FpType.Nonzero; + } + + return value; + } + + private static double FPProcessNaNs( + FpType type1, + FpType type2, + ulong op1, + ulong op2, + CpuThreadState state, + out bool done) + { + done = true; + + if (type1 == FpType.SNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.SNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type1 == FpType.QNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.QNaN) + { + return FPProcessNaN(type2, op2, state); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FpType type1, + FpType type2, + FpType type3, + ulong op1, + ulong op2, + ulong op3, + CpuThreadState state, + out bool done) + { + done = true; + + if (type1 == FpType.SNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.SNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type3 == FpType.SNaN) + { + return FPProcessNaN(type3, op3, state); + } + else if (type1 == FpType.QNaN) + { + return FPProcessNaN(type1, op1, state); + } + else if (type2 == FpType.QNaN) + { + return FPProcessNaN(type2, op2, state); + } + else if (type3 == 
FpType.QNaN) + { + return FPProcessNaN(type3, op3, state); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FpType type, ulong op, CpuThreadState state) + { + if (type == FpType.SNaN) + { + op |= 1ul << 51; + + FPProcessException(FpExc.InvalidOp, state); + } + + if (state.GetFpcrFlag(Fpcr.Dn)) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)op); + } + + private static void FPProcessException(FpExc exc, CpuThreadState state) + { + int enable = (int)exc + 8; + + if ((state.Fpcr & (1 << enable)) != 0) + { + throw new NotImplementedException("floating-point trap handling"); + } + else + { + state.Fpsr |= 1 << (int)exc; + } + } + } +} diff --git a/ChocolArm64/Instructions/VectorHelper.cs b/ChocolArm64/Instructions/VectorHelper.cs new file mode 100644 index 00000000..8ef15818 --- /dev/null +++ b/ChocolArm64/Instructions/VectorHelper.cs @@ -0,0 +1,790 @@ +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace ChocolArm64.Instructions +{ + static class VectorHelper + { + private static readonly Vector128<float> Zero32128Mask; + + static VectorHelper() + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + Zero32128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff)); + } + + public static void EmitCall(ILEmitterCtx context, string name64, string name128) + { + bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64; + + context.EmitCall(typeof(VectorHelper), isSimd64 ? name64 : name128); + } + + public static void EmitCall(ILEmitterCtx context, string mthdName) + { + context.EmitCall(typeof(VectorHelper), mthdName); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int SatF32ToS32(float value) + { + if (float.IsNaN(value)) return 0; + + return value > int.MaxValue ? 
int.MaxValue : + value < int.MinValue ? int.MinValue : (int)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long SatF32ToS64(float value) + { + if (float.IsNaN(value)) return 0; + + return value > long.MaxValue ? long.MaxValue : + value < long.MinValue ? long.MinValue : (long)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static uint SatF32ToU32(float value) + { + if (float.IsNaN(value)) return 0; + + return value > uint.MaxValue ? uint.MaxValue : + value < uint.MinValue ? uint.MinValue : (uint)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ulong SatF32ToU64(float value) + { + if (float.IsNaN(value)) return 0; + + return value > ulong.MaxValue ? ulong.MaxValue : + value < ulong.MinValue ? ulong.MinValue : (ulong)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int SatF64ToS32(double value) + { + if (double.IsNaN(value)) return 0; + + return value > int.MaxValue ? int.MaxValue : + value < int.MinValue ? int.MinValue : (int)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long SatF64ToS64(double value) + { + if (double.IsNaN(value)) return 0; + + return value > long.MaxValue ? long.MaxValue : + value < long.MinValue ? long.MinValue : (long)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static uint SatF64ToU32(double value) + { + if (double.IsNaN(value)) return 0; + + return value > uint.MaxValue ? uint.MaxValue : + value < uint.MinValue ? uint.MinValue : (uint)value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ulong SatF64ToU64(double value) + { + if (double.IsNaN(value)) return 0; + + return value > ulong.MaxValue ? ulong.MaxValue : + value < ulong.MinValue ? 
ulong.MinValue : (ulong)value; + } + + public static double Round(double value, CpuThreadState state) + { + switch (state.FPRoundingMode()) + { + case RoundMode.ToNearest: return Math.Round (value); + case RoundMode.TowardsPlusInfinity: return Math.Ceiling (value); + case RoundMode.TowardsMinusInfinity: return Math.Floor (value); + case RoundMode.TowardsZero: return Math.Truncate(value); + } + + throw new InvalidOperationException(); + } + + public static float RoundF(float value, CpuThreadState state) + { + switch (state.FPRoundingMode()) + { + case RoundMode.ToNearest: return MathF.Round (value); + case RoundMode.TowardsPlusInfinity: return MathF.Ceiling (value); + case RoundMode.TowardsMinusInfinity: return MathF.Floor (value); + case RoundMode.TowardsZero: return MathF.Truncate(value); + } + + throw new InvalidOperationException(); + } + + public static Vector128<float> Tbl1_V64( + Vector128<float> vector, + Vector128<float> tb0) + { + return Tbl(vector, 8, tb0); + } + + public static Vector128<float> Tbl1_V128( + Vector128<float> vector, + Vector128<float> tb0) + { + return Tbl(vector, 16, tb0); + } + + public static Vector128<float> Tbl2_V64( + Vector128<float> vector, + Vector128<float> tb0, + Vector128<float> tb1) + { + return Tbl(vector, 8, tb0, tb1); + } + + public static Vector128<float> Tbl2_V128( + Vector128<float> vector, + Vector128<float> tb0, + Vector128<float> tb1) + { + return Tbl(vector, 16, tb0, tb1); + } + + public static Vector128<float> Tbl3_V64( + Vector128<float> vector, + Vector128<float> tb0, + Vector128<float> tb1, + Vector128<float> tb2) + { + return Tbl(vector, 8, tb0, tb1, tb2); + } + + public static Vector128<float> Tbl3_V128( + Vector128<float> vector, + Vector128<float> tb0, + Vector128<float> tb1, + Vector128<float> tb2) + { + return Tbl(vector, 16, tb0, tb1, tb2); + } + + public static Vector128<float> Tbl4_V64( + Vector128<float> vector, + Vector128<float> tb0, + Vector128<float> tb1, + Vector128<float> tb2, + Vector128<float> 
            tb3)
        {
            return Tbl(vector, 8, tb0, tb1, tb2, tb3);
        }

        // TBL with four table registers producing a full 16-byte result
        // (the 128-bit form of the AArch64 TBL instruction).
        public static Vector128<float> Tbl4_V128(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1,
            Vector128<float> tb2,
            Vector128<float> tb3)
        {
            return Tbl(vector, 16, tb0, tb1, tb2, tb3);
        }

        // Shared implementation of the AArch64 TBL table lookup.
        // "vector" holds the byte indices, "bytes" is how many result bytes
        // to produce (8 for the 64-bit forms, 16 for the 128-bit forms), and
        // "tb" supplies the 1-4 table registers, concatenated into one flat
        // byte table. An index past the end of the table leaves the
        // corresponding result byte at zero ("res" starts zero-initialized),
        // matching TBL's out-of-range behavior.
        private static Vector128<float> Tbl(Vector128<float> vector, int bytes, params Vector128<float>[] tb)
        {
            Vector128<float> res = new Vector128<float>();

            // Flatten the table registers into a single byte array.
            byte[] table = new byte[tb.Length * 16];

            for (byte index = 0; index < tb.Length; index++)
            for (byte index2 = 0; index2 < 16; index2++)
            {
                table[index * 16 + index2] = (byte)VectorExtractIntZx(tb[index], index2, 0);
            }

            for (byte index = 0; index < bytes; index++)
            {
                byte tblIdx = (byte)VectorExtractIntZx(vector, index, 0);

                if (tblIdx < table.Length)
                {
                    res = VectorInsertInt(table[tblIdx], res, index, 0);
                }
            }

            return res;
        }

        // Extracts the 64-bit element at "index" and reinterprets its bits
        // as a double.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double VectorExtractDouble(Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                return BitConverter.Int64BitsToDouble(Sse41.Extract(Sse.StaticCast<float, long>(vector), index));
            }
            else if (Sse2.IsSupported)
            {
                return BitConverter.Int64BitsToDouble((long)VectorExtractIntZx(vector, index, 3));
            }

            throw new PlatformNotSupportedException();
        }

        // Sign-extending element extract. "size" is the log2 of the element
        // width in bytes: 0 = sbyte, 1 = short, 2 = int, 3 = long.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
                }
                else if (size == 1)
                {
                    return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
                }
                else if (size == 2)
                {
                    return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
                }
                else if (size == 3)
                {
                    return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 fallback: do a zero-extending extract, then
                // sign-extend via a narrowing cast of the low bits.
                if (size == 0)
                {
                    return (sbyte)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 1)
                {
                    return (short)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 2)
                {
                    return (int)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 3)
                {
                    return (long)VectorExtractIntZx(vector, index, size);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        // Zero-extending element extract. "size" is the log2 of the element
        // width in bytes: 0 = byte, 1 = ushort, 2 = uint, 3 = ulong.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong VectorExtractIntZx(Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
                }
                else if (size == 1)
                {
                    return Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
                }
                else if (size == 2)
                {
                    return Sse41.Extract(Sse.StaticCast<float, uint>(vector), index);
                }
                else if (size == 3)
                {
                    return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), index);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 only has a 16-bit lane extract (PEXTRW), so compute
                // the index of the first 16-bit lane covering the element.
                int shortIdx = size == 0
                    ? index >> 1
                    : index << (size - 1);

                ushort value = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)shortIdx);

                if (size == 0)
                {
                    // Select the low or high byte of the 16-bit lane.
                    return (byte)(value >> (index & 1) * 8);
                }
                else if (size == 1)
                {
                    return value;
                }
                else if (size == 2 || size == 3)
                {
                    // Recombine two (uint) or four (ulong) adjacent lanes.
                    ushort value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 1));

                    if (size == 2)
                    {
                        return (uint)(value | (value1 << 16));
                    }

                    ushort value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 2));
                    ushort value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 3));

                    return ((ulong)value << 0) |
                           ((ulong)value1 << 16) |
                           ((ulong)value2 << 32) |
                           ((ulong)value3 << 48);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        // Extracts the 32-bit element at "index" as a float.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float VectorExtractSingle(Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                return Sse41.Extract(vector, index);
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 fallback: rebuild the float from its two 16-bit lanes.
                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                int low = Sse2.Extract(shortVector, (byte)(index * 2 + 0));
                int high = Sse2.Extract(shortVector, (byte)(index * 2 + 1));

                return BitConverter.Int32BitsToSingle(low | (high << 16));
            }

            throw new PlatformNotSupportedException();
        }

        // Inserts "value" (bit pattern) as the 64-bit element at "index".
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertDouble(double value, Vector128<float> vector, byte index)
        {
            return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(value), vector, index, 3);
        }

        // Inserts the low element-sized bits of "value" at "index" and
        // returns the updated vector. "size" is the log2 element width in
        // bytes: 0 = byte, 1 = ushort, 2 = uint, 3 = ulong.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertInt(ulong value, Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(vector), (byte)value, index));
                }
                else if (size == 1)
                {
                    return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
                }
                else if (size == 2)
                {
                    return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(vector), (uint)value, index));
                }
                else if (size == 3)
                {
                    return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(vector), value, index));
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 only has a 16-bit lane insert (PINSRW), so compose the
                // element out of one or more 16-bit lane writes.
                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                int shortIdx = size == 0
                    ? index >> 1
                    : index << (size - 1);

                if (size == 0)
                {
                    // Read-modify-write the 16-bit lane, replacing only the
                    // targeted byte half.
                    ushort shortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)shortIdx);

                    int shift = (index & 1) * 8;

                    shortVal &= (ushort)(0xff00 >> shift);

                    shortVal |= (ushort)((byte)value << shift);

                    return Sse.StaticCast<ushort, float>(Sse2.Insert(shortVector, shortVal, (byte)shortIdx));
                }
                else if (size == 1)
                {
                    return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
                }
                else if (size == 2 || size == 3)
                {
                    // Write two lanes for a uint, four for a ulong.
                    shortVector = Sse2.Insert(shortVector, (ushort)(value >> 0), (byte)(shortIdx + 0));
                    shortVector = Sse2.Insert(shortVector, (ushort)(value >> 16), (byte)(shortIdx + 1));

                    if (size == 3)
                    {
                        shortVector = Sse2.Insert(shortVector, (ushort)(value >> 32), (byte)(shortIdx + 2));
                        shortVector = Sse2.Insert(shortVector, (ushort)(value >> 48), (byte)(shortIdx + 3));
                    }

                    return Sse.StaticCast<ushort, float>(shortVector);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        // Inserts "value" as the 32-bit float element at "index" (0-3).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertSingle(float value, Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                //Note: The if/else if is necessary to enable the JIT to
                //produce a single INSERTPS instruction instead of the
                //jump table fallback.
                if (index == 0)
                {
                    return Sse41.Insert(vector, value, 0x00);
                }
                else if (index == 1)
                {
                    return Sse41.Insert(vector, value, 0x10);
                }
                else if (index == 2)
                {
                    return Sse41.Insert(vector, value, 0x20);
                }
                else if (index == 3)
                {
                    return Sse41.Insert(vector, value, 0x30);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 fallback: write the float as two 16-bit lanes.
                int intValue = BitConverter.SingleToInt32Bits(value);

                ushort low = (ushort)(intValue >> 0);
                ushort high = (ushort)(intValue >> 16);

                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                shortVector = Sse2.Insert(shortVector, low, (byte)(index * 2 + 0));
                shortVector = Sse2.Insert(shortVector, high, (byte)(index * 2 + 1));

                return Sse.StaticCast<ushort, float>(shortVector);
            }

            throw new PlatformNotSupportedException();
        }

        // Inserts "value" into element 0 via INSERTPS.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> Sse41VectorInsertScalarSingle(float value, Vector128<float> vector)
        {
            //Note: 0b1110 is the mask to zero the upper bits.
            return Sse41.Insert(vector, value, 0b1110);
        }

        // Returns an all-zero vector of the named element type; throws when
        // the required instruction set is unavailable.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<sbyte> VectorSByteZero()
        {
            if (Sse2.IsSupported)
            {
                return Sse2.SetZeroVector128<sbyte>();
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<short> VectorInt16Zero()
        {
            if (Sse2.IsSupported)
            {
                return Sse2.SetZeroVector128<short>();
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<int> VectorInt32Zero()
        {
            if (Sse2.IsSupported)
            {
                return Sse2.SetZeroVector128<int>();
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<long> VectorInt64Zero()
        {
            if (Sse2.IsSupported)
            {
                return Sse2.SetZeroVector128<long>();
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorSingleZero()
        {
            if (Sse.IsSupported)
            {
                return Sse.SetZeroVector128();
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<double> VectorDoubleZero()
        {
            if (Sse2.IsSupported)
            {
                return Sse2.SetZeroVector128<double>();
            }

            throw new PlatformNotSupportedException();
        }

        // ANDs the vector with Zero32128Mask (declared earlier in this
        // class). NOTE(review): presumably keeps only the low 32 bits, per
        // the name — confirm against the mask's definition.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorZero32_128(Vector128<float> vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.And(vector, Zero32128Mask);
            }

            throw new PlatformNotSupportedException();
        }

        // Reinterprets the 128 bits as 16 sbytes (no data conversion).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, sbyte>(vector);
            }

            throw new PlatformNotSupportedException();
        }

[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<short> VectorSingleToInt16(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, short>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<int> VectorSingleToInt32(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, int>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<long> VectorSingleToInt64(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, long>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<byte> VectorSingleToByte(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, byte>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, ushort>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<uint> VectorSingleToUInt32(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, uint>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, ulong>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<double> VectorSingleToDouble(Vector128<float> vector) + { + if 
(Sse.IsSupported) + { + return Sse.StaticCast<float, double>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<sbyte, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorInt16ToSingle(Vector128<short> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<short, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorInt32ToSingle(Vector128<int> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<int, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorInt64ToSingle(Vector128<long> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<long, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorByteToSingle(Vector128<byte> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<byte, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<ushort, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<uint, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<ulong, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorDoubleToSingle(Vector128<double> vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<double, float>(vector); + } + + throw new PlatformNotSupportedException(); + } + } +} |
