//===- subzero/unittest/unittest/AssemblerX8664/TestUtil.h ------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Utility classes for testing the X8664 Assembler.
//
//===----------------------------------------------------------------------===//

#ifndef ASSEMBLERX8664_TESTUTIL_H_
#define ASSEMBLERX8664_TESTUTIL_H_

#include "IceAssemblerX8664.h"

#include "gtest/gtest.h"

#if defined(__unix__)
#include <sys/mman.h>
#elif defined(_WIN32)
#define NOMINMAX
#include <Windows.h>
#else
#error "Platform unsupported"
#endif

#include <cassert>

namespace Ice {
namespace X8664 {
namespace Test {

// Common base fixture for all X8664 assembler tests. It owns the assembler
// under test and exposes compile-time "nickname" helpers for every register
// encoding the tests need.
class AssemblerX8664TestBase : public ::testing::Test {
protected:
  using Address = AssemblerX8664::Traits::Address;
  using Cond = AssemblerX8664::Traits::Cond;
  using GPRRegister = AssemblerX8664::Traits::GPRRegister;
  using ByteRegister = AssemblerX8664::Traits::ByteRegister;
  using Traits = AssemblerX8664::Traits;
  using XmmRegister = AssemblerX8664::Traits::XmmRegister;

// The following are "nicknames" for all possible GPRs in x86-64. With those,
// we can use, e.g.,
//
//  Encoded_GPR_al()
//
// instead of GPRRegister::Encoded_Reg_eax for 8 bit operands. They also
// introduce "regular" nicknames for legacy x86-32 registers (e.g., eax becomes
// r1; esp, r0).
//
// Note that every width variant (q/d/w/l) of a given register resolves to the
// same Encoded_Reg_##Name32 value: the encoding is identical across operand
// widths, only the instruction's operand size differs.
#define LegacyRegAliases(NewName, Name64, Name32, Name16, Name8)               \
  static constexpr GPRRegister Encoded_GPR_##NewName() {                       \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##q() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##d() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##w() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##l() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr ByteRegister Encoded_Bytereg_##NewName() {                  \
    return ByteRegister::Encoded_8_Reg_##Name8;                                \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name64() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name32() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name16() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name8() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }
// Aliases for the x86-64-only registers (r8-r15); these have no legacy names,
// so only the rN nickname family is generated.
#define NewRegAliases(Name)                                                    \
  static constexpr GPRRegister Encoded_GPR_##Name() {                          \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##q() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##d() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##w() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##l() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr ByteRegister Encoded_Bytereg_##Name() {                     \
    return ByteRegister::Encoded_8_Reg_##Name##l;                              \
  }
// Aliases for the xmm registers.
#define XmmRegAliases(Name)                                                    \
  static constexpr XmmRegister Encoded_Xmm_##Name() {                          \
    return XmmRegister::Encoded_Reg_##Name;                                    \
  }
  LegacyRegAliases(r0, rsp, esp, sp, spl);
  LegacyRegAliases(r1, rax, eax, ax, al);
  LegacyRegAliases(r2, rbx, ebx, bx, bl);
  LegacyRegAliases(r3, rcx, ecx, cx, cl);
  LegacyRegAliases(r4, rdx, edx, dx, dl);
  LegacyRegAliases(r5, rbp, ebp, bp, bpl);
  LegacyRegAliases(r6, rsi, esi, si, sil);
  LegacyRegAliases(r7, rdi, edi, di, dil);
  NewRegAliases(r8);
  NewRegAliases(r9);
  NewRegAliases(r10);
  NewRegAliases(r11);
  NewRegAliases(r12);
  NewRegAliases(r13);
  NewRegAliases(r14);
  NewRegAliases(r15);
  XmmRegAliases(xmm0);
  XmmRegAliases(xmm1);
  XmmRegAliases(xmm2);
  XmmRegAliases(xmm3);
  XmmRegAliases(xmm4);
  XmmRegAliases(xmm5);
  XmmRegAliases(xmm6);
  XmmRegAliases(xmm7);
  XmmRegAliases(xmm8);
  XmmRegAliases(xmm9);
  XmmRegAliases(xmm10);
  XmmRegAliases(xmm11);
  XmmRegAliases(xmm12);
  XmmRegAliases(xmm13);
  XmmRegAliases(xmm14);
  XmmRegAliases(xmm15);
#undef XmmRegAliases
#undef NewRegAliases
#undef LegacyRegAliases

  AssemblerX8664TestBase() { reset(); }

  // Discards the current assembler (and any code emitted into it) and starts
  // over with a fresh one.
  void reset() { Assembler = makeUnique<AssemblerX8664>(); }

  // Returns the assembler under test; tests emit instructions through it (see
  // the __ macro below).
  AssemblerX8664 *assembler() const { return Assembler.get(); }

  // Number of bytes assembled so far.
  size_t codeBytesSize() const { return Assembler->getBufferView().size(); }

  // Raw pointer to the assembled bytes.
  const uint8_t *codeBytes() const {
    return static_cast<const uint8_t *>(
        static_cast<const void *>(Assembler->getBufferView().data()));
  }

private:
  std::unique_ptr<AssemblerX8664> Assembler;
};

// __ is a helper macro. It allows test cases to emit X8664 assembly
// instructions with
//
//   __ mov(GPRRegister::Reg_Eax, 1);
//   __ ret();
//
// and so on. The idea of having this was "stolen" from dart's unit tests.
168 #define __ (this->assembler())-> 169 170 // AssemblerX8664LowLevelTest verify that the "basic" instructions the tests 171 // rely on are encoded correctly. Therefore, instead of executing the assembled 172 // code, these tests will verify that the assembled bytes are sane. 173 class AssemblerX8664LowLevelTest : public AssemblerX8664TestBase { 174 protected: 175 // verifyBytes is a template helper that takes a Buffer, and a variable number 176 // of bytes. As the name indicates, it is used to verify the bytes for an 177 // instruction encoding. verifyBytes(const uint8_t *)178 template <int N, int I> static bool verifyBytes(const uint8_t *) { 179 static_assert(I == N, "Invalid template instantiation."); 180 return true; 181 } 182 183 template <int N, int I = 0, typename... Args> verifyBytes(const uint8_t * Buffer,uint8_t Byte,Args...OtherBytes)184 static bool verifyBytes(const uint8_t *Buffer, uint8_t Byte, 185 Args... OtherBytes) { 186 static_assert(I < N, "Invalid template instantiation."); 187 EXPECT_EQ(Byte, Buffer[I]) << "Byte " << (I + 1) << " of " << N; 188 return verifyBytes<N, I + 1>(Buffer, OtherBytes...) && Buffer[I] == Byte; 189 } 190 }; 191 192 // After these tests we should have a sane environment; we know the following 193 // work: 194 // 195 // (*) zeroing eax, ebx, ecx, edx, edi, and esi; 196 // (*) call $4 instruction (used for ip materialization); 197 // (*) register push and pop; 198 // (*) cmp reg, reg; and 199 // (*) returning from functions. 200 // 201 // We can now dive into testing each emitting method in AssemblerX8664. Each 202 // test will emit some instructions for performing the test. The assembled 203 // instructions will operate in a "safe" environment. All x86-64 registers are 204 // spilled to the program stack, and the registers are then zeroed out, with the 205 // exception of %esp and %r9. 206 // 207 // The jitted code and the unittest code will share the same stack. 
// Therefore, each test harness needs to ensure it does not leave anything it
// pushed on the stack.
//
// %r9 is initialized with a pointer for rIP-based addressing. This pointer is
// used for position-independent access to a scratchpad area for use in tests.
// In theory we could use rip-based addressing, but in practice that would
// require creating fixups, which would, in turn, require creating a global
// context. We therefore rely on the same technique used for pic code in x86-32
// (i.e., IP materialization). Upon a test start up, a call(NextInstruction) is
// executed. We then pop the return address from the stack, and use it for pic
// addressing.
//
// The jitted code will look like the following:
//
// test:
//       push %r9
//       call test$materialize_ip
// test$materialize_ip: <<------- %r9 will point here
//       pop %r9
//       push %rax
//       push %rbx
//       push %rcx
//       push %rdx
//       push %rbp
//       push %rdi
//       push %rsi
//       push %r8
//       push %r10
//       push %r11
//       push %r12
//       push %r13
//       push %r14
//       push %r15
//       mov $0, %rax
//       mov $0, %rbx
//       mov $0, %rcx
//       mov $0, %rdx
//       mov $0, %rbp
//       mov $0, %rdi
//       mov $0, %rsi
//       mov $0, %r8
//       mov $0, %r10
//       mov $0, %r11
//       mov $0, %r12
//       mov $0, %r13
//       mov $0, %r14
//       mov $0, %r15
//
//       << test code goes here >>
//
//       mov %rax, { 0 + $ScratchpadOffset}(%rbp)
//       mov %rbx, { 8 + $ScratchpadOffset}(%rbp)
//       mov %rcx, { 16 + $ScratchpadOffset}(%rbp)
//       mov %rdx, { 24 + $ScratchpadOffset}(%rbp)
//       mov %rdi, { 32 + $ScratchpadOffset}(%rbp)
//       mov %rsi, { 40 + $ScratchpadOffset}(%rbp)
//       mov %rbp, { 48 + $ScratchpadOffset}(%rbp)
//       mov %rsp, { 56 + $ScratchpadOffset}(%rbp)
//       mov %r8,  { 64 + $ScratchpadOffset}(%rbp)
//       mov %r9,  { 72 + $ScratchpadOffset}(%rbp)
//       mov %r10, { 80 + $ScratchpadOffset}(%rbp)
//       mov %r11, { 88 + $ScratchpadOffset}(%rbp)
//       mov %r12, { 96 + $ScratchpadOffset}(%rbp)
//       mov %r13, {104 + $ScratchpadOffset}(%rbp)
//       mov %r14, {112 + $ScratchpadOffset}(%rbp)
//       mov %r15, {120 + $ScratchpadOffset}(%rbp)
//       movups %xmm0,  {128 + $ScratchpadOffset}(%rbp)
//       movups %xmm1,  {136 + $ScratchpadOffset}(%rbp)
//       movups %xmm2,  {144 + $ScratchpadOffset}(%rbp)
//       movups %xmm3,  {152 + $ScratchpadOffset}(%rbp)
//       movups %xmm4,  {160 + $ScratchpadOffset}(%rbp)
//       movups %xmm5,  {168 + $ScratchpadOffset}(%rbp)
//       movups %xmm6,  {176 + $ScratchpadOffset}(%rbp)
//       movups %xmm7,  {184 + $ScratchpadOffset}(%rbp)
//       movups %xmm8,  {192 + $ScratchpadOffset}(%rbp)
//       movups %xmm9,  {200 + $ScratchpadOffset}(%rbp)
//       movups %xmm10, {208 + $ScratchpadOffset}(%rbp)
//       movups %xmm11, {216 + $ScratchpadOffset}(%rbp)
//       movups %xmm12, {224 + $ScratchpadOffset}(%rbp)
//       movups %xmm13, {232 + $ScratchpadOffset}(%rbp)
//       movups %xmm14, {240 + $ScratchpadOffset}(%rbp)
//       movups %xmm15, {248 + $ScratchpadOffset}(%rbp)
//
//       pop %r15
//       pop %r14
//       pop %r13
//       pop %r12
//       pop %r11
//       pop %r10
//       pop %r8
//       pop %rsi
//       pop %rdi
//       pop %rbp
//       pop %rdx
//       pop %rcx
//       pop %rbx
//       pop %rax
//       pop %r9
//       ret
//
//       << ... >>
//
// scratchpad: <<------- accessed via $Offset(%ebp)
//
//       << test scratch area >>
//
// TODO(jpp): test the
//
//   mov %reg, $Offset(%ebp)
//   movups %xmm, $Offset(%ebp)
//
// encodings using the low level assembler test ensuring that the register
// values can be written to the scratchpad area.
//
// r9 was deliberately chosen so that every instruction accessing memory would
// fail if the rex prefix was not emitted for it.
class AssemblerX8664Test : public AssemblerX8664TestBase {
protected:
  // Dqword is used to represent 128-bit data types. The Dqword's contents are
  // the same as the contents read from memory. Tests can then use the union
  // members to verify the tests' outputs.
  //
  // NOTE: We want sizeof(Dqword) == sizeof(uint64_t) * 2. In other words, we
  // want Dqword's contents to be **exactly** what the memory contents were so
  // that we can do, e.g.,
  //
  //   ...
  //   float Ret[4];
  //   // populate Ret
  //   return *reinterpret_cast<Dqword *>(&Ret);
  //
  // While being an ugly hack, this kind of return statement is used
  // extensively in the PackedArith (see below) class.
  union Dqword {
    // Constructs from four 32-bit floats; enabled only when the first argument
    // is a floating-point type.
    template <typename T0, typename T1, typename T2, typename T3,
              typename = typename std::enable_if<
                  std::is_floating_point<T0>::value>::type>
    Dqword(T0 F0, T1 F1, T2 F2, T3 F3) {
      F32[0] = F0;
      F32[1] = F1;
      F32[2] = F2;
      F32[3] = F3;
    }

    // Constructs from four 32-bit signed integers.
    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, int32_t>::value, T>::type I0,
           T I1, T I2, T I3) {
      I32[0] = I0;
      I32[1] = I1;
      I32[2] = I2;
      I32[3] = I3;
    }

    // Constructs from two 64-bit unsigned integers.
    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, uint64_t>::value, T>::type
               U64_0,
           T U64_1) {
      U64[0] = U64_0;
      U64[1] = U64_1;
    }

    // Constructs from two doubles.
    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, double>::value, T>::type D0,
           T D1) {
      F64[0] = D0;
      F64[1] = D1;
    }

    // Bitwise equality over the full 128 bits (note: this is memcmp equality,
    // so, e.g., two NaN payloads compare by bit pattern, not by fp rules).
    bool operator==(const Dqword &Rhs) const {
      return std::memcmp(this, &Rhs, sizeof(*this)) == 0;
    }

    double F64[2];
    uint64_t U64[2];
    int64_t I64[2];

    float F32[4];
    uint32_t U32[4];
    int32_t I32[4];

    uint16_t U16[8];
    int16_t I16[8];

    uint8_t U8[16];
    int8_t I8[16];

  private:
    Dqword() = delete;
  };

  // As stated, we want this condition to hold, so we assert.
  static_assert(sizeof(Dqword) == 2 * sizeof(uint64_t),
                "Dqword has the wrong size.");

  // PackedArith is an interface provider for Dqwords. PackedArith's C argument
  // is the underlying Dqword's type, which is then used so that we can define
  // operators in terms of C++ operators on the underlying elements' type.
  template <typename C> class PackedArith {
  public:
    // Number of C-typed lanes in a Dqword.
    static constexpr uint32_t N = sizeof(Dqword) / sizeof(C);
    static_assert(N * sizeof(C) == sizeof(Dqword),
                  "Invalid template paramenter.");
    static_assert((N & 1) == 0, "N should be divisible by 2");

// Element-wise comparison yielding all-ones (-1) for true lanes and 0 for
// false lanes, like the SSE compare instructions. Note that the comparison is
// performed on the lanes' bit patterns reinterpreted as same-sized signed
// integers (ElemType), not on the floating-point values themselves.
#define DefinePackedComparisonOperator(Op)                                     \
  template <typename Container = C, int Size = N>                              \
  typename std::enable_if<std::is_floating_point<Container>::value,            \
                          Dqword>::type                                        \
  operator Op(const Dqword &Rhs) const {                                       \
    using ElemType =                                                           \
        typename std::conditional<std::is_same<float, Container>::value,       \
                                  int32_t, int64_t>::type;                     \
    static_assert(sizeof(ElemType) == sizeof(Container),                       \
                  "Check ElemType definition.");                               \
    const ElemType *const RhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Rhs);                         \
    const ElemType *const LhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Lhs);                         \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = (LhsPtr[i] Op RhsPtr[i]) ? -1 : 0;                              \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedComparisonOperator(<);
    DefinePackedComparisonOperator(<=);
    DefinePackedComparisonOperator(>);
    DefinePackedComparisonOperator(>=);
    DefinePackedComparisonOperator(==);
    DefinePackedComparisonOperator(!=);

#undef DefinePackedComparisonOperator

// ord()/unord() lane predicates. A lane is "unordered" when either input is
// NaN (x == x is false only for NaN); ord yields -1 for lanes where both
// inputs are non-NaN, unord yields -1 where at least one input is NaN.
#define DefinePackedOrdUnordComparisonOperator(Op, Ordered)                    \
  template <typename Container = C, int Size = N>                              \
  typename std::enable_if<std::is_floating_point<Container>::value,            \
                          Dqword>::type                                        \
  Op(const Dqword &Rhs) const {                                                \
    using ElemType =                                                           \
        typename std::conditional<std::is_same<float, Container>::value,       \
                                  int32_t, int64_t>::type;                     \
    static_assert(sizeof(ElemType) == sizeof(Container),                       \
                  "Check ElemType definition.");                               \
    const Container *const RhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Rhs);                        \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = (!(LhsPtr[i] == LhsPtr[i]) || !(RhsPtr[i] == RhsPtr[i])) !=     \
                       (Ordered)                                               \
                   ? -1                                                        \
                   : 0;                                                        \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedOrdUnordComparisonOperator(ord, true);
    DefinePackedOrdUnordComparisonOperator(unord, false);
#undef DefinePackedOrdUnordComparisonOperator

// Element-wise arithmetic operator. When RhsIndexChanges is false every lane
// uses RhsPtr[0] as the right operand (matching the SSE shifts, where a single
// scalar count is applied to all lanes). When NeedsInt is true, fp containers
// are computed on their unsigned-integer representation instead (needed for
// the bitwise/shift operators, which are invalid on float/double).
#define DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt)               \
  template <typename Container = C, int Size = N>                              \
  Dqword operator Op(const Dqword &Rhs) const {                                \
    using ElemTypeForFp = typename std::conditional<                           \
        !(NeedsInt), Container,                                                \
        typename std::conditional<                                             \
            std::is_same<Container, float>::value, uint32_t,                   \
            typename std::conditional<std::is_same<Container, double>::value,  \
                                      uint64_t, void>::type>::type>::type;     \
    using ElemType =                                                           \
        typename std::conditional<std::is_integral<Container>::value,          \
                                  Container, ElemTypeForFp>::type;             \
    static_assert(!std::is_same<void, ElemType>::value,                        \
                  "Check ElemType definition.");                               \
    const ElemType *const RhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Rhs);                         \
    const ElemType *const LhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Lhs);                         \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = LhsPtr[i] Op RhsPtr[(RhsIndexChanges) ? i : 0];                 \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedArithOperator(>>, false, true);
    DefinePackedArithOperator(<<, false, true);
    DefinePackedArithOperator(+, true, false);
    DefinePackedArithOperator(-, true, false);
    DefinePackedArithOperator(/, true, false);
    DefinePackedArithOperator(&, true, true);
    DefinePackedArithOperator(|, true, true);
    DefinePackedArithOperator(^, true, true);

#undef DefinePackedArithOperator

// Element-wise shift by an 8-bit immediate count.
#define DefinePackedArithShiftImm(Op)                                          \
  template <typename Container = C, int Size = N>                              \
  Dqword operator Op(uint8_t imm) const {                                      \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    Container Ret[N];                                                          \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = LhsPtr[i] Op imm;                                               \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedArithShiftImm(>>);
    DefinePackedArithShiftImm(<<);

#undef DefinePackedArithShiftImm

    // Element-wise (truncating) multiply for signed integer and fp lanes.
    template <typename Container = C, int Size = N>
    typename std::enable_if<std::is_signed<Container>::value ||
                                std::is_floating_point<Container>::value,
                            Dqword>::type
    operator*(const Dqword &Rhs) const {
      static_assert((std::is_integral<Container>::value &&
                     sizeof(Container) < sizeof(uint64_t)) ||
                        std::is_floating_point<Container>::value,
                    "* is only defined for i(8|16|32), and fp types.");

      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      Container Ret[Size];
      for (uint32_t i = 0; i < Size; ++i) {
        Ret[i] = LhsPtr[i] * RhsPtr[i];
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

    // Widening multiply for unsigned integer lanes: multiplies every
    // even-indexed pair of lanes into a double-width result (cf. the SSE
    // pmuludq-style instructions -- TODO confirm against the instruction
    // under test).
    template <typename Container = C, int Size = N,
              typename = typename std::enable_if<
                  !std::is_signed<Container>::value>::type>
    Dqword operator*(const Dqword &Rhs) const {
      static_assert(std::is_integral<Container>::value &&
                        sizeof(Container) < sizeof(uint64_t),
                    "* is only defined for ui(8|16|32)");
      using NextType = typename std::conditional<
          sizeof(Container) == 1, uint16_t,
          typename std::conditional<sizeof(Container) == 2, uint32_t,
                                    uint64_t>::type>::type;
      static_assert(sizeof(Container) * 2 == sizeof(NextType),
                    "Unexpected size");

      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      NextType Ret[Size / 2];
      for (uint32_t i = 0; i < Size; i += 2) {
        Ret[i / 2] =
            static_cast<NextType>(LhsPtr[i]) * static_cast<NextType>(RhsPtr[i]);
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

    // Element-wise bitwise complement. Returns a PackedArith (not a Dqword)
    // so the result owns its own copy of the data; see the Lhs comment below.
    template <typename Container = C, int Size = N>
    PackedArith<Container> operator~() const {
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      Container Ret[Size];
      for (uint32_t i = 0; i < Size; ++i) {
        Ret[i] = ~LhsPtr[i];
      }
      return PackedArith<Container>(*reinterpret_cast<Dqword *>(&Ret));
    }

// Element-wise min/max using std::min/std::max (ps/pd suffixes follow the
// SSE mnemonic naming); only available for fp containers.
#define MinMaxOperations(Name, Suffix)                                         \
  template <typename Container = C, int Size = N>                              \
  Dqword Name##Suffix(const Dqword &Rhs) const {                               \
    static_assert(std::is_floating_point<Container>::value,                    \
                  #Name #Suffix "ps is only available for fp.");               \
    const Container *const RhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Rhs);                        \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    Container Ret[Size];                                                       \
    for (uint32_t i = 0; i < Size; ++i) {                                      \
      Ret[i] = std::Name(LhsPtr[i], RhsPtr[i]);                                \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    MinMaxOperations(max, ps);
    MinMaxOperations(max, pd);
    MinMaxOperations(min, ps);
    MinMaxOperations(min, pd);
#undef MinMaxOperations

    // Per-element select: a lane takes Rhs's value when the corresponding
    // mask lane's sign bit is set, Lhs's value otherwise (this is the
    // sign-bit selection rule used by the SSE blendv instructions).
    template <typename Container = C, int Size = N>
    Dqword blendWith(const Dqword &Rhs, const Dqword &Mask) const {
      using MaskType = typename std::conditional<
          sizeof(Container) == 1, int8_t,
          typename std::conditional<sizeof(Container) == 2, int16_t,
                                    int32_t>::type>::type;
      static_assert(sizeof(MaskType) == sizeof(Container),
                    "MaskType has the wrong size.");
      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      const MaskType *const MaskPtr =
          reinterpret_cast<const MaskType *const>(&Mask);
      Container Ret[Size];
      for (int i = 0; i < Size; ++i) {
        Ret[i] = ((MaskPtr[i] < 0) ? RhsPtr : LhsPtr)[i];
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

  private:
    // The AssemblerX8664Test class needs to be a friend so that it can create
    // PackedArith objects (see below.)
    friend class AssemblerX8664Test;

    explicit PackedArith(const Dqword &MyLhs) : Lhs(MyLhs) {}

    // Lhs can't be a & because operator~ returns a temporary object that needs
    // access to its own Dqword.
    const Dqword Lhs;
  };

  // Named constructor for PackedArith objects.
  template <typename C> static PackedArith<C> packedAs(const Dqword &D) {
    return PackedArith<C>(D);
  }

  AssemblerX8664Test() { reset(); }

  // Resets the assembler and re-emits the test prologue, reserving the
  // scratchpad dwords used to save register state after the jitted code runs.
  void reset() {
    AssemblerX8664TestBase::reset();

    NeedsEpilogue = true;
    // These dwords are allocated for saving the GPR state after the jitted code
    // runs.
    NumAllocatedDwords = AssembledTest::ScratchpadSlots;
    addPrologue();
  }

  // AssembledTest is a wrapper around a PROT_EXEC mmap'ed buffer.
  // This buffer contains both the test code as well as the prologue/epilogue,
  // and the scratchpad area that tests may use -- all tests use this
  // scratchpad area for storing the processor's registers after the tests
  // executed. This class also exposes helper methods for reading the register
  // state after test execution, as well as for reading the scratchpad area.
  class AssembledTest {
    AssembledTest() = delete;
    AssembledTest(const AssembledTest &) = delete;
    AssembledTest &operator=(const AssembledTest &) = delete;

  public:
    static constexpr uint32_t MaximumCodeSize = 1 << 20;

    // Scratchpad slot of each saved register, in dwords. Each GPR occupies a
    // qword (two dword slots), hence the stride of 2.
    static constexpr uint32_t raxSlot() { return 0; }
    static constexpr uint32_t rbxSlot() { return 2; }
    static constexpr uint32_t rcxSlot() { return 4; }
    static constexpr uint32_t rdxSlot() { return 6; }
    static constexpr uint32_t rdiSlot() { return 8; }
    static constexpr uint32_t rsiSlot() { return 10; }
    static constexpr uint32_t rbpSlot() { return 12; }
    static constexpr uint32_t rspSlot() { return 14; }
    static constexpr uint32_t r8Slot() { return 16; }
    static constexpr uint32_t r9Slot() { return 18; }
    static constexpr uint32_t r10Slot() { return 20; }
    static constexpr uint32_t r11Slot() { return 22; }
    static constexpr uint32_t r12Slot() { return 24; }
    static constexpr uint32_t r13Slot() { return 26; }
    static constexpr uint32_t r14Slot() { return 28; }
    static constexpr uint32_t r15Slot() { return 30; }

    // save 4 dwords for each xmm register.
    static constexpr uint32_t xmm0Slot() { return 32; }
    static constexpr uint32_t xmm1Slot() { return 36; }
    static constexpr uint32_t xmm2Slot() { return 40; }
    static constexpr uint32_t xmm3Slot() { return 44; }
    static constexpr uint32_t xmm4Slot() { return 48; }
    static constexpr uint32_t xmm5Slot() { return 52; }
    static constexpr uint32_t xmm6Slot() { return 56; }
    static constexpr uint32_t xmm7Slot() { return 60; }
    static constexpr uint32_t xmm8Slot() { return 64; }
    static constexpr uint32_t xmm9Slot() { return 68; }
    static constexpr uint32_t xmm10Slot() { return 72; }
    static constexpr uint32_t xmm11Slot() { return 76; }
    static constexpr uint32_t xmm12Slot() { return 80; }
    static constexpr uint32_t xmm13Slot() { return 84; }
    static constexpr uint32_t xmm14Slot() { return 88; }
    static constexpr uint32_t xmm15Slot() { return 92; }

    // Total number of dwords reserved for the register-save scratchpad.
    static constexpr uint32_t ScratchpadSlots = 96;

    // Maps an executable buffer large enough for MaximumCodeSize bytes of
    // code plus ExtraStorageDwords dwords of extra scratch storage, then
    // copies the MySize assembled bytes into it.
    AssembledTest(const uint8_t *Data, const size_t MySize,
                  const size_t ExtraStorageDwords)
        : Size(MaximumCodeSize + 4 * ExtraStorageDwords) {
      // MaxCodeSize is needed because EXPECT_LT needs a symbol with a name --
      // probably a compiler bug?
      uint32_t MaxCodeSize = MaximumCodeSize;
      EXPECT_LT(MySize, MaxCodeSize);
      assert(MySize < MaximumCodeSize);

#if defined(__unix__)
      // MAP_32BIT keeps the code in the low 4GB -- presumably so 32-bit
      // absolute addresses into the buffer are representable; TODO confirm.
      ExecutableData = mmap(nullptr, Size, PROT_WRITE | PROT_READ | PROT_EXEC,
                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
      EXPECT_NE(MAP_FAILED, ExecutableData) << strerror(errno);
      assert(MAP_FAILED != ExecutableData);
#elif defined(_WIN32)
      // NOTE(review): VirtualAlloc reports failure via GetLastError(), not
      // errno, so the strerror(errno) message below may be meaningless here.
      ExecutableData = VirtualAlloc(NULL, Size, MEM_COMMIT | MEM_RESERVE,
                                    PAGE_EXECUTE_READWRITE);
      EXPECT_NE(nullptr, ExecutableData) << strerror(errno);
      assert(nullptr != ExecutableData);
#else
#error "Platform unsupported"
#endif

      std::memcpy(ExecutableData, Data, MySize);
    }

    // We allow AssembledTest to be moved so that we can return objects of
    // this type.
    AssembledTest(AssembledTest &&Buffer)
        : ExecutableData(Buffer.ExecutableData), Size(Buffer.Size) {
      Buffer.ExecutableData = nullptr;
      Buffer.Size = 0;
    }

    // NOTE(review): a mapping already owned by *this is overwritten here
    // without being released -- this leaks unless move-assignment only ever
    // targets empty/moved-from objects; confirm against call sites.
    AssembledTest &operator=(AssembledTest &&Buffer) {
      ExecutableData = Buffer.ExecutableData;
      Buffer.ExecutableData = nullptr;
      Size = Buffer.Size;
      Buffer.Size = 0;
      return *this;
    }

    // Releases the executable mapping (if still owned).
    ~AssembledTest() {
      if (ExecutableData != nullptr) {
#if defined(__unix__)
        munmap(ExecutableData, Size);
#elif defined(_WIN32)
        VirtualFree(ExecutableData, 0, MEM_RELEASE);
#else
#error "Platform unsupported"
#endif
        ExecutableData = nullptr;
      }
    }

    // Executes the jitted code (prologue + test body + epilogue).
    void run() const { reinterpret_cast<void (*)()>(ExecutableData)(); }

// For each legacy GPR nickname, defines accessors for every operand width
// that read the register's saved value back from the scratchpad. The
// static_asserts tie each accessor to the matching encoded-register alias.
// Note that the narrower accessors truncate the saved qword via their return
// type.
#define LegacyRegAccessors(NewName, Name64, Name32, Name16, Name8)             \
  static_assert(Encoded_GPR_##NewName() == Encoded_GPR_##Name64(),             \
                "Invalid aliasing.");                                          \
  uint64_t NewName() const {                                                   \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##q() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint64_t NewName##q() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##d() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint32_t NewName##d() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##w() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint16_t NewName##w() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##l() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint8_t NewName##l() const {                                                 \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name64() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint64_t Name64() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name32() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint32_t Name32() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name16() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint16_t Name16() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name8() == Encoded_GPR_##Name64(),               \
                "Invalid aliasing.");                                          \
  uint8_t Name8() const {                                                      \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }
// Accessors (all widths) for the x86-64-only GPRs r8-r15.
#define NewRegAccessors(NewName)                                               \
  uint64_t NewName() const {                                                   \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint64_t NewName##q() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint32_t NewName##d() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint16_t NewName##w() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint8_t NewName##l() const {                                                 \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }
// Accessor for a saved xmm register, read back as the caller-chosen type T.
#define XmmRegAccessor(Name)                                                   \
  template <typename T> T Name() const {                                       \
    return xmm<T>(AssembledTest::Name##Slot());                                \
  }
    LegacyRegAccessors(r0, rsp, esp, sp, spl);
    LegacyRegAccessors(r1, rax, eax, ax, al);
    LegacyRegAccessors(r2, rbx, ebx, bx, bl);
    LegacyRegAccessors(r3, rcx, ecx, cx, cl);
    LegacyRegAccessors(r4, rdx, edx, dx, dl);
    LegacyRegAccessors(r5, rbp, ebp, bp, bpl);
    LegacyRegAccessors(r6, rsi, esi, si, sil);
    LegacyRegAccessors(r7, rdi, edi, di, dil);
    NewRegAccessors(r8);
    NewRegAccessors(r9);
    NewRegAccessors(r10);
    NewRegAccessors(r11);
    NewRegAccessors(r12);
    NewRegAccessors(r13);
    NewRegAccessors(r14);
    NewRegAccessors(r15);
    XmmRegAccessor(xmm0);
    XmmRegAccessor(xmm1);
    XmmRegAccessor(xmm2);
    XmmRegAccessor(xmm3);
    XmmRegAccessor(xmm4);
    XmmRegAccessor(xmm5);
    XmmRegAccessor(xmm6);
    XmmRegAccessor(xmm7);
    XmmRegAccessor(xmm8);
    XmmRegAccessor(xmm9);
    XmmRegAccessor(xmm10);
    XmmRegAccessor(xmm11);
    XmmRegAccessor(xmm12);
    XmmRegAccessor(xmm13);
    XmmRegAccessor(xmm14);
    XmmRegAccessor(xmm15);
#undef XmmRegAccessor
#undef NewRegAccessors
#undef LegacyRegAccessors

    // contentsOfDword is used for reading the values in the scratchpad area.
    // Valid arguments are the dword ids returned by
    // AssemblerX8664Test::allocateDword() -- other inputs are considered
    // invalid, and are not guaranteed to work if the implementation changes.
    template <typename T = uint32_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint32_t)>::type>
    T contentsOfDword(uint32_t Dword) const {
      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(Dword));
    }

    // Reads a 64-bit value starting at the given dword slot.
    template <typename T = uint64_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint64_t)>::type>
    T contentsOfQword(uint32_t InitialDword) const {
      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(InitialDword));
    }

    // Reads a 128-bit value starting at the given dword slot.
    Dqword contentsOfDqword(uint32_t InitialDword) const {
      return *reinterpret_cast<Dqword *>(
          static_cast<uint8_t *>(ExecutableData) + dwordOffset(InitialDword));
    }

    // Writes a 32-bit value into the given scratchpad dword slot; T may be
    // any 32-bit type (the bits are copied verbatim).
    template <typename T = uint32_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint32_t)>::type>
    void setDwordTo(uint32_t Dword, T value) {
      *reinterpret_cast<uint32_t *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(Dword)) =
          *reinterpret_cast<uint32_t *>(&value);
    }

    // Writes a 64-bit value starting at the given dword slot.
    template <typename T = uint64_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint64_t)>::type>
    void setQwordTo(uint32_t InitialDword, T value) {
      *reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(InitialDword)) =
          *reinterpret_cast<uint64_t *>(&value);
    }

    // Writes a 128-bit value; each qword spans two dword slots, hence the +2
    // for the upper half.
    void setDqwordTo(uint32_t InitialDword, const Dqword &qdword) {
      setQwordTo(InitialDword, qdword.U64[0]);
      setQwordTo(InitialDword + 2, qdword.U64[1]);
    }

  private:
    // Reads a saved xmm register's slot; currently only instantiable with
    // T == Dqword.
    template <typename T>
    typename std::enable_if<std::is_same<T, Dqword>::value, Dqword>::type
    xmm(uint8_t Slot) const {
      return contentsOfDqword(Slot);
    }
913 914 template <typename T> 915 typename std::enable_if<!std::is_same<T, Dqword>::value, T>::type xmm(uint8_t Slot)916 xmm(uint8_t Slot) const { 917 constexpr bool TIs64Bit = sizeof(T) == sizeof(uint64_t); 918 using _64BitType = typename std::conditional<TIs64Bit, T, uint64_t>::type; 919 using _32BitType = typename std::conditional<TIs64Bit, uint32_t, T>::type; 920 if (TIs64Bit) { 921 return contentsOfQword<_64BitType>(Slot); 922 } 923 return contentsOfDword<_32BitType>(Slot); 924 } 925 dwordOffset(uint32_t Index)926 static uint32_t dwordOffset(uint32_t Index) { 927 return MaximumCodeSize + (Index * 4); 928 } 929 930 void *ExecutableData = nullptr; 931 size_t Size; 932 }; 933 934 // assemble created an AssembledTest with the jitted code. The first time 935 // assemble is executed it will add the epilogue to the jitted code (which is 936 // the reason why this method is not const qualified. assemble()937 AssembledTest assemble() { 938 if (NeedsEpilogue) { 939 addEpilogue(); 940 } 941 NeedsEpilogue = false; 942 943 for (const auto *Fixup : assembler()->fixups()) { 944 Fixup->emitOffset(assembler()); 945 } 946 947 return AssembledTest(codeBytes(), codeBytesSize(), NumAllocatedDwords); 948 } 949 950 // Allocates a new dword slot in the test's scratchpad area. allocateDword()951 uint32_t allocateDword() { return NumAllocatedDwords++; } 952 953 // Allocates a new qword slot in the test's scratchpad area. allocateQword()954 uint32_t allocateQword() { 955 uint32_t InitialDword = allocateDword(); 956 allocateDword(); 957 return InitialDword; 958 } 959 960 // Allocates a new dqword slot in the test's scratchpad area. 
allocateDqword()961 uint32_t allocateDqword() { 962 uint32_t InitialDword = allocateQword(); 963 allocateQword(); 964 return InitialDword; 965 } 966 dwordAddress(uint32_t Dword)967 Address dwordAddress(uint32_t Dword) { 968 return Address(Encoded_GPR_r9(), dwordDisp(Dword), nullptr); 969 } 970 971 private: 972 // e??SlotAddress returns an AssemblerX8664::Traits::Address that can be used 973 // by the test cases to encode an address operand for accessing the slot for 974 // the specified register. These are all private for, when jitting the test 975 // code, tests should not tamper with these values. Besides, during the test 976 // execution these slots' contents are undefined and should not be accessed. raxSlotAddress()977 Address raxSlotAddress() { return dwordAddress(AssembledTest::raxSlot()); } rbxSlotAddress()978 Address rbxSlotAddress() { return dwordAddress(AssembledTest::rbxSlot()); } rcxSlotAddress()979 Address rcxSlotAddress() { return dwordAddress(AssembledTest::rcxSlot()); } rdxSlotAddress()980 Address rdxSlotAddress() { return dwordAddress(AssembledTest::rdxSlot()); } rdiSlotAddress()981 Address rdiSlotAddress() { return dwordAddress(AssembledTest::rdiSlot()); } rsiSlotAddress()982 Address rsiSlotAddress() { return dwordAddress(AssembledTest::rsiSlot()); } rbpSlotAddress()983 Address rbpSlotAddress() { return dwordAddress(AssembledTest::rbpSlot()); } rspSlotAddress()984 Address rspSlotAddress() { return dwordAddress(AssembledTest::rspSlot()); } r8SlotAddress()985 Address r8SlotAddress() { return dwordAddress(AssembledTest::r8Slot()); } r9SlotAddress()986 Address r9SlotAddress() { return dwordAddress(AssembledTest::r9Slot()); } r10SlotAddress()987 Address r10SlotAddress() { return dwordAddress(AssembledTest::r10Slot()); } r11SlotAddress()988 Address r11SlotAddress() { return dwordAddress(AssembledTest::r11Slot()); } r12SlotAddress()989 Address r12SlotAddress() { return dwordAddress(AssembledTest::r12Slot()); } r13SlotAddress()990 Address r13SlotAddress() 
{ return dwordAddress(AssembledTest::r13Slot()); } r14SlotAddress()991 Address r14SlotAddress() { return dwordAddress(AssembledTest::r14Slot()); } r15SlotAddress()992 Address r15SlotAddress() { return dwordAddress(AssembledTest::r15Slot()); } xmm0SlotAddress()993 Address xmm0SlotAddress() { return dwordAddress(AssembledTest::xmm0Slot()); } xmm1SlotAddress()994 Address xmm1SlotAddress() { return dwordAddress(AssembledTest::xmm1Slot()); } xmm2SlotAddress()995 Address xmm2SlotAddress() { return dwordAddress(AssembledTest::xmm2Slot()); } xmm3SlotAddress()996 Address xmm3SlotAddress() { return dwordAddress(AssembledTest::xmm3Slot()); } xmm4SlotAddress()997 Address xmm4SlotAddress() { return dwordAddress(AssembledTest::xmm4Slot()); } xmm5SlotAddress()998 Address xmm5SlotAddress() { return dwordAddress(AssembledTest::xmm5Slot()); } xmm6SlotAddress()999 Address xmm6SlotAddress() { return dwordAddress(AssembledTest::xmm6Slot()); } xmm7SlotAddress()1000 Address xmm7SlotAddress() { return dwordAddress(AssembledTest::xmm7Slot()); } xmm8SlotAddress()1001 Address xmm8SlotAddress() { return dwordAddress(AssembledTest::xmm8Slot()); } xmm9SlotAddress()1002 Address xmm9SlotAddress() { return dwordAddress(AssembledTest::xmm9Slot()); } xmm10SlotAddress()1003 Address xmm10SlotAddress() { 1004 return dwordAddress(AssembledTest::xmm10Slot()); 1005 } xmm11SlotAddress()1006 Address xmm11SlotAddress() { 1007 return dwordAddress(AssembledTest::xmm11Slot()); 1008 } xmm12SlotAddress()1009 Address xmm12SlotAddress() { 1010 return dwordAddress(AssembledTest::xmm12Slot()); 1011 } xmm13SlotAddress()1012 Address xmm13SlotAddress() { 1013 return dwordAddress(AssembledTest::xmm13Slot()); 1014 } xmm14SlotAddress()1015 Address xmm14SlotAddress() { 1016 return dwordAddress(AssembledTest::xmm14Slot()); 1017 } xmm15SlotAddress()1018 Address xmm15SlotAddress() { 1019 return dwordAddress(AssembledTest::xmm15Slot()); 1020 } 1021 1022 // Returns the displacement that should be used when accessing the 
specified 1023 // Dword in the scratchpad area. It needs to adjust for the initial 1024 // instructions that are emitted before the call that materializes the IP 1025 // register. dwordDisp(uint32_t Dword)1026 uint32_t dwordDisp(uint32_t Dword) const { 1027 EXPECT_LT(Dword, NumAllocatedDwords); 1028 assert(Dword < NumAllocatedDwords); 1029 static constexpr uint8_t PushR9Bytes = 2; 1030 static constexpr uint8_t CallImmBytes = 5; 1031 return AssembledTest::MaximumCodeSize + (Dword * 4) - 1032 (PushR9Bytes + CallImmBytes); 1033 } 1034 addPrologue()1035 void addPrologue() { 1036 __ pushl(Encoded_GPR_r9()); 1037 __ call(Immediate(4)); 1038 __ popl(Encoded_GPR_r9()); 1039 1040 __ pushl(Encoded_GPR_rax()); 1041 __ pushl(Encoded_GPR_rbx()); 1042 __ pushl(Encoded_GPR_rcx()); 1043 __ pushl(Encoded_GPR_rdx()); 1044 __ pushl(Encoded_GPR_rbp()); 1045 __ pushl(Encoded_GPR_rdi()); 1046 __ pushl(Encoded_GPR_rsi()); 1047 __ pushl(Encoded_GPR_r8()); 1048 __ pushl(Encoded_GPR_r10()); 1049 __ pushl(Encoded_GPR_r11()); 1050 __ pushl(Encoded_GPR_r12()); 1051 __ pushl(Encoded_GPR_r13()); 1052 __ pushl(Encoded_GPR_r14()); 1053 __ pushl(Encoded_GPR_r15()); 1054 1055 __ mov(IceType_i32, Encoded_GPR_rax(), Immediate(0x00)); 1056 __ mov(IceType_i32, Encoded_GPR_rbx(), Immediate(0x00)); 1057 __ mov(IceType_i32, Encoded_GPR_rcx(), Immediate(0x00)); 1058 __ mov(IceType_i32, Encoded_GPR_rdx(), Immediate(0x00)); 1059 __ mov(IceType_i32, Encoded_GPR_rbp(), Immediate(0x00)); 1060 __ mov(IceType_i32, Encoded_GPR_rdi(), Immediate(0x00)); 1061 __ mov(IceType_i32, Encoded_GPR_rsi(), Immediate(0x00)); 1062 __ mov(IceType_i32, Encoded_GPR_r8(), Immediate(0x00)); 1063 __ mov(IceType_i32, Encoded_GPR_r10(), Immediate(0x00)); 1064 __ mov(IceType_i32, Encoded_GPR_r11(), Immediate(0x00)); 1065 __ mov(IceType_i32, Encoded_GPR_r12(), Immediate(0x00)); 1066 __ mov(IceType_i32, Encoded_GPR_r13(), Immediate(0x00)); 1067 __ mov(IceType_i32, Encoded_GPR_r14(), Immediate(0x00)); 1068 __ mov(IceType_i32, 
Encoded_GPR_r15(), Immediate(0x00)); 1069 } 1070 addEpilogue()1071 void addEpilogue() { 1072 __ mov(IceType_i64, raxSlotAddress(), Encoded_GPR_rax()); 1073 __ mov(IceType_i64, rbxSlotAddress(), Encoded_GPR_rbx()); 1074 __ mov(IceType_i64, rcxSlotAddress(), Encoded_GPR_rcx()); 1075 __ mov(IceType_i64, rdxSlotAddress(), Encoded_GPR_rdx()); 1076 __ mov(IceType_i64, rdiSlotAddress(), Encoded_GPR_rdi()); 1077 __ mov(IceType_i64, rsiSlotAddress(), Encoded_GPR_rsi()); 1078 __ mov(IceType_i64, rbpSlotAddress(), Encoded_GPR_rbp()); 1079 __ mov(IceType_i64, rspSlotAddress(), Encoded_GPR_rsp()); 1080 __ mov(IceType_i64, r8SlotAddress(), Encoded_GPR_r8()); 1081 __ mov(IceType_i64, r9SlotAddress(), Encoded_GPR_r9()); 1082 __ mov(IceType_i64, r10SlotAddress(), Encoded_GPR_r10()); 1083 __ mov(IceType_i64, r11SlotAddress(), Encoded_GPR_r11()); 1084 __ mov(IceType_i64, r12SlotAddress(), Encoded_GPR_r12()); 1085 __ mov(IceType_i64, r13SlotAddress(), Encoded_GPR_r13()); 1086 __ mov(IceType_i64, r14SlotAddress(), Encoded_GPR_r14()); 1087 __ mov(IceType_i64, r15SlotAddress(), Encoded_GPR_r15()); 1088 __ movups(xmm0SlotAddress(), Encoded_Xmm_xmm0()); 1089 __ movups(xmm1SlotAddress(), Encoded_Xmm_xmm1()); 1090 __ movups(xmm2SlotAddress(), Encoded_Xmm_xmm2()); 1091 __ movups(xmm3SlotAddress(), Encoded_Xmm_xmm3()); 1092 __ movups(xmm4SlotAddress(), Encoded_Xmm_xmm4()); 1093 __ movups(xmm5SlotAddress(), Encoded_Xmm_xmm5()); 1094 __ movups(xmm6SlotAddress(), Encoded_Xmm_xmm6()); 1095 __ movups(xmm7SlotAddress(), Encoded_Xmm_xmm7()); 1096 __ movups(xmm8SlotAddress(), Encoded_Xmm_xmm8()); 1097 __ movups(xmm9SlotAddress(), Encoded_Xmm_xmm9()); 1098 __ movups(xmm10SlotAddress(), Encoded_Xmm_xmm10()); 1099 __ movups(xmm11SlotAddress(), Encoded_Xmm_xmm11()); 1100 __ movups(xmm12SlotAddress(), Encoded_Xmm_xmm12()); 1101 __ movups(xmm13SlotAddress(), Encoded_Xmm_xmm13()); 1102 __ movups(xmm14SlotAddress(), Encoded_Xmm_xmm14()); 1103 __ movups(xmm15SlotAddress(), Encoded_Xmm_xmm15()); 1104 1105 __ 
popl(Encoded_GPR_r15()); 1106 __ popl(Encoded_GPR_r14()); 1107 __ popl(Encoded_GPR_r13()); 1108 __ popl(Encoded_GPR_r12()); 1109 __ popl(Encoded_GPR_r11()); 1110 __ popl(Encoded_GPR_r10()); 1111 __ popl(Encoded_GPR_r8()); 1112 __ popl(Encoded_GPR_rsi()); 1113 __ popl(Encoded_GPR_rdi()); 1114 __ popl(Encoded_GPR_rbp()); 1115 __ popl(Encoded_GPR_rdx()); 1116 __ popl(Encoded_GPR_rcx()); 1117 __ popl(Encoded_GPR_rbx()); 1118 __ popl(Encoded_GPR_rax()); 1119 __ popl(Encoded_GPR_r9()); 1120 1121 __ ret(); 1122 } 1123 1124 bool NeedsEpilogue; 1125 uint32_t NumAllocatedDwords; 1126 }; 1127 1128 } // end of namespace Test 1129 } // end of namespace X8664 1130 } // end of namespace Ice 1131 1132 #endif // ASSEMBLERX8664_TESTUTIL_H_ 1133