//===- subzero/unittest/AssemblerX8664/TestUtil.h ---------------*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Utility classes for testing the X8664 Assembler.
//
//===----------------------------------------------------------------------===//

#ifndef ASSEMBLERX8664_TESTUTIL_H_
#define ASSEMBLERX8664_TESTUTIL_H_

#include "IceAssemblerX8664.h"

#include "gtest/gtest.h"

#if defined(__unix__)
#include <sys/mman.h>
#elif defined(_WIN32)
#define NOMINMAX
#include <Windows.h>
#else
#error "Platform unsupported"
#endif

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>

namespace Ice {
namespace X8664 {
namespace Test {

class AssemblerX8664TestBase : public ::testing::Test {
protected:
  using Address = AssemblerX8664::Traits::Address;
  using Cond = AssemblerX8664::Traits::Cond;
  using GPRRegister = AssemblerX8664::Traits::GPRRegister;
  using ByteRegister = AssemblerX8664::Traits::ByteRegister;
  using Traits = AssemblerX8664::Traits;
  using XmmRegister = AssemblerX8664::Traits::XmmRegister;

// The following are "nicknames" for all possible GPRs in x86-64. With those, we
// can use, e.g.,
//
//  Encoded_GPR_al()
//
// instead of GPRRegister::Encoded_Reg_eax for 8 bit operands. They also
// introduce "regular" nicknames for the legacy x86-32 registers (e.g., eax
// becomes r1; esp, r0).
#define LegacyRegAliases(NewName, Name64, Name32, Name16, Name8)               \
  static constexpr GPRRegister Encoded_GPR_##NewName() {                       \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##q() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##d() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##w() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##NewName##l() {                    \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr ByteRegister Encoded_Bytereg_##NewName() {                  \
    return ByteRegister::Encoded_8_Reg_##Name8;                                \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name64() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name32() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name16() {                        \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name8() {                         \
    return GPRRegister::Encoded_Reg_##Name32;                                  \
  }
#define NewRegAliases(Name)                                                    \
  static constexpr GPRRegister Encoded_GPR_##Name() {                          \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##q() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##d() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##w() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr GPRRegister Encoded_GPR_##Name##l() {                       \
    return GPRRegister::Encoded_Reg_##Name##d;                                 \
  }                                                                            \
  static constexpr ByteRegister Encoded_Bytereg_##Name() {                     \
    return ByteRegister::Encoded_8_Reg_##Name##l;                              \
  }
#define XmmRegAliases(Name)                                                    \
  static constexpr XmmRegister Encoded_Xmm_##Name() {                          \
    return XmmRegister::Encoded_Reg_##Name;                                    \
  }
  LegacyRegAliases(r0, rsp, esp, sp, spl);
  LegacyRegAliases(r1, rax, eax, ax, al);
  LegacyRegAliases(r2, rbx, ebx, bx, bl);
  LegacyRegAliases(r3, rcx, ecx, cx, cl);
  LegacyRegAliases(r4, rdx, edx, dx, dl);
  LegacyRegAliases(r5, rbp, ebp, bp, bpl);
  LegacyRegAliases(r6, rsi, esi, si, sil);
  LegacyRegAliases(r7, rdi, edi, di, dil);
  NewRegAliases(r8);
  NewRegAliases(r9);
  NewRegAliases(r10);
  NewRegAliases(r11);
  NewRegAliases(r12);
  NewRegAliases(r13);
  NewRegAliases(r14);
  NewRegAliases(r15);
  XmmRegAliases(xmm0);
  XmmRegAliases(xmm1);
  XmmRegAliases(xmm2);
  XmmRegAliases(xmm3);
  XmmRegAliases(xmm4);
  XmmRegAliases(xmm5);
  XmmRegAliases(xmm6);
  XmmRegAliases(xmm7);
  XmmRegAliases(xmm8);
  XmmRegAliases(xmm9);
  XmmRegAliases(xmm10);
  XmmRegAliases(xmm11);
  XmmRegAliases(xmm12);
  XmmRegAliases(xmm13);
  XmmRegAliases(xmm14);
  XmmRegAliases(xmm15);
#undef XmmRegAliases
#undef NewRegAliases
#undef LegacyRegAliases
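
  // For illustration (not used by the harness itself): all nicknames for a
  // given register collapse to the same encoding, so the following assertions
  // hold by construction of the alias macros above:
  //
  //   static_assert(Encoded_GPR_r1() == Encoded_GPR_rax(), "alias mismatch");
  //   static_assert(Encoded_GPR_r1() == Encoded_GPR_eax(), "alias mismatch");
  //   static_assert(Encoded_GPR_r1q() == Encoded_GPR_ax(), "alias mismatch");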

  AssemblerX8664TestBase() { reset(); }

  void reset() { Assembler = makeUnique<AssemblerX8664>(); }

  AssemblerX8664 *assembler() const { return Assembler.get(); }

  size_t codeBytesSize() const { return Assembler->getBufferView().size(); }

  const uint8_t *codeBytes() const {
    return static_cast<const uint8_t *>(
        static_cast<const void *>(Assembler->getBufferView().data()));
  }

private:
  std::unique_ptr<AssemblerX8664> Assembler;
};

// __ is a helper macro. It allows test cases to emit X8664 assembly
// instructions with
//
//   __ mov(GPRRegister::Reg_Eax, 1);
//   __ ret();
//
// and so on. The idea of having this was "stolen" from dart's unit tests.
#define __ (this->assembler())->

// AssemblerX8664LowLevelTest verifies that the "basic" instructions the tests
// rely on are encoded correctly. Therefore, instead of executing the assembled
// code, these tests verify that the assembled bytes are sane.
class AssemblerX8664LowLevelTest : public AssemblerX8664TestBase {
protected:
  // verifyBytes is a template helper that takes a Buffer and a variable number
  // of bytes. As the name indicates, it is used to verify the bytes for an
  // instruction encoding.
  template <int N, int I> static bool verifyBytes(const uint8_t *) {
    static_assert(I == N, "Invalid template instantiation.");
    return true;
  }

  template <int N, int I = 0, typename... Args>
  static bool verifyBytes(const uint8_t *Buffer, uint8_t Byte,
                          Args... OtherBytes) {
    static_assert(I < N, "Invalid template instantiation.");
    EXPECT_EQ(Byte, Buffer[I]) << "Byte " << (I + 1) << " of " << N;
    return verifyBytes<N, I + 1>(Buffer, OtherBytes...) && Buffer[I] == Byte;
  }
};
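
// A minimal usage sketch (a hypothetical test body, not part of the harness):
// emit a single-byte instruction and check its encoding against the expected
// bytes. 0xC3 is the standard x86 encoding of "ret".
//
//   TEST_F(AssemblerX8664LowLevelTest, SampleRet) {
//     __ ret();
//     static constexpr uint8_t RetEncoding = 0xC3;
//     ASSERT_EQ(1u, codeBytesSize());
//     ASSERT_TRUE(verifyBytes<1>(codeBytes(), RetEncoding));
//   }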

// After these tests we should have a sane environment; we know the following
// work:
//
//  (*) zeroing eax, ebx, ecx, edx, edi, and esi;
//  (*) call $4 instruction (used for ip materialization);
//  (*) register push and pop;
//  (*) cmp reg, reg; and
//  (*) returning from functions.
//
// We can now dive into testing each emitting method in AssemblerX8664. Each
// test will emit some instructions for performing the test. The assembled
// instructions will operate in a "safe" environment. All x86-64 registers are
// spilled to the program stack, and the registers are then zeroed out, with
// the exception of %rsp and %r9.
//
// The jitted code and the unittest code share the same stack. Therefore, each
// test needs to ensure it does not leave anything it pushed on the stack.
//
// %r9 is initialized with a pointer for rIP-based addressing. This pointer is
// used for position-independent access to a scratchpad area for use in tests.
// In theory we could use rip-based addressing, but in practice that would
// require creating fixups, which would, in turn, require creating a global
// context. We therefore rely on the same technique used for pic code in x86-32
// (i.e., IP materialization). Upon test startup, a call(NextInstruction) is
// executed. We then pop the return address from the stack, and use it for pic
// addressing.
//
// The jitted code will look like the following:
//
// test:
//       push   %r9
//       call   test$materialize_ip
// test$materialize_ip:                           <<------- %r9 will point here
//       pop    %r9
//       push   %rax
//       push   %rbx
//       push   %rcx
//       push   %rdx
//       push   %rbp
//       push   %rdi
//       push   %rsi
//       push   %r8
//       push   %r10
//       push   %r11
//       push   %r12
//       push   %r13
//       push   %r14
//       push   %r15
//       mov    $0, %rax
//       mov    $0, %rbx
//       mov    $0, %rcx
//       mov    $0, %rdx
//       mov    $0, %rbp
//       mov    $0, %rdi
//       mov    $0, %rsi
//       mov    $0, %r8
//       mov    $0, %r10
//       mov    $0, %r11
//       mov    $0, %r12
//       mov    $0, %r13
//       mov    $0, %r14
//       mov    $0, %r15
//
//       << test code goes here >>
//
//       mov    %rax, {  0 + $ScratchpadOffset}(%r9)
//       mov    %rbx, {  8 + $ScratchpadOffset}(%r9)
//       mov    %rcx, { 16 + $ScratchpadOffset}(%r9)
//       mov    %rdx, { 24 + $ScratchpadOffset}(%r9)
//       mov    %rdi, { 32 + $ScratchpadOffset}(%r9)
//       mov    %rsi, { 40 + $ScratchpadOffset}(%r9)
//       mov    %rbp, { 48 + $ScratchpadOffset}(%r9)
//       mov    %rsp, { 56 + $ScratchpadOffset}(%r9)
//       mov    %r8,  { 64 + $ScratchpadOffset}(%r9)
//       mov    %r9,  { 72 + $ScratchpadOffset}(%r9)
//       mov    %r10, { 80 + $ScratchpadOffset}(%r9)
//       mov    %r11, { 88 + $ScratchpadOffset}(%r9)
//       mov    %r12, { 96 + $ScratchpadOffset}(%r9)
//       mov    %r13, {104 + $ScratchpadOffset}(%r9)
//       mov    %r14, {112 + $ScratchpadOffset}(%r9)
//       mov    %r15, {120 + $ScratchpadOffset}(%r9)
//       movups %xmm0,  {128 + $ScratchpadOffset}(%r9)
//       movups %xmm1,  {144 + $ScratchpadOffset}(%r9)
//       movups %xmm2,  {160 + $ScratchpadOffset}(%r9)
//       movups %xmm3,  {176 + $ScratchpadOffset}(%r9)
//       movups %xmm4,  {192 + $ScratchpadOffset}(%r9)
//       movups %xmm5,  {208 + $ScratchpadOffset}(%r9)
//       movups %xmm6,  {224 + $ScratchpadOffset}(%r9)
//       movups %xmm7,  {240 + $ScratchpadOffset}(%r9)
//       movups %xmm8,  {256 + $ScratchpadOffset}(%r9)
//       movups %xmm9,  {272 + $ScratchpadOffset}(%r9)
//       movups %xmm10, {288 + $ScratchpadOffset}(%r9)
//       movups %xmm11, {304 + $ScratchpadOffset}(%r9)
//       movups %xmm12, {320 + $ScratchpadOffset}(%r9)
//       movups %xmm13, {336 + $ScratchpadOffset}(%r9)
//       movups %xmm14, {352 + $ScratchpadOffset}(%r9)
//       movups %xmm15, {368 + $ScratchpadOffset}(%r9)
//
//       pop    %r15
//       pop    %r14
//       pop    %r13
//       pop    %r12
//       pop    %r11
//       pop    %r10
//       pop    %r8
//       pop    %rsi
//       pop    %rdi
//       pop    %rbp
//       pop    %rdx
//       pop    %rcx
//       pop    %rbx
//       pop    %rax
//       pop    %r9
//       ret
//
//      << ... >>
//
// scratchpad:                               <<------- accessed via $Offset(%r9)
//
//      << test scratch area >>
//
// TODO(jpp): test the
//
//    mov %reg, $Offset(%r9)
//    movups %xmm, $Offset(%r9)
//
// encodings using the low level assembler test, ensuring that the register
// values can be written to the scratchpad area.
//
// %r9 was deliberately chosen so that every instruction accessing memory would
// fail if the rex prefix was not emitted for it.
class AssemblerX8664Test : public AssemblerX8664TestBase {
protected:
  // Dqword is used to represent 128-bit data types. The Dqword's contents are
  // the same as the contents read from memory. Tests can then use the union
  // members to verify the tests' outputs.
  //
  // NOTE: We want sizeof(Dqword) == sizeof(uint64_t) * 2. In other words, we
  // want Dqword's contents to be **exactly** what the memory contents were so
  // that we can do, e.g.,
  //
  // ...
  // float Ret[4];
  // // populate Ret
  // return *reinterpret_cast<Dqword *>(&Ret);
  //
  // While this is an ugly hack, this kind of return statement is used
  // extensively in the PackedArith (see below) class.
  union Dqword {
    template <typename T0, typename T1, typename T2, typename T3,
              typename = typename std::enable_if<
                  std::is_floating_point<T0>::value>::type>
    Dqword(T0 F0, T1 F1, T2 F2, T3 F3) {
      F32[0] = F0;
      F32[1] = F1;
      F32[2] = F2;
      F32[3] = F3;
    }

    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, int32_t>::value, T>::type I0,
           T I1, T I2, T I3) {
      I32[0] = I0;
      I32[1] = I1;
      I32[2] = I2;
      I32[3] = I3;
    }

    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, uint64_t>::value, T>::type
               U64_0,
           T U64_1) {
      U64[0] = U64_0;
      U64[1] = U64_1;
    }

    template <typename T>
    Dqword(typename std::enable_if<std::is_same<T, double>::value, T>::type D0,
           T D1) {
      F64[0] = D0;
      F64[1] = D1;
    }

    bool operator==(const Dqword &Rhs) const {
      return std::memcmp(this, &Rhs, sizeof(*this)) == 0;
    }

    double F64[2];
    uint64_t U64[2];
    int64_t I64[2];

    float F32[4];
    uint32_t U32[4];
    int32_t I32[4];

    uint16_t U16[8];
    int16_t I16[8];

    uint8_t U8[16];
    int8_t I8[16];

  private:
    Dqword() = delete;
  };

  // As stated, we want this condition to hold, so we assert.
  static_assert(sizeof(Dqword) == 2 * sizeof(uint64_t),
                "Dqword has the wrong size.");
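
  // For illustration only: a Dqword can be built from four floats, four
  // int32_t, two uint64_t, or two doubles, and then inspected through any of
  // the union members (the tests run on little-endian x86). Hypothetical
  // values:
  //
  //   const Dqword FourFloats(1.0f, 2.0f, 4.0f, 8.0f);
  //   EXPECT_FLOAT_EQ(4.0f, FourFloats.F32[2]);
  //
  //   const Dqword TwoQwords(uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0));
  //   EXPECT_EQ(0xAAAAAAAAu, TwoQwords.U32[1]);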

  // PackedArith is an interface provider for Dqwords. PackedArith's C argument
  // is the underlying Dqword's element type, which is then used so that we can
  // define operators in terms of C++ operators on the underlying elements'
  // type.
  template <typename C> class PackedArith {
  public:
    static constexpr uint32_t N = sizeof(Dqword) / sizeof(C);
    static_assert(N * sizeof(C) == sizeof(Dqword),
                  "Invalid template parameter.");
    static_assert((N & 1) == 0, "N should be divisible by 2");

#define DefinePackedComparisonOperator(Op)                                     \
  template <typename Container = C, int Size = N>                              \
  typename std::enable_if<std::is_floating_point<Container>::value,            \
                          Dqword>::type                                        \
  operator Op(const Dqword &Rhs) const {                                       \
    using ElemType =                                                           \
        typename std::conditional<std::is_same<float, Container>::value,       \
                                  int32_t, int64_t>::type;                     \
    static_assert(sizeof(ElemType) == sizeof(Container),                       \
                  "Check ElemType definition.");                               \
    const ElemType *const RhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Rhs);                         \
    const ElemType *const LhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Lhs);                         \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = (LhsPtr[i] Op RhsPtr[i]) ? -1 : 0;                              \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedComparisonOperator(< );
    DefinePackedComparisonOperator(<= );
    DefinePackedComparisonOperator(> );
    DefinePackedComparisonOperator(>= );
    DefinePackedComparisonOperator(== );
    DefinePackedComparisonOperator(!= );

#undef DefinePackedComparisonOperator
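
    // For illustration only: the packed comparison operators mimic the SSE
    // compare semantics, producing an all-ones lane (-1) where the comparison
    // holds and an all-zeros lane where it does not. Using the packedAs()
    // named constructor defined further below, a hypothetical check could be:
    //
    //   const Dqword A(1.0f, 2.0f, 3.0f, 4.0f);
    //   const Dqword B(4.0f, 2.0f, 1.0f, 0.0f);
    //   const Dqword Mask = packedAs<float>(A) < B;
    //   EXPECT_EQ(uint32_t(-1), Mask.U32[0]); // 1.0f < 4.0f holds
    //   EXPECT_EQ(uint32_t(0), Mask.U32[1]);  // 2.0f < 2.0f does not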

#define DefinePackedOrdUnordComparisonOperator(Op, Ordered)                    \
  template <typename Container = C, int Size = N>                              \
  typename std::enable_if<std::is_floating_point<Container>::value,            \
                          Dqword>::type                                        \
  Op(const Dqword &Rhs) const {                                                \
    using ElemType =                                                           \
        typename std::conditional<std::is_same<float, Container>::value,       \
                                  int32_t, int64_t>::type;                     \
    static_assert(sizeof(ElemType) == sizeof(Container),                       \
                  "Check ElemType definition.");                               \
    const Container *const RhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Rhs);                        \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = (!(LhsPtr[i] == LhsPtr[i]) || !(RhsPtr[i] == RhsPtr[i])) !=     \
                       (Ordered)                                               \
                   ? -1                                                        \
                   : 0;                                                        \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedOrdUnordComparisonOperator(ord, true);
    DefinePackedOrdUnordComparisonOperator(unord, false);
#undef DefinePackedOrdUnordComparisonOperator

#define DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt)               \
  template <typename Container = C, int Size = N>                              \
  Dqword operator Op(const Dqword &Rhs) const {                                \
    using ElemTypeForFp = typename std::conditional<                           \
        !(NeedsInt), Container,                                                \
        typename std::conditional<                                             \
            std::is_same<Container, float>::value, uint32_t,                   \
            typename std::conditional<std::is_same<Container, double>::value,  \
                                      uint64_t, void>::type>::type>::type;     \
    using ElemType =                                                           \
        typename std::conditional<std::is_integral<Container>::value,          \
                                  Container, ElemTypeForFp>::type;             \
    static_assert(!std::is_same<void, ElemType>::value,                        \
                  "Check ElemType definition.");                               \
    const ElemType *const RhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Rhs);                         \
    const ElemType *const LhsPtr =                                             \
        reinterpret_cast<const ElemType *const>(&Lhs);                         \
    ElemType Ret[N];                                                           \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = LhsPtr[i] Op RhsPtr[(RhsIndexChanges) ? i : 0];                 \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedArithOperator(>>, false, true);
    DefinePackedArithOperator(<<, false, true);
    DefinePackedArithOperator(+, true, false);
    DefinePackedArithOperator(-, true, false);
    DefinePackedArithOperator(/, true, false);
    DefinePackedArithOperator(&, true, true);
    DefinePackedArithOperator(|, true, true);
    DefinePackedArithOperator (^, true, true);

#undef DefinePackedArithOperator

#define DefinePackedArithShiftImm(Op)                                          \
  template <typename Container = C, int Size = N>                              \
  Dqword operator Op(uint8_t imm) const {                                      \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    Container Ret[N];                                                          \
    for (uint32_t i = 0; i < N; ++i) {                                         \
      Ret[i] = LhsPtr[i] Op imm;                                               \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    DefinePackedArithShiftImm(>> );
    DefinePackedArithShiftImm(<< );

#undef DefinePackedArithShiftImm

    template <typename Container = C, int Size = N>
    typename std::enable_if<std::is_signed<Container>::value ||
                                std::is_floating_point<Container>::value,
                            Dqword>::type
    operator*(const Dqword &Rhs) const {
      static_assert((std::is_integral<Container>::value &&
                     sizeof(Container) < sizeof(uint64_t)) ||
                        std::is_floating_point<Container>::value,
                    "* is only defined for i(8|16|32), and fp types.");

      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      Container Ret[Size];
      for (uint32_t i = 0; i < Size; ++i) {
        Ret[i] = LhsPtr[i] * RhsPtr[i];
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

    template <typename Container = C, int Size = N,
              typename = typename std::enable_if<
                  !std::is_signed<Container>::value>::type>
    Dqword operator*(const Dqword &Rhs) const {
      static_assert(std::is_integral<Container>::value &&
                        sizeof(Container) < sizeof(uint64_t),
                    "* is only defined for ui(8|16|32)");
      using NextType = typename std::conditional<
          sizeof(Container) == 1, uint16_t,
          typename std::conditional<sizeof(Container) == 2, uint32_t,
                                    uint64_t>::type>::type;
      static_assert(sizeof(Container) * 2 == sizeof(NextType),
                    "Unexpected size");

      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      NextType Ret[Size / 2];
      for (uint32_t i = 0; i < Size; i += 2) {
        Ret[i / 2] =
            static_cast<NextType>(LhsPtr[i]) * static_cast<NextType>(RhsPtr[i]);
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

    template <typename Container = C, int Size = N>
    PackedArith<Container> operator~() const {
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      Container Ret[Size];
      for (uint32_t i = 0; i < Size; ++i) {
        Ret[i] = ~LhsPtr[i];
      }
      return PackedArith<Container>(*reinterpret_cast<Dqword *>(&Ret));
    }

#define MinMaxOperations(Name, Suffix)                                         \
  template <typename Container = C, int Size = N>                              \
  Dqword Name##Suffix(const Dqword &Rhs) const {                               \
    static_assert(std::is_floating_point<Container>::value,                    \
                  #Name #Suffix " is only available for fp.");                 \
    const Container *const RhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Rhs);                        \
    const Container *const LhsPtr =                                            \
        reinterpret_cast<const Container *const>(&Lhs);                        \
    Container Ret[Size];                                                       \
    for (uint32_t i = 0; i < Size; ++i) {                                      \
      Ret[i] = std::Name(LhsPtr[i], RhsPtr[i]);                                \
    }                                                                          \
    return *reinterpret_cast<Dqword *>(&Ret);                                  \
  }

    MinMaxOperations(max, ps);
    MinMaxOperations(max, pd);
    MinMaxOperations(min, ps);
    MinMaxOperations(min, pd);
#undef MinMaxOperations

    template <typename Container = C, int Size = N>
    Dqword blendWith(const Dqword &Rhs, const Dqword &Mask) const {
      using MaskType = typename std::conditional<
          sizeof(Container) == 1, int8_t,
          typename std::conditional<sizeof(Container) == 2, int16_t,
                                    int32_t>::type>::type;
      static_assert(sizeof(MaskType) == sizeof(Container),
                    "MaskType has the wrong size.");
      const Container *const RhsPtr =
          reinterpret_cast<const Container *const>(&Rhs);
      const Container *const LhsPtr =
          reinterpret_cast<const Container *const>(&Lhs);
      const MaskType *const MaskPtr =
          reinterpret_cast<const MaskType *const>(&Mask);
      Container Ret[Size];
      for (int i = 0; i < Size; ++i) {
        Ret[i] = ((MaskPtr[i] < 0) ? RhsPtr : LhsPtr)[i];
      }
      return *reinterpret_cast<Dqword *>(&Ret);
    }

  private:
    // The AssemblerX8664Test class needs to be a friend so that it can create
    // PackedArith objects (see below.)
    friend class AssemblerX8664Test;

    explicit PackedArith(const Dqword &MyLhs) : Lhs(MyLhs) {}

    // Lhs can't be a reference because operator~ returns a temporary object
    // that needs access to its own Dqword.
    const Dqword Lhs;
  };

  // Named constructor for PackedArith objects.
  template <typename C> static PackedArith<C> packedAs(const Dqword &D) {
    return PackedArith<C>(D);
  }
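
  // For illustration only: packedAs<C>() wraps a Dqword so that lane-wise
  // reference results can be computed with ordinary C++ operators. A
  // hypothetical expected-value computation:
  //
  //   const Dqword A(uint64_t(0x0123456789ABCDEFull), uint64_t(1));
  //   const Dqword B(uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(2));
  //   const Dqword AndAB = packedAs<uint64_t>(A) & B; // lane-wise AND
  //   const Dqword SumAB = packedAs<uint64_t>(A) + B; // lane-wise ADD
  //   EXPECT_EQ(uint64_t(0), AndAB.U64[1]);           // 1 & 2
  //   EXPECT_EQ(uint64_t(3), SumAB.U64[1]);           // 1 + 2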

  AssemblerX8664Test() { reset(); }

  void reset() {
    AssemblerX8664TestBase::reset();

    NeedsEpilogue = true;
    // These dwords are allocated for saving the GPR state after the jitted
    // code runs.
    NumAllocatedDwords = AssembledTest::ScratchpadSlots;
    addPrologue();
  }

  // AssembledTest is a wrapper around a PROT_EXEC mmap'ed buffer. This buffer
  // contains both the test code and the prologue/epilogue, as well as the
  // scratchpad area that tests may use -- all tests use this scratchpad area
  // for storing the processor's registers after the test executes. This class
  // also exposes helper methods for reading the register state after test
  // execution, as well as for reading the scratchpad area.
  class AssembledTest {
    AssembledTest() = delete;
    AssembledTest(const AssembledTest &) = delete;
    AssembledTest &operator=(const AssembledTest &) = delete;

  public:
    static constexpr uint32_t MaximumCodeSize = 1 << 20;
    static constexpr uint32_t raxSlot() { return 0; }
    static constexpr uint32_t rbxSlot() { return 2; }
    static constexpr uint32_t rcxSlot() { return 4; }
    static constexpr uint32_t rdxSlot() { return 6; }
    static constexpr uint32_t rdiSlot() { return 8; }
    static constexpr uint32_t rsiSlot() { return 10; }
    static constexpr uint32_t rbpSlot() { return 12; }
    static constexpr uint32_t rspSlot() { return 14; }
    static constexpr uint32_t r8Slot() { return 16; }
    static constexpr uint32_t r9Slot() { return 18; }
    static constexpr uint32_t r10Slot() { return 20; }
    static constexpr uint32_t r11Slot() { return 22; }
    static constexpr uint32_t r12Slot() { return 24; }
    static constexpr uint32_t r13Slot() { return 26; }
    static constexpr uint32_t r14Slot() { return 28; }
    static constexpr uint32_t r15Slot() { return 30; }

    // Save 4 dwords for each xmm register.
    static constexpr uint32_t xmm0Slot() { return 32; }
    static constexpr uint32_t xmm1Slot() { return 36; }
    static constexpr uint32_t xmm2Slot() { return 40; }
    static constexpr uint32_t xmm3Slot() { return 44; }
    static constexpr uint32_t xmm4Slot() { return 48; }
    static constexpr uint32_t xmm5Slot() { return 52; }
    static constexpr uint32_t xmm6Slot() { return 56; }
    static constexpr uint32_t xmm7Slot() { return 60; }
    static constexpr uint32_t xmm8Slot() { return 64; }
    static constexpr uint32_t xmm9Slot() { return 68; }
    static constexpr uint32_t xmm10Slot() { return 72; }
    static constexpr uint32_t xmm11Slot() { return 76; }
    static constexpr uint32_t xmm12Slot() { return 80; }
    static constexpr uint32_t xmm13Slot() { return 84; }
    static constexpr uint32_t xmm14Slot() { return 88; }
    static constexpr uint32_t xmm15Slot() { return 92; }

    static constexpr uint32_t ScratchpadSlots = 96;

    AssembledTest(const uint8_t *Data, const size_t MySize,
                  const size_t ExtraStorageDwords)
        : Size(MaximumCodeSize + 4 * ExtraStorageDwords) {
      // MaxCodeSize is needed because EXPECT_LT needs a symbol with a name --
      // probably a compiler bug?
      uint32_t MaxCodeSize = MaximumCodeSize;
      EXPECT_LT(MySize, MaxCodeSize);
      assert(MySize < MaximumCodeSize);

#if defined(__unix__)
      ExecutableData = mmap(nullptr, Size, PROT_WRITE | PROT_READ | PROT_EXEC,
                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
      EXPECT_NE(MAP_FAILED, ExecutableData) << strerror(errno);
      assert(MAP_FAILED != ExecutableData);
#elif defined(_WIN32)
      ExecutableData = VirtualAlloc(NULL, Size, MEM_COMMIT | MEM_RESERVE,
                                    PAGE_EXECUTE_READWRITE);
      EXPECT_NE(nullptr, ExecutableData) << strerror(errno);
      assert(nullptr != ExecutableData);
#else
#error "Platform unsupported"
#endif

      std::memcpy(ExecutableData, Data, MySize);
    }

    // We allow AssembledTest to be moved so that we can return objects of
    // this type.
    AssembledTest(AssembledTest &&Buffer)
        : ExecutableData(Buffer.ExecutableData), Size(Buffer.Size) {
      Buffer.ExecutableData = nullptr;
      Buffer.Size = 0;
    }

    AssembledTest &operator=(AssembledTest &&Buffer) {
      ExecutableData = Buffer.ExecutableData;
      Buffer.ExecutableData = nullptr;
      Size = Buffer.Size;
      Buffer.Size = 0;
      return *this;
    }

    ~AssembledTest() {
      if (ExecutableData != nullptr) {
#if defined(__unix__)
        munmap(ExecutableData, Size);
#elif defined(_WIN32)
        VirtualFree(ExecutableData, 0, MEM_RELEASE);
#else
#error "Platform unsupported"
#endif
        ExecutableData = nullptr;
      }
    }

    void run() const { reinterpret_cast<void (*)()>(ExecutableData)(); }

#define LegacyRegAccessors(NewName, Name64, Name32, Name16, Name8)             \
  static_assert(Encoded_GPR_##NewName() == Encoded_GPR_##Name64(),             \
                "Invalid aliasing.");                                          \
  uint64_t NewName() const {                                                   \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##q() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint64_t NewName##q() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##d() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint32_t NewName##d() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##w() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint16_t NewName##w() const {                                                \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##NewName##l() == Encoded_GPR_##Name64(),          \
                "Invalid aliasing.");                                          \
  uint8_t NewName##l() const {                                                 \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name64() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint64_t Name64() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name32() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint32_t Name32() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name16() == Encoded_GPR_##Name64(),              \
                "Invalid aliasing.");                                          \
  uint16_t Name16() const {                                                    \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }                                                                            \
  static_assert(Encoded_GPR_##Name8() == Encoded_GPR_##Name64(),               \
                "Invalid aliasing.");                                          \
  uint8_t Name8() const {                                                      \
    return contentsOfQword(AssembledTest::Name64##Slot());                     \
  }
#define NewRegAccessors(NewName)                                               \
  uint64_t NewName() const {                                                   \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint64_t NewName##q() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint32_t NewName##d() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint16_t NewName##w() const {                                                \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }                                                                            \
  uint8_t NewName##l() const {                                                 \
    return contentsOfQword(AssembledTest::NewName##Slot());                    \
  }
#define XmmRegAccessor(Name)                                                   \
  template <typename T> T Name() const {                                       \
    return xmm<T>(AssembledTest::Name##Slot());                                \
  }
    LegacyRegAccessors(r0, rsp, esp, sp, spl);
    LegacyRegAccessors(r1, rax, eax, ax, al);
    LegacyRegAccessors(r2, rbx, ebx, bx, bl);
    LegacyRegAccessors(r3, rcx, ecx, cx, cl);
    LegacyRegAccessors(r4, rdx, edx, dx, dl);
    LegacyRegAccessors(r5, rbp, ebp, bp, bpl);
    LegacyRegAccessors(r6, rsi, esi, si, sil);
    LegacyRegAccessors(r7, rdi, edi, di, dil);
    NewRegAccessors(r8);
    NewRegAccessors(r9);
    NewRegAccessors(r10);
    NewRegAccessors(r11);
    NewRegAccessors(r12);
    NewRegAccessors(r13);
    NewRegAccessors(r14);
    NewRegAccessors(r15);
    XmmRegAccessor(xmm0);
    XmmRegAccessor(xmm1);
    XmmRegAccessor(xmm2);
    XmmRegAccessor(xmm3);
    XmmRegAccessor(xmm4);
    XmmRegAccessor(xmm5);
    XmmRegAccessor(xmm6);
    XmmRegAccessor(xmm7);
    XmmRegAccessor(xmm8);
    XmmRegAccessor(xmm9);
    XmmRegAccessor(xmm10);
    XmmRegAccessor(xmm11);
    XmmRegAccessor(xmm12);
    XmmRegAccessor(xmm13);
    XmmRegAccessor(xmm14);
    XmmRegAccessor(xmm15);
#undef XmmRegAccessor
#undef NewRegAccessors
#undef LegacyRegAccessors

    // contentsOfDword is used for reading the values in the scratchpad area.
    // Valid arguments are the dword ids returned by
    // AssemblerX8664Test::allocateDword() -- other inputs are considered
    // invalid, and are not guaranteed to work if the implementation changes.
    template <typename T = uint32_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint32_t)>::type>
    T contentsOfDword(uint32_t Dword) const {
      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(Dword));
    }

    template <typename T = uint64_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint64_t)>::type>
    T contentsOfQword(uint32_t InitialDword) const {
      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(InitialDword));
    }

    Dqword contentsOfDqword(uint32_t InitialDword) const {
      return *reinterpret_cast<Dqword *>(
                 static_cast<uint8_t *>(ExecutableData) +
                 dwordOffset(InitialDword));
    }

    template <typename T = uint32_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint32_t)>::type>
    void setDwordTo(uint32_t Dword, T value) {
      *reinterpret_cast<uint32_t *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(Dword)) =
          *reinterpret_cast<uint32_t *>(&value);
    }

    template <typename T = uint64_t, typename = typename std::enable_if<
                                         sizeof(T) == sizeof(uint64_t)>::type>
    void setQwordTo(uint32_t InitialDword, T value) {
      *reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(ExecutableData) +
                                    dwordOffset(InitialDword)) =
          *reinterpret_cast<uint64_t *>(&value);
    }

    void setDqwordTo(uint32_t InitialDword, const Dqword &qdword) {
      setQwordTo(InitialDword, qdword.U64[0]);
      setQwordTo(InitialDword + 2, qdword.U64[1]);
    }

  private:
    template <typename T>
    typename std::enable_if<std::is_same<T, Dqword>::value, Dqword>::type
    xmm(uint8_t Slot) const {
      return contentsOfDqword(Slot);
    }

    template <typename T>
    typename std::enable_if<!std::is_same<T, Dqword>::value, T>::type
    xmm(uint8_t Slot) const {
      constexpr bool TIs64Bit = sizeof(T) == sizeof(uint64_t);
      using _64BitType = typename std::conditional<TIs64Bit, T, uint64_t>::type;
      using _32BitType = typename std::conditional<TIs64Bit, uint32_t, T>::type;
      if (TIs64Bit) {
        return contentsOfQword<_64BitType>(Slot);
      }
      return contentsOfDword<_32BitType>(Slot);
    }

    static uint32_t dwordOffset(uint32_t Index) {
      return MaximumCodeSize + (Index * 4);
    }

    void *ExecutableData = nullptr;
    size_t Size;
  };

  // assemble creates an AssembledTest with the jitted code. The first time
  // assemble is executed it will add the epilogue to the jitted code (which is
  // the reason why this method is not const qualified).
  AssembledTest assemble() {
    if (NeedsEpilogue) {
      addEpilogue();
    }
    NeedsEpilogue = false;

    for (const auto *Fixup : assembler()->fixups()) {
      Fixup->emitOffset(assembler());
    }

    return AssembledTest(codeBytes(), codeBytesSize(), NumAllocatedDwords);
  }
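
  // For illustration only: a typical test built on this harness emits code
  // with the __ macro, assembles it, runs it, and then inspects the register
  // accessors and/or the scratchpad. A hypothetical example:
  //
  //   TEST_F(AssemblerX8664Test, SampleMov) {
  //     constexpr uint32_t Expected = 0xC0FFEE;
  //     __ mov(IceType_i32, Encoded_GPR_r1(), Immediate(Expected));
  //     AssembledTest test = assemble();
  //     test.run();
  //     ASSERT_EQ(Expected, test.eax());
  //   }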

  // Allocates a new dword slot in the test's scratchpad area.
  uint32_t allocateDword() { return NumAllocatedDwords++; }

  // Allocates a new qword slot in the test's scratchpad area.
  uint32_t allocateQword() {
    uint32_t InitialDword = allocateDword();
    allocateDword();
    return InitialDword;
  }

  // Allocates a new dqword slot in the test's scratchpad area.
  uint32_t allocateDqword() {
    uint32_t InitialDword = allocateQword();
    allocateQword();
    return InitialDword;
  }

  Address dwordAddress(uint32_t Dword) {
    return Address(Encoded_GPR_r9(), dwordDisp(Dword), nullptr);
  }
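
  // For illustration only: tests that need memory operands allocate slots in
  // the scratchpad and address them through %r9 via dwordAddress(). A
  // hypothetical sequence (assuming the assembler provides a register<-memory
  // mov overload):
  //
  //   const uint32_t T0 = allocateQword();
  //   __ mov(IceType_i64, Encoded_GPR_r2(), dwordAddress(T0));
  //   AssembledTest test = assemble();
  //   test.setQwordTo(T0, uint64_t(0x1234567890ABCDEFull));
  //   test.run();
  //   ASSERT_EQ(0x1234567890ABCDEFull, test.r2());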

private:
  // The <reg>SlotAddress methods return an AssemblerX8664::Traits::Address
  // that can be used by the test cases to encode an address operand for
  // accessing the slot for the specified register. These are all private
  // because tests should not tamper with these slots when jitting the test
  // code. Besides, during the test execution these slots' contents are
  // undefined and should not be accessed.
  Address raxSlotAddress() { return dwordAddress(AssembledTest::raxSlot()); }
  Address rbxSlotAddress() { return dwordAddress(AssembledTest::rbxSlot()); }
  Address rcxSlotAddress() { return dwordAddress(AssembledTest::rcxSlot()); }
  Address rdxSlotAddress() { return dwordAddress(AssembledTest::rdxSlot()); }
  Address rdiSlotAddress() { return dwordAddress(AssembledTest::rdiSlot()); }
  Address rsiSlotAddress() { return dwordAddress(AssembledTest::rsiSlot()); }
  Address rbpSlotAddress() { return dwordAddress(AssembledTest::rbpSlot()); }
  Address rspSlotAddress() { return dwordAddress(AssembledTest::rspSlot()); }
  Address r8SlotAddress() { return dwordAddress(AssembledTest::r8Slot()); }
  Address r9SlotAddress() { return dwordAddress(AssembledTest::r9Slot()); }
  Address r10SlotAddress() { return dwordAddress(AssembledTest::r10Slot()); }
  Address r11SlotAddress() { return dwordAddress(AssembledTest::r11Slot()); }
  Address r12SlotAddress() { return dwordAddress(AssembledTest::r12Slot()); }
  Address r13SlotAddress() { return dwordAddress(AssembledTest::r13Slot()); }
  Address r14SlotAddress() { return dwordAddress(AssembledTest::r14Slot()); }
  Address r15SlotAddress() { return dwordAddress(AssembledTest::r15Slot()); }
  Address xmm0SlotAddress() { return dwordAddress(AssembledTest::xmm0Slot()); }
  Address xmm1SlotAddress() { return dwordAddress(AssembledTest::xmm1Slot()); }
  Address xmm2SlotAddress() { return dwordAddress(AssembledTest::xmm2Slot()); }
  Address xmm3SlotAddress() { return dwordAddress(AssembledTest::xmm3Slot()); }
  Address xmm4SlotAddress() { return dwordAddress(AssembledTest::xmm4Slot()); }
  Address xmm5SlotAddress() { return dwordAddress(AssembledTest::xmm5Slot()); }
  Address xmm6SlotAddress() { return dwordAddress(AssembledTest::xmm6Slot()); }
  Address xmm7SlotAddress() { return dwordAddress(AssembledTest::xmm7Slot()); }
  Address xmm8SlotAddress() { return dwordAddress(AssembledTest::xmm8Slot()); }
  Address xmm9SlotAddress() { return dwordAddress(AssembledTest::xmm9Slot()); }
  Address xmm10SlotAddress() {
    return dwordAddress(AssembledTest::xmm10Slot());
  }
  Address xmm11SlotAddress() {
    return dwordAddress(AssembledTest::xmm11Slot());
  }
  Address xmm12SlotAddress() {
    return dwordAddress(AssembledTest::xmm12Slot());
  }
  Address xmm13SlotAddress() {
    return dwordAddress(AssembledTest::xmm13Slot());
  }
  Address xmm14SlotAddress() {
    return dwordAddress(AssembledTest::xmm14Slot());
  }
  Address xmm15SlotAddress() {
    return dwordAddress(AssembledTest::xmm15Slot());
  }

  // Returns the displacement that should be used when accessing the specified
  // Dword in the scratchpad area. It needs to adjust for the initial
  // instructions that are emitted before the call that materializes the IP
  // register.
  uint32_t dwordDisp(uint32_t Dword) const {
    EXPECT_LT(Dword, NumAllocatedDwords);
    assert(Dword < NumAllocatedDwords);
    static constexpr uint8_t PushR9Bytes = 2;
    static constexpr uint8_t CallImmBytes = 5;
    return AssembledTest::MaximumCodeSize + (Dword * 4) -
           (PushR9Bytes + CallImmBytes);
  }
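
  // Worked example: scratchpad Dword 0 lives at byte offset MaximumCodeSize
  // (1 MiB) from the start of the executable buffer. Since %r9 is materialized
  // 7 bytes into the buffer (2 bytes for "push %r9" plus 5 bytes for
  // "call <imm32>"), the displacement used to reach Dword 0 through %r9 is
  // MaximumCodeSize - 7, and each subsequent Dword adds 4 bytes.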

  void addPrologue() {
    __ pushl(Encoded_GPR_r9());
    __ call(Immediate(4));
    __ popl(Encoded_GPR_r9());

    __ pushl(Encoded_GPR_rax());
    __ pushl(Encoded_GPR_rbx());
    __ pushl(Encoded_GPR_rcx());
    __ pushl(Encoded_GPR_rdx());
    __ pushl(Encoded_GPR_rbp());
    __ pushl(Encoded_GPR_rdi());
    __ pushl(Encoded_GPR_rsi());
    __ pushl(Encoded_GPR_r8());
    __ pushl(Encoded_GPR_r10());
    __ pushl(Encoded_GPR_r11());
    __ pushl(Encoded_GPR_r12());
    __ pushl(Encoded_GPR_r13());
    __ pushl(Encoded_GPR_r14());
    __ pushl(Encoded_GPR_r15());

    __ mov(IceType_i32, Encoded_GPR_rax(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rbx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rcx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rdx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rbp(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rdi(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rsi(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r8(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r10(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r11(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r12(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r13(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r14(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r15(), Immediate(0x00));
  }

  void addEpilogue() {
    __ mov(IceType_i64, raxSlotAddress(), Encoded_GPR_rax());
    __ mov(IceType_i64, rbxSlotAddress(), Encoded_GPR_rbx());
    __ mov(IceType_i64, rcxSlotAddress(), Encoded_GPR_rcx());
    __ mov(IceType_i64, rdxSlotAddress(), Encoded_GPR_rdx());
    __ mov(IceType_i64, rdiSlotAddress(), Encoded_GPR_rdi());
    __ mov(IceType_i64, rsiSlotAddress(), Encoded_GPR_rsi());
    __ mov(IceType_i64, rbpSlotAddress(), Encoded_GPR_rbp());
    __ mov(IceType_i64, rspSlotAddress(), Encoded_GPR_rsp());
    __ mov(IceType_i64, r8SlotAddress(), Encoded_GPR_r8());
    __ mov(IceType_i64, r9SlotAddress(), Encoded_GPR_r9());
    __ mov(IceType_i64, r10SlotAddress(), Encoded_GPR_r10());
    __ mov(IceType_i64, r11SlotAddress(), Encoded_GPR_r11());
    __ mov(IceType_i64, r12SlotAddress(), Encoded_GPR_r12());
    __ mov(IceType_i64, r13SlotAddress(), Encoded_GPR_r13());
    __ mov(IceType_i64, r14SlotAddress(), Encoded_GPR_r14());
    __ mov(IceType_i64, r15SlotAddress(), Encoded_GPR_r15());
    __ movups(xmm0SlotAddress(), Encoded_Xmm_xmm0());
    __ movups(xmm1SlotAddress(), Encoded_Xmm_xmm1());
    __ movups(xmm2SlotAddress(), Encoded_Xmm_xmm2());
    __ movups(xmm3SlotAddress(), Encoded_Xmm_xmm3());
    __ movups(xmm4SlotAddress(), Encoded_Xmm_xmm4());
    __ movups(xmm5SlotAddress(), Encoded_Xmm_xmm5());
    __ movups(xmm6SlotAddress(), Encoded_Xmm_xmm6());
    __ movups(xmm7SlotAddress(), Encoded_Xmm_xmm7());
    __ movups(xmm8SlotAddress(), Encoded_Xmm_xmm8());
    __ movups(xmm9SlotAddress(), Encoded_Xmm_xmm9());
    __ movups(xmm10SlotAddress(), Encoded_Xmm_xmm10());
    __ movups(xmm11SlotAddress(), Encoded_Xmm_xmm11());
    __ movups(xmm12SlotAddress(), Encoded_Xmm_xmm12());
    __ movups(xmm13SlotAddress(), Encoded_Xmm_xmm13());
    __ movups(xmm14SlotAddress(), Encoded_Xmm_xmm14());
    __ movups(xmm15SlotAddress(), Encoded_Xmm_xmm15());

    __ popl(Encoded_GPR_r15());
    __ popl(Encoded_GPR_r14());
    __ popl(Encoded_GPR_r13());
    __ popl(Encoded_GPR_r12());
    __ popl(Encoded_GPR_r11());
    __ popl(Encoded_GPR_r10());
    __ popl(Encoded_GPR_r8());
    __ popl(Encoded_GPR_rsi());
    __ popl(Encoded_GPR_rdi());
    __ popl(Encoded_GPR_rbp());
    __ popl(Encoded_GPR_rdx());
    __ popl(Encoded_GPR_rcx());
    __ popl(Encoded_GPR_rbx());
    __ popl(Encoded_GPR_rax());
    __ popl(Encoded_GPR_r9());

    __ ret();
  }

  bool NeedsEpilogue;
  uint32_t NumAllocatedDwords;
};

} // end of namespace Test
} // end of namespace X8664
} // end of namespace Ice

#endif // ASSEMBLERX8664_TESTUTIL_H_