1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringX8664 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringX8664.h"
16 
17 #include "IceDefs.h"
18 #include "IceTargetLoweringX8664Traits.h"
19 
20 #if defined(SUBZERO_USE_MICROSOFT_ABI)
21 extern "C" void __chkstk();
22 #endif
23 
24 namespace X8664 {
createTargetLowering(::Ice::Cfg * Func)25 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
26   return ::Ice::X8664::TargetX8664::create(Func);
27 }
28 
29 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)30 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
31   return ::Ice::X8664::TargetDataX86<::Ice::X8664::TargetX8664Traits>::create(
32       Ctx);
33 }
34 
35 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)36 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
37   return ::Ice::X8664::TargetHeaderX86::create(Ctx);
38 }
39 
staticInit(::Ice::GlobalContext * Ctx)40 void staticInit(::Ice::GlobalContext *Ctx) {
41   ::Ice::X8664::TargetX8664::staticInit(Ctx);
42 }
43 
shouldBePooled(const class::Ice::Constant * C)44 bool shouldBePooled(const class ::Ice::Constant *C) {
45   return ::Ice::X8664::TargetX8664::shouldBePooled(C);
46 }
47 
getPointerType()48 ::Ice::Type getPointerType() {
49   return ::Ice::X8664::TargetX8664::getPointerType();
50 }
51 
52 } // end of namespace X8664
53 
54 namespace Ice {
55 namespace X8664 {
56 
57 //------------------------------------------------------------------------------
58 //      ______   ______     ______     __     ______   ______
59 //     /\__  _\ /\  == \   /\  __ \   /\ \   /\__  _\ /\  ___\
60 //     \/_/\ \/ \ \  __<   \ \  __ \  \ \ \  \/_/\ \/ \ \___  \
61 //        \ \_\  \ \_\ \_\  \ \_\ \_\  \ \_\    \ \_\  \/\_____\
62 //         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
63 //
64 //------------------------------------------------------------------------------
65 const TargetX8664Traits::TableFcmpType TargetX8664Traits::TableFcmp[] = {
66 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
67   {dflt,                                                                       \
68    swapS,                                                                      \
69    X8664::Traits::Cond::C1,                                                    \
70    X8664::Traits::Cond::C2,                                                    \
71    swapV,                                                                      \
72    X8664::Traits::Cond::pred},
73     FCMPX8664_TABLE
74 #undef X
75 };
76 
77 const size_t TargetX8664Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);
78 
79 const TargetX8664Traits::TableIcmp32Type TargetX8664Traits::TableIcmp32[] = {
80 #define X(val, C_32, C1_64, C2_64, C3_64) {X8664::Traits::Cond::C_32},
81     ICMPX8664_TABLE
82 #undef X
83 };
84 
85 const size_t TargetX8664Traits::TableIcmp32Size =
86     llvm::array_lengthof(TableIcmp32);
87 
88 const TargetX8664Traits::TableIcmp64Type TargetX8664Traits::TableIcmp64[] = {
89 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
90   {X8664::Traits::Cond::C1_64, X8664::Traits::Cond::C2_64,                     \
91    X8664::Traits::Cond::C3_64},
92     ICMPX8664_TABLE
93 #undef X
94 };
95 
96 const size_t TargetX8664Traits::TableIcmp64Size =
97     llvm::array_lengthof(TableIcmp64);
98 
99 const TargetX8664Traits::TableTypeX8664AttributesType
100     TargetX8664Traits::TableTypeX8664Attributes[] = {
101 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
102   {IceType_##elty},
103         ICETYPEX8664_TABLE
104 #undef X
105 };
106 
107 const size_t TargetX8664Traits::TableTypeX8664AttributesSize =
108     llvm::array_lengthof(TableTypeX8664Attributes);
109 
110 const uint32_t TargetX8664Traits::X86_STACK_ALIGNMENT_BYTES = 16;
111 const char *TargetX8664Traits::TargetName = "X8664";
112 
113 template <>
114 std::array<SmallBitVector, RCX86_NUM>
115     TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
116 
117 template <>
118 std::array<SmallBitVector, RCX86_NUM>
119     TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};
120 
121 template <>
122 std::array<SmallBitVector,
123            TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
124     TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
125 
126 template <>
127 FixupKind TargetX86Base<X8664::Traits>::PcRelFixup =
128     TargetX86Base<X8664::Traits>::Traits::FK_PcRel;
129 
130 template <>
131 FixupKind TargetX86Base<X8664::Traits>::AbsFixup =
132     TargetX86Base<X8664::Traits>::Traits::FK_Abs;
133 
134 //------------------------------------------------------------------------------
135 //     __      ______  __     __  ______  ______  __  __   __  ______
136 //    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
137 //    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
138 //     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
139 //      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
140 //
141 //------------------------------------------------------------------------------
_add_sp(Operand * Adjustment)142 void TargetX8664::_add_sp(Operand *Adjustment) {
143   Variable *rsp =
144       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
145   if (!NeedSandboxing) {
146     _add(rsp, Adjustment);
147     return;
148   }
149 
150   Variable *esp =
151       getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
152   Variable *r15 =
153       getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
154 
155   // When incrementing rsp, NaCl sandboxing requires the following sequence
156   //
157   // .bundle_start
158   // add Adjustment, %esp
159   // add %r15, %rsp
160   // .bundle_end
161   //
162   // In Subzero, even though rsp and esp alias each other, defining one does not
163   // define the other. Therefore, we must emit
164   //
165   // .bundle_start
166   // %esp = fake-def %rsp
167   // add Adjustment, %esp
168   // %rsp = fake-def %esp
169   // add %r15, %rsp
170   // .bundle_end
171   //
172   // The fake-defs ensure that the
173   //
174   // add Adjustment, %esp
175   //
176   // instruction is not DCE'd.
177   AutoBundle _(this);
178   _redefined(Context.insert<InstFakeDef>(esp, rsp));
179   _add(esp, Adjustment);
180   _redefined(Context.insert<InstFakeDef>(rsp, esp));
181   _add(rsp, r15);
182 }
183 
_mov_sp(Operand * NewValue)184 void TargetX8664::_mov_sp(Operand *NewValue) {
185   assert(NewValue->getType() == IceType_i32);
186 
187   Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
188   Variable *rsp =
189       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
190 
191   AutoBundle _(this);
192 
193   _redefined(Context.insert<InstFakeDef>(esp, rsp));
194   _redefined(_mov(esp, NewValue));
195   _redefined(Context.insert<InstFakeDef>(rsp, esp));
196 
197   if (!NeedSandboxing) {
198     return;
199   }
200 
201   Variable *r15 =
202       getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
203   _add(rsp, r15);
204 }
205 
_push_rbp()206 void TargetX8664::_push_rbp() {
207   assert(NeedSandboxing);
208 
209   Constant *_0 = Ctx->getConstantZero(IceType_i32);
210   Variable *ebp =
211       getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
212   Variable *rsp =
213       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
214   auto *TopOfStack = llvm::cast<X86OperandMem>(
215       legalize(X86OperandMem::create(Func, IceType_i32, rsp, _0),
216                Legal_Reg | Legal_Mem));
217 
218   // Emits a sequence:
219   //
220   //   .bundle_start
221   //   push 0
222   //   mov %ebp, %(rsp)
223   //   .bundle_end
224   //
225   // to avoid leaking the upper 32-bits (i.e., the sandbox address.)
226   AutoBundle _(this);
227   _push(_0);
228   Context.insert<typename Traits::Insts::Store>(ebp, TopOfStack);
229 }
230 
_link_bp()231 void TargetX8664::_link_bp() {
232   Variable *esp =
233       getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
234   Variable *rsp =
235       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
236   Variable *ebp =
237       getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
238   Variable *rbp =
239       getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
240   Variable *r15 =
241       getPhysicalRegister(Traits::RegisterSet::Reg_r15, Traits::WordType);
242 
243   if (!NeedSandboxing) {
244     _push(rbp);
245     _mov(rbp, rsp);
246   } else {
247     _push_rbp();
248 
249     AutoBundle _(this);
250     _redefined(Context.insert<InstFakeDef>(ebp, rbp));
251     _redefined(Context.insert<InstFakeDef>(esp, rsp));
252     _mov(ebp, esp);
253     _redefined(Context.insert<InstFakeDef>(rsp, esp));
254     _add(rbp, r15);
255   }
256   // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
257   Context.insert<InstFakeUse>(rbp);
258 }
259 
_unlink_bp()260 void TargetX8664::_unlink_bp() {
261   Variable *rsp =
262       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
263   Variable *rbp =
264       getPhysicalRegister(Traits::RegisterSet::Reg_rbp, IceType_i64);
265   Variable *ebp =
266       getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
267   // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
268   // use of rsp before the assignment of rsp=rbp keeps previous rsp
269   // adjustments from being dead-code eliminated.
270   Context.insert<InstFakeUse>(rsp);
271   if (!NeedSandboxing) {
272     _mov(rsp, rbp);
273     _pop(rbp);
274   } else {
275     _mov_sp(ebp);
276 
277     Variable *r15 =
278         getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
279     Variable *rcx =
280         getPhysicalRegister(Traits::RegisterSet::Reg_rcx, IceType_i64);
281     Variable *ecx =
282         getPhysicalRegister(Traits::RegisterSet::Reg_ecx, IceType_i32);
283 
284     _pop(rcx);
285     Context.insert<InstFakeDef>(ecx, rcx);
286     AutoBundle _(this);
287     _mov(ebp, ecx);
288 
289     _redefined(Context.insert<InstFakeDef>(rbp, ebp));
290     _add(rbp, r15);
291   }
292 }
293 
_push_reg(RegNumT RegNum)294 void TargetX8664::_push_reg(RegNumT RegNum) {
295   if (Traits::isXmm(RegNum)) {
296     Variable *reg = getPhysicalRegister(RegNum, IceType_v4f32);
297     Variable *rsp =
298         getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
299     auto *address =
300         Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
301     _sub_sp(
302         Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets
303                                     // and adjust the stack pointer once.
304     _storep(reg, address);
305   } else if (RegNum != Traits::RegisterSet::Reg_rbp || !NeedSandboxing) {
306     _push(getPhysicalRegister(RegNum, Traits::WordType));
307   } else {
308     _push_rbp();
309   }
310 }
311 
_pop_reg(RegNumT RegNum)312 void TargetX8664::_pop_reg(RegNumT RegNum) {
313   if (Traits::isXmm(RegNum)) {
314     Variable *reg = getPhysicalRegister(RegNum, IceType_v4f32);
315     Variable *rsp =
316         getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
317     auto *address =
318         Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
319     _movp(reg, address);
320     _add_sp(
321         Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets
322                                     // and adjust the stack pointer once.
323   } else {
324     _pop(getPhysicalRegister(RegNum, Traits::WordType));
325   }
326 }
327 
emitGetIP(CfgNode * Node)328 void TargetX8664::emitGetIP(CfgNode *Node) {
329   // No IP base register is needed on X86-64.
330   (void)Node;
331 }
332 
333 namespace {
isAssignedToRspOrRbp(const Variable * Var)334 bool isAssignedToRspOrRbp(const Variable *Var) {
335   if (Var == nullptr) {
336     return false;
337   }
338 
339   if (Var->isRematerializable()) {
340     return true;
341   }
342 
343   if (!Var->hasReg()) {
344     return false;
345   }
346 
347   const auto RegNum = Var->getRegNum();
348   if ((RegNum == Traits::RegisterSet::Reg_rsp) ||
349       (RegNum == Traits::RegisterSet::Reg_rbp)) {
350     return true;
351   }
352 
353   return false;
354 }
355 } // end of anonymous namespace
356 
_sandbox_mem_reference(X86OperandMem * Mem)357 Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
358   if (SandboxingType == ST_None) {
359     return Mem;
360   }
361 
362   if (SandboxingType == ST_Nonsfi) {
363     llvm::report_fatal_error(
364         "_sandbox_mem_reference not implemented for nonsfi");
365   }
366 
367   // In x86_64-nacl, all memory references are relative to a base register
368   // (%r15, %rsp, %rbp, or %rip).
369 
370   Variable *Base = Mem->getBase();
371   Variable *Index = Mem->getIndex();
372   uint16_t Shift = 0;
373   Variable *ZeroReg = RebasePtr;
374   Constant *Offset = Mem->getOffset();
375   Variable *T = nullptr;
376 
377   bool AbsoluteAddress = false;
378   if (Base == nullptr && Index == nullptr) {
379     if (llvm::isa<ConstantRelocatable>(Offset)) {
380       // Mem is RIP-relative. There's no need to rebase it.
381       return Mem;
382     }
383     // Offset is an absolute address, so we need to emit
384     //   Offset(%r15)
385     AbsoluteAddress = true;
386   }
387 
388   if (Mem->getIsRebased()) {
389     // If Mem.IsRebased, then we don't need to update Mem, as it's already been
390     // updated to contain a reference to one of %rsp, %rbp, or %r15.
391     // We don't return early because we still need to zero extend Index.
392     assert(ZeroReg == Base || AbsoluteAddress || isAssignedToRspOrRbp(Base));
393     if (!AbsoluteAddress) {
394       // If Mem is an absolute address, no need to update ZeroReg (which is
395       // already set to %r15.)
396       ZeroReg = Base;
397     }
398     if (Index != nullptr) {
399       T = makeReg(IceType_i32);
400       _mov(T, Index);
401       Shift = Mem->getShift();
402     }
403   } else {
404     if (Base != nullptr) {
405       // If Base is a valid base pointer we don't need to use the RebasePtr. By
406       // doing this we might save us the need to zero extend the memory operand.
407       if (isAssignedToRspOrRbp(Base)) {
408         ZeroReg = Base;
409       } else {
410         T = Base;
411       }
412     }
413 
414     if (Index != nullptr) {
415       assert(!Index->isRematerializable());
416       // If Index is not nullptr, it is mandatory that T is a nullptr.
417       // Otherwise, the lowering generated a memory operand with two registers.
418       // Note that Base might still be non-nullptr, but it must be a valid
419       // base register.
420       if (T != nullptr) {
421         llvm::report_fatal_error("memory reference contains base and index.");
422       }
423       // If the Index is not shifted, and it is a Valid Base, and the ZeroReg is
424       // still RebasePtr, then we do ZeroReg = Index, and hopefully prevent the
425       // need to zero-extend the memory operand (which may still happen -- see
426       // NeedLea below.)
427       if (Shift == 0 && isAssignedToRspOrRbp(Index) && ZeroReg == RebasePtr) {
428         ZeroReg = Index;
429       } else {
430         T = Index;
431         Shift = Mem->getShift();
432       }
433     }
434   }
435 
436   // NeedsLea is a flag indicating whether Mem needs to be materialized to a GPR
437   // prior to being used. A LEA is needed if Mem.Offset is a constant
438   // relocatable with a nonzero offset, or if Mem.Offset is a nonzero immediate;
439   // but only when the address mode contains a "user" register other than the
440   // rsp/rbp/r15 base. In both these cases, the LEA is needed to ensure the
441   // sandboxed memory operand will only use the lower 32-bits of T+Offset.
442   bool NeedsLea = false;
443   if (!Mem->getIsRebased()) {
444     bool IsOffsetZero = false;
445     if (Offset == nullptr) {
446       IsOffsetZero = true;
447     } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
448       IsOffsetZero = (CR->getOffset() == 0);
449     } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Offset)) {
450       IsOffsetZero = (Imm->getValue() == 0);
451     } else {
452       llvm::report_fatal_error("Unexpected Offset type.");
453     }
454     if (!IsOffsetZero) {
455       if (Base != nullptr && Base != ZeroReg)
456         NeedsLea = true;
457       if (Index != nullptr && Index != ZeroReg)
458         NeedsLea = true;
459     }
460   }
461 
462   RegNumT RegNum, RegNum32;
463   if (T != nullptr) {
464     if (T->hasReg()) {
465       RegNum = Traits::getGprForType(IceType_i64, T->getRegNum());
466       RegNum32 = Traits::getGprForType(IceType_i32, RegNum);
467       // At this point, if T was assigned to rsp/rbp, then we would have already
468       // made this the ZeroReg.
469       assert(RegNum != Traits::RegisterSet::Reg_rsp);
470       assert(RegNum != Traits::RegisterSet::Reg_rbp);
471     }
472 
473     switch (T->getType()) {
474     default:
475       llvm::report_fatal_error("Mem pointer should be a 32-bit GPR.");
476     case IceType_i64:
477       // Even though "default:" would also catch T.Type == IceType_i64, an
478       // explicit 'case IceType_i64' shows that memory operands are always
479       // supposed to be 32-bits.
480       llvm::report_fatal_error("Mem pointer should not be a 64-bit GPR.");
481     case IceType_i32: {
482       Variable *T64 = makeReg(IceType_i64, RegNum);
483       auto *Movzx = _movzx(T64, T);
484       if (!NeedsLea) {
485         // This movzx is only needed when Mem does not need to be lea'd into a
486         // temporary. If an lea is going to be emitted, then eliding this movzx
487         // is safe because the emitted lea will write a 32-bit result --
488         // implicitly zero-extended to 64-bit.
489         Movzx->setMustKeep();
490       }
491       T = T64;
492     } break;
493     }
494   }
495 
496   if (NeedsLea) {
497     Variable *NewT = makeReg(IceType_i32, RegNum32);
498     Variable *Base = T;
499     Variable *Index = T;
500     static constexpr bool NotRebased = false;
501     if (Shift == 0) {
502       Index = nullptr;
503     } else {
504       Base = nullptr;
505     }
506     _lea(NewT, Traits::X86OperandMem::create(
507                    Func, Mem->getType(), Base, Offset, Index, Shift,
508                    Traits::X86OperandMem::DefaultSegment, NotRebased));
509 
510     T = makeReg(IceType_i64, RegNum);
511     _movzx(T, NewT);
512     Shift = 0;
513     Offset = nullptr;
514   }
515 
516   static constexpr bool IsRebased = true;
517   return Traits::X86OperandMem::create(
518       Func, Mem->getType(), ZeroReg, Offset, T, Shift,
519       Traits::X86OperandMem::DefaultSegment, IsRebased);
520 }
521 
_sub_sp(Operand * Adjustment)522 void TargetX8664::_sub_sp(Operand *Adjustment) {
523   Variable *rsp =
524       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
525 
526   if (NeedSandboxing) {
527     Variable *esp =
528         getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
529     Variable *r15 =
530         getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
531 
532     // .bundle_start
533     // sub Adjustment, %esp
534     // add %r15, %rsp
535     // .bundle_end
536     AutoBundle _(this);
537     _redefined(Context.insert<InstFakeDef>(esp, rsp));
538     _sub(esp, Adjustment);
539     _redefined(Context.insert<InstFakeDef>(rsp, esp));
540     _add(rsp, r15);
541   } else {
542     _sub(rsp, Adjustment);
543   }
544 
545   // Add a fake use of the stack pointer, to prevent the stack pointer adustment
546   // from being dead-code eliminated in a function that doesn't return.
547   Context.insert<InstFakeUse>(rsp);
548 }
549 
initRebasePtr()550 void TargetX8664::initRebasePtr() {
551   switch (SandboxingType) {
552   case ST_Nonsfi:
553     // Probably no implementation is needed, but error to be safe for now.
554     llvm::report_fatal_error(
555         "initRebasePtr() is not yet implemented on x32-nonsfi.");
556   case ST_NaCl:
557     RebasePtr = getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
558     break;
559   case ST_None:
560     // nothing.
561     break;
562   }
563 }
564 
initSandbox()565 void TargetX8664::initSandbox() {
566   assert(SandboxingType == ST_NaCl);
567   Context.init(Func->getEntryNode());
568   Context.setInsertPoint(Context.getCur());
569   Variable *r15 =
570       getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
571   Context.insert<InstFakeDef>(r15);
572   Context.insert<InstFakeUse>(r15);
573 }
574 
575 namespace {
isRematerializable(const Variable * Var)576 bool isRematerializable(const Variable *Var) {
577   return Var != nullptr && Var->isRematerializable();
578 }
579 } // end of anonymous namespace
580 
legalizeOptAddrForSandbox(OptAddr * Addr)581 bool TargetX8664::legalizeOptAddrForSandbox(OptAddr *Addr) {
582   if (SandboxingType == ST_Nonsfi) {
583     llvm::report_fatal_error("Nonsfi not yet implemented for x8664.");
584   }
585 
586   if (isRematerializable(Addr->Base)) {
587     if (Addr->Index == RebasePtr) {
588       Addr->Index = nullptr;
589       Addr->Shift = 0;
590     }
591     return true;
592   }
593 
594   if (isRematerializable(Addr->Index)) {
595     if (Addr->Base == RebasePtr) {
596       Addr->Base = nullptr;
597     }
598     return true;
599   }
600 
601   assert(Addr->Base != RebasePtr && Addr->Index != RebasePtr);
602 
603   if (Addr->Base == nullptr) {
604     return true;
605   }
606 
607   if (Addr->Index == nullptr) {
608     return true;
609   }
610 
611   return false;
612 }
613 
lowerIndirectJump(Variable * JumpTarget)614 void TargetX8664::lowerIndirectJump(Variable *JumpTarget) {
615   std::unique_ptr<AutoBundle> Bundler;
616 
617   if (!NeedSandboxing) {
618     if (JumpTarget->getType() != IceType_i64) {
619       Variable *T = makeReg(IceType_i64);
620       _movzx(T, JumpTarget);
621       JumpTarget = T;
622     }
623   } else {
624     Variable *T = makeReg(IceType_i32);
625     Variable *T64 = makeReg(IceType_i64);
626     Variable *r15 =
627         getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
628 
629     _mov(T, JumpTarget);
630     Bundler = makeUnique<AutoBundle>(this);
631     const SizeT BundleSize =
632         1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
633     _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
634     _movzx(T64, T);
635     _add(T64, r15);
636     JumpTarget = T64;
637   }
638 
639   _jmp(JumpTarget);
640 }
641 
emitCallToTarget(Operand * CallTarget,Variable * ReturnReg,size_t NumVariadicFpArgs)642 Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
643                                     size_t NumVariadicFpArgs) {
644   Inst *NewCall = nullptr;
645   auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
646   if (NeedSandboxing) {
647     // In NaCl sandbox, calls are replaced by a push/jmp pair:
648     //
649     //     push .after_call
650     //     jmp CallTarget
651     //     .align bundle_size
652     // after_call:
653     //
654     // In order to emit this sequence, we need a temporary label ("after_call",
655     // in this example.)
656     //
657     // The operand to push is a ConstantRelocatable. The easy way to implement
658     // this sequence is to create a ConstantRelocatable(0, "after_call"), but
659     // this ends up creating more relocations for the linker to resolve.
660     // Therefore, we create a ConstantRelocatable from the name of the function
661     // being compiled (i.e., ConstantRelocatable(after_call - Func, Func).
662     //
663     // By default, ConstantRelocatables are emitted (in textual output) as
664     //
665     //  ConstantName + Offset
666     //
667     // ReturnReloc has an offset that is only known during binary emission.
668     // Therefore, we set a custom emit string for ReturnReloc that will be
669     // used instead. In this particular case, the code will be emitted as
670     //
671     //  push .after_call
672     InstX86Label *ReturnAddress = InstX86Label::create(Func, this);
673     auto *ReturnRelocOffset = RelocOffset::create(Func->getAssembler());
674     ReturnAddress->setRelocOffset(ReturnRelocOffset);
675     constexpr RelocOffsetT NoFixedOffset = 0;
676     const std::string EmitString =
677         BuildDefs::dump() ? ReturnAddress->getLabelName().toString() : "";
678     auto *ReturnReloc = ConstantRelocatable::create(
679         Func->getAssembler(), IceType_i32,
680         RelocatableTuple(NoFixedOffset, {ReturnRelocOffset},
681                          Func->getFunctionName(), EmitString));
682     /* AutoBundle scoping */ {
683       std::unique_ptr<AutoBundle> Bundler;
684       if (CallTargetR == nullptr) {
685         Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
686         _push(ReturnReloc);
687       } else {
688         Variable *T = makeReg(IceType_i32);
689         Variable *T64 = makeReg(IceType_i64);
690         Variable *r15 =
691             getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
692 
693         _mov(T, CallTargetR);
694         Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
695         _push(ReturnReloc);
696         const SizeT BundleSize =
697             1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
698         _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
699         _movzx(T64, T);
700         _add(T64, r15);
701         CallTarget = T64;
702       }
703       NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget);
704     }
705     if (ReturnReg != nullptr) {
706       Context.insert<InstFakeDef>(ReturnReg);
707     }
708 
709     Context.insert(ReturnAddress);
710   } else {
711     if (CallTargetR != nullptr && CallTarget->getType() == IceType_i32) {
712       // x86-64 in PNaCl is ILP32. Therefore, CallTarget is i32, but the
713       // emitted call needs an i64 register (for textual asm.)
714       Variable *T = makeReg(IceType_i64);
715       _movzx(T, CallTargetR);
716       CallTarget = T;
717 
718     } else if (CallTarget->getType() == IceType_i64) {
719       // x86-64 does not support 64-bit direct calls, so write the value to a
720       // register and make an indirect call for Constant call targets.
721       RegNumT TargetReg = {};
722 
723       // System V: force r11 when calling a variadic function so that rax isn't
724       // used, since rax stores the number of FP args (see NumVariadicFpArgs
725       // usage below).
726 #if !defined(SUBZERO_USE_MICROSOFT_ABI)
727       if (NumVariadicFpArgs > 0)
728         TargetReg = Traits::RegisterSet::Reg_r11;
729 #endif
730 
731       if (llvm::isa<Constant>(CallTarget)) {
732         Variable *T = makeReg(IceType_i64, TargetReg);
733         _mov(T, CallTarget);
734         CallTarget = T;
735       } else if (llvm::isa<Variable>(CallTarget)) {
736         Operand *T = legalizeToReg(CallTarget, TargetReg);
737         CallTarget = T;
738       }
739     }
740 
741     // System V: store number of FP args in RAX for variadic calls
742 #if !defined(SUBZERO_USE_MICROSOFT_ABI)
743     if (NumVariadicFpArgs > 0) {
744       // Store number of FP args (stored in XMM registers) in RAX for variadic
745       // calls
746       auto *NumFpArgs = Ctx->getConstantInt64(NumVariadicFpArgs);
747       Variable *NumFpArgsReg =
748           legalizeToReg(NumFpArgs, Traits::RegisterSet::Reg_rax);
749       Context.insert<InstFakeUse>(NumFpArgsReg);
750     }
751 #endif
752 
753     NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
754   }
755   return NewCall;
756 }
757 
moveReturnValueToRegister(Operand * Value,Type ReturnType)758 Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
759                                                  Type ReturnType) {
760   if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) {
761     return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
762   } else {
763     assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
764     Variable *Reg = nullptr;
765     _mov(Reg, Value,
766          Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax));
767     return Reg;
768   }
769 }
770 
emitSandboxedReturn()771 void TargetX8664::emitSandboxedReturn() {
772   Variable *T_rcx = makeReg(IceType_i64, Traits::RegisterSet::Reg_rcx);
773   Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
774   _pop(T_rcx);
775   _mov(T_ecx, T_rcx);
776   // lowerIndirectJump(T_ecx);
777   Variable *r15 =
778       getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
779 
780   /* AutoBundle scoping */ {
781     AutoBundle _(this);
782     const SizeT BundleSize =
783         1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
784     _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
785     Context.insert<InstFakeDef>(T_rcx, T_ecx);
786     _add(T_rcx, r15);
787 
788     _jmp(T_rcx);
789   }
790 }
791 
emitStackProbe(size_t StackSizeBytes)792 void TargetX8664::emitStackProbe(size_t StackSizeBytes) {
793 #if defined(SUBZERO_USE_MICROSOFT_ABI)
794   // Mirroring the behavior of MSVC here, which emits a _chkstk when locals are
795   // >= 4KB, rather than the 8KB claimed by the docs.
796   if (StackSizeBytes >= 4096) {
797     // __chkstk on Win64 probes the stack up to RSP - EAX, but does not clobber
798     // RSP, so we don't need to save and restore it.
799 
800     Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
801     _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));
802 
803     auto *CallTarget =
804         Ctx->getConstantInt64(reinterpret_cast<int64_t>(&__chkstk));
805     Operand *CallTargetReg =
806         legalizeToReg(CallTarget, Traits::RegisterSet::Reg_r11);
807     emitCallToTarget(CallTargetReg, nullptr);
808   }
809 #endif
810 }
811 
812 // In some cases, there are x-macros tables for both high-level and low-level
813 // instructions/operands that use the same enum key value. The tables are kept
814 // separate to maintain a proper separation between abstraction layers. There
815 // is a risk that the tables could get out of sync if enum values are reordered
816 // or if entries are added or deleted. The following dummy namespaces use
817 // static_asserts to ensure everything is kept in sync.
818 
819 namespace {
820 // Validate the enum values in FCMPX8664_TABLE.
821 namespace dummy1 {
822 // Define a temporary set of enum values based on low-level table entries.
823 enum _tmp_enum {
824 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
825   FCMPX8664_TABLE
826 #undef X
827       _num
828 };
829 // Define a set of constants based on high-level table entries.
830 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
831 ICEINSTFCMP_TABLE
832 #undef X
833 // Define a set of constants based on low-level table entries, and ensure the
834 // table entry keys are consistent.
835 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
836   static const int _table2_##val = _tmp_##val;                                 \
837   static_assert(                                                               \
838       _table1_##val == _table2_##val,                                          \
839       "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
840 FCMPX8664_TABLE
841 #undef X
842 // Repeat the static asserts with respect to the high-level table entries in
843 // case the high-level table has extra entries.
844 #define X(tag, str)                                                            \
845   static_assert(                                                               \
846       _table1_##tag == _table2_##tag,                                          \
847       "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
848 ICEINSTFCMP_TABLE
849 #undef X
850 } // end of namespace dummy1
851 
852 // Validate the enum values in ICMPX8664_TABLE.
853 namespace dummy2 {
854 // Define a temporary set of enum values based on low-level table entries.
855 enum _tmp_enum {
856 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
857   ICMPX8664_TABLE
858 #undef X
859       _num
860 };
861 // Define a set of constants based on high-level table entries.
862 #define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
863 ICEINSTICMP_TABLE
864 #undef X
865 // Define a set of constants based on low-level table entries, and ensure the
866 // table entry keys are consistent.
867 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
868   static const int _table2_##val = _tmp_##val;                                 \
869   static_assert(                                                               \
870       _table1_##val == _table2_##val,                                          \
871       "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
872 ICMPX8664_TABLE
873 #undef X
874 // Repeat the static asserts with respect to the high-level table entries in
875 // case the high-level table has extra entries.
876 #define X(tag, reverse, str)                                                   \
877   static_assert(                                                               \
878       _table1_##tag == _table2_##tag,                                          \
879       "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
880 ICEINSTICMP_TABLE
881 #undef X
882 } // end of namespace dummy2
883 
884 // Validate the enum values in ICETYPEX8664_TABLE.
885 namespace dummy3 {
886 // Define a temporary set of enum values based on low-level table entries.
887 enum _tmp_enum {
888 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
889   _tmp_##tag,
890   ICETYPEX8664_TABLE
891 #undef X
892       _num
893 };
894 // Define a set of constants based on high-level table entries.
895 #define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
896   static const int _table1_##tag = IceType_##tag;
897 ICETYPE_TABLE
898 #undef X
899 // Define a set of constants based on low-level table entries, and ensure the
900 // table entry keys are consistent.
901 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
902   static const int _table2_##tag = _tmp_##tag;                                 \
903   static_assert(_table1_##tag == _table2_##tag,                                \
904                 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
905 ICETYPEX8664_TABLE
906 #undef X
907 // Repeat the static asserts with respect to the high-level table entries in
908 // case the high-level table has extra entries.
909 #define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
910   static_assert(_table1_##tag == _table2_##tag,                                \
911                 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
912 ICETYPE_TABLE
913 #undef X
914 } // end of namespace dummy3
915 } // end of anonymous namespace
916 
917 } // end of namespace X8664
918 } // end of namespace Ice
919