1 //===- subzero/src/IceAssemblerX86Base.h - base x86 assembler -*- C++ -*---===//
2 //
3 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
4 // for details. All rights reserved. Use of this source code is governed by a
5 // BSD-style license that can be found in the LICENSE file.
6 //
7 // Modified by the Subzero authors.
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 //                        The Subzero Code Generator
12 //
13 // This file is distributed under the University of Illinois Open Source
14 // License. See LICENSE.TXT for details.
15 //
16 //===----------------------------------------------------------------------===//
17 //
18 /// \file
19 /// \brief Defines the AssemblerX86 template class for x86, the base of all X86
20 /// assemblers.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #ifndef SUBZERO_SRC_ICEASSEMBLERX86BASE_H
25 #define SUBZERO_SRC_ICEASSEMBLERX86BASE_H
26 
27 #include "IceAssembler.h"
28 #include "IceDefs.h"
29 #include "IceOperand.h"
30 #include "IceTypes.h"
31 #include "IceUtils.h"
32 
33 namespace Ice {
34 
35 #ifndef X86NAMESPACE
36 #error "You must define the X86 Target namespace."
37 #endif
38 
39 namespace X86NAMESPACE {
40 
41 template <typename TraitsType>
42 class AssemblerX86Base : public ::Ice::Assembler {
43   AssemblerX86Base(const AssemblerX86Base &) = delete;
44   AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
45 
46 protected:
47   explicit AssemblerX86Base(
48       bool EmitAddrSizeOverridePrefix = TraitsType::Is64Bit)
Assembler(Traits::AsmKind)49       : Assembler(Traits::AsmKind),
50         EmitAddrSizeOverridePrefix(EmitAddrSizeOverridePrefix) {
51     assert(Traits::Is64Bit || !EmitAddrSizeOverridePrefix);
52   }
53 
54 public:
55   using Traits = TraitsType;
56   using Address = typename Traits::Address;
57   using ByteRegister = typename Traits::ByteRegister;
58   using BrCond = typename Traits::Cond::BrCond;
59   using CmppsCond = typename Traits::Cond::CmppsCond;
60   using GPRRegister = typename Traits::GPRRegister;
61   using Operand = typename Traits::Operand;
62   using XmmRegister = typename Traits::XmmRegister;
63 
64   static constexpr int MAX_NOP_SIZE = 8;
65 
classof(const Assembler * Asm)66   static bool classof(const Assembler *Asm) {
67     return Asm->getKind() == Traits::AsmKind;
68   }
69 
70   class Immediate {
71     Immediate(const Immediate &) = delete;
72     Immediate &operator=(const Immediate &) = delete;
73 
74   public:
Immediate(int32_t value)75     explicit Immediate(int32_t value) : value_(value) {}
76 
Immediate(AssemblerFixup * fixup)77     explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {}
78 
value()79     int32_t value() const { return value_; }
fixup()80     AssemblerFixup *fixup() const { return fixup_; }
81 
is_int8()82     bool is_int8() const {
83       // We currently only allow 32-bit fixups, and they usually have value = 0,
84       // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
85       return fixup_ == nullptr && Utils::IsInt(8, value_);
86     }
is_uint8()87     bool is_uint8() const {
88       return fixup_ == nullptr && Utils::IsUint(8, value_);
89     }
is_uint16()90     bool is_uint16() const {
91       return fixup_ == nullptr && Utils::IsUint(16, value_);
92     }
93 
94   private:
95     const int32_t value_ = 0;
96     AssemblerFixup *fixup_ = nullptr;
97   };
98 
99   /// X86 allows near and far jumps.
100   class Label final : public Ice::Label {
101     Label(const Label &) = delete;
102     Label &operator=(const Label &) = delete;
103 
104   public:
105     Label() = default;
106     ~Label() = default;
107 
finalCheck()108     void finalCheck() const override {
109       Ice::Label::finalCheck();
110       assert(!hasNear());
111     }
112 
113     /// Returns the position of an earlier branch instruction which assumes that
114     /// this label is "near", and bumps iterator to the next near position.
getNearPosition()115     intptr_t getNearPosition() {
116       assert(hasNear());
117       intptr_t Pos = UnresolvedNearPositions.back();
118       UnresolvedNearPositions.pop_back();
119       return Pos;
120     }
121 
hasNear()122     bool hasNear() const { return !UnresolvedNearPositions.empty(); }
isUnused()123     bool isUnused() const override {
124       return Ice::Label::isUnused() && !hasNear();
125     }
126 
127   private:
128     friend class AssemblerX86Base<TraitsType>;
129 
nearLinkTo(const Assembler & Asm,intptr_t position)130     void nearLinkTo(const Assembler &Asm, intptr_t position) {
131       if (Asm.getPreliminary())
132         return;
133       assert(!isBound());
134       UnresolvedNearPositions.push_back(position);
135     }
136 
137     llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
138   };
139 
140 public:
141   ~AssemblerX86Base() override;
142 
143   static const bool kNearJump = true;
144   static const bool kFarJump = false;
145 
146   void alignFunction() override;
147 
getBundleAlignLog2Bytes()148   SizeT getBundleAlignLog2Bytes() const override { return 5; }
149 
getAlignDirective()150   const char *getAlignDirective() const override { return ".p2align"; }
151 
getNonExecBundlePadding()152   llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
153     static const uint8_t Padding[] = {0xF4};
154     return llvm::ArrayRef<uint8_t>(Padding, 1);
155   }
156 
padWithNop(intptr_t Padding)157   void padWithNop(intptr_t Padding) override {
158     while (Padding > MAX_NOP_SIZE) {
159       nop(MAX_NOP_SIZE);
160       Padding -= MAX_NOP_SIZE;
161     }
162     if (Padding)
163       nop(Padding);
164   }
165 
166   Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
167   void bindCfgNodeLabel(const CfgNode *Node) override;
168   Label *getOrCreateCfgNodeLabel(SizeT Number);
169   Label *getOrCreateLocalLabel(SizeT Number);
170   void bindLocalLabel(SizeT Number);
171 
fixupIsPCRel(FixupKind Kind)172   bool fixupIsPCRel(FixupKind Kind) const override {
173     // Currently assuming this is the only PC-rel relocation type used.
174     // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
175     return Kind == Traits::FK_PcRel;
176   }
177 
178   // Operations to emit GPR instructions (and dispatch on operand type).
179   using TypedEmitGPR = void (AssemblerX86Base::*)(Type, GPRRegister);
180   using TypedEmitAddr = void (AssemblerX86Base::*)(Type, const Address &);
181   struct GPREmitterOneOp {
182     TypedEmitGPR Reg;
183     TypedEmitAddr Addr;
184   };
185 
186   using TypedEmitGPRGPR = void (AssemblerX86Base::*)(Type, GPRRegister,
187                                                      GPRRegister);
188   using TypedEmitGPRAddr = void (AssemblerX86Base::*)(Type, GPRRegister,
189                                                       const Address &);
190   using TypedEmitGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
191                                                      const Immediate &);
192   struct GPREmitterRegOp {
193     TypedEmitGPRGPR GPRGPR;
194     TypedEmitGPRAddr GPRAddr;
195     TypedEmitGPRImm GPRImm;
196   };
197 
198   struct GPREmitterShiftOp {
199     // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
200     // not. In practice, we always normalize the Dest to a Register first.
201     TypedEmitGPRGPR GPRGPR;
202     TypedEmitGPRImm GPRImm;
203   };
204 
205   using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
206                                                         GPRRegister,
207                                                         const Immediate &);
208   struct GPREmitterShiftD {
209     // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
210     // always normalize Dest to a Register first.
211     TypedEmitGPRGPR GPRGPR;
212     TypedEmitGPRGPRImm GPRGPRImm;
213   };
214 
215   using TypedEmitAddrGPR = void (AssemblerX86Base::*)(Type, const Address &,
216                                                       GPRRegister);
217   using TypedEmitAddrImm = void (AssemblerX86Base::*)(Type, const Address &,
218                                                       const Immediate &);
219   struct GPREmitterAddrOp {
220     TypedEmitAddrGPR AddrGPR;
221     TypedEmitAddrImm AddrImm;
222   };
223 
224   // Operations to emit XMM instructions (and dispatch on operand type).
225   using TypedEmitXmmXmm = void (AssemblerX86Base::*)(Type, XmmRegister,
226                                                      XmmRegister);
227   using TypedEmitXmmAddr = void (AssemblerX86Base::*)(Type, XmmRegister,
228                                                       const Address &);
229   struct XmmEmitterRegOp {
230     TypedEmitXmmXmm XmmXmm;
231     TypedEmitXmmAddr XmmAddr;
232   };
233 
234   using EmitXmmXmm = void (AssemblerX86Base::*)(XmmRegister, XmmRegister);
235   using EmitXmmAddr = void (AssemblerX86Base::*)(XmmRegister, const Address &);
236   using EmitAddrXmm = void (AssemblerX86Base::*)(const Address &, XmmRegister);
237   struct XmmEmitterMovOps {
238     EmitXmmXmm XmmXmm;
239     EmitXmmAddr XmmAddr;
240     EmitAddrXmm AddrXmm;
241   };
242 
243   using TypedEmitXmmImm = void (AssemblerX86Base::*)(Type, XmmRegister,
244                                                      const Immediate &);
245 
246   struct XmmEmitterShiftOp {
247     TypedEmitXmmXmm XmmXmm;
248     TypedEmitXmmAddr XmmAddr;
249     TypedEmitXmmImm XmmImm;
250   };
251 
252   // Cross Xmm/GPR cast instructions.
253   template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
254     using TypedEmitRegs = void (AssemblerX86Base::*)(Type, DReg_t, Type,
255                                                      SReg_t);
256     using TypedEmitAddr = void (AssemblerX86Base::*)(Type, DReg_t, Type,
257                                                      const Address &);
258 
259     TypedEmitRegs RegReg;
260     TypedEmitAddr RegAddr;
261   };
262 
263   // Three operand (potentially) cross Xmm/GPR instructions. The last operand
264   // must be an immediate.
265   template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
266     using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
267                                                           const Immediate &);
268     using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(Type, DReg_t,
269                                                            const Address &,
270                                                            const Immediate &);
271 
272     TypedEmitRegRegImm RegRegImm;
273     TypedEmitRegAddrImm RegAddrImm;
274   };
275 
276   /*
277    * Emit Machine Instructions.
278    */
279   void call(GPRRegister reg);
280   void call(const Address &address);
281   void call(const ConstantRelocatable *label); // not testable.
282   void call(const Immediate &abs_address);
283 
284   static const intptr_t kCallExternalLabelSize = 5;
285 
286   void pushl(GPRRegister reg);
287   void pushl(const Immediate &Imm);
288   void pushl(const ConstantRelocatable *Label);
289 
290   void popl(GPRRegister reg);
291   void popl(const Address &address);
292 
293   template <typename T = Traits,
294             typename = typename std::enable_if<T::HasPusha>::type>
295   void pushal();
296   template <typename T = Traits,
297             typename = typename std::enable_if<T::HasPopa>::type>
298   void popal();
299 
300   void setcc(BrCond condition, ByteRegister dst);
301   void setcc(BrCond condition, const Address &address);
302 
303   void mov(Type Ty, GPRRegister dst, const Immediate &src);
304   void mov(Type Ty, GPRRegister dst, GPRRegister src);
305   void mov(Type Ty, GPRRegister dst, const Address &src);
306   void mov(Type Ty, const Address &dst, GPRRegister src);
307   void mov(Type Ty, const Address &dst, const Immediate &imm);
308 
309   template <typename T = Traits>
310   typename std::enable_if<T::Is64Bit, void>::type movabs(const GPRRegister Dst,
311                                                          uint64_t Imm64);
312   template <typename T = Traits>
movabs(const GPRRegister,uint64_t)313   typename std::enable_if<!T::Is64Bit, void>::type movabs(const GPRRegister,
314                                                           uint64_t) {
315     llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
316   }
317 
318   void movzx(Type Ty, GPRRegister dst, GPRRegister src);
319   void movzx(Type Ty, GPRRegister dst, const Address &src);
320   void movsx(Type Ty, GPRRegister dst, GPRRegister src);
321   void movsx(Type Ty, GPRRegister dst, const Address &src);
322 
323   void lea(Type Ty, GPRRegister dst, const Address &src);
324 
325   void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
326   void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
327 
328   void rep_movsb();
329 
330   void movss(Type Ty, XmmRegister dst, const Address &src);
331   void movss(Type Ty, const Address &dst, XmmRegister src);
332   void movss(Type Ty, XmmRegister dst, XmmRegister src);
333 
334   void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
335   void movd(Type SrcTy, XmmRegister dst, const Address &src);
336   void movd(Type DestTy, GPRRegister dst, XmmRegister src);
337   void movd(Type DestTy, const Address &dst, XmmRegister src);
338 
339   void movq(XmmRegister dst, XmmRegister src);
340   void movq(const Address &dst, XmmRegister src);
341   void movq(XmmRegister dst, const Address &src);
342 
343   void addss(Type Ty, XmmRegister dst, XmmRegister src);
344   void addss(Type Ty, XmmRegister dst, const Address &src);
345   void subss(Type Ty, XmmRegister dst, XmmRegister src);
346   void subss(Type Ty, XmmRegister dst, const Address &src);
347   void mulss(Type Ty, XmmRegister dst, XmmRegister src);
348   void mulss(Type Ty, XmmRegister dst, const Address &src);
349   void divss(Type Ty, XmmRegister dst, XmmRegister src);
350   void divss(Type Ty, XmmRegister dst, const Address &src);
351 
352   void movaps(XmmRegister dst, XmmRegister src);
353 
354   void movups(XmmRegister dst, XmmRegister src);
355   void movups(XmmRegister dst, const Address &src);
356   void movups(const Address &dst, XmmRegister src);
357 
358   void padd(Type Ty, XmmRegister dst, XmmRegister src);
359   void padd(Type Ty, XmmRegister dst, const Address &src);
360   void padds(Type Ty, XmmRegister dst, XmmRegister src);
361   void padds(Type Ty, XmmRegister dst, const Address &src);
362   void paddus(Type Ty, XmmRegister dst, XmmRegister src);
363   void paddus(Type Ty, XmmRegister dst, const Address &src);
364   void pand(Type Ty, XmmRegister dst, XmmRegister src);
365   void pand(Type Ty, XmmRegister dst, const Address &src);
366   void pandn(Type Ty, XmmRegister dst, XmmRegister src);
367   void pandn(Type Ty, XmmRegister dst, const Address &src);
368   void pmull(Type Ty, XmmRegister dst, XmmRegister src);
369   void pmull(Type Ty, XmmRegister dst, const Address &src);
370   void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
371   void pmulhw(Type Ty, XmmRegister dst, const Address &src);
372   void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
373   void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
374   void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
375   void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
376   void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
377   void pmuludq(Type Ty, XmmRegister dst, const Address &src);
378   void por(Type Ty, XmmRegister dst, XmmRegister src);
379   void por(Type Ty, XmmRegister dst, const Address &src);
380   void psub(Type Ty, XmmRegister dst, XmmRegister src);
381   void psub(Type Ty, XmmRegister dst, const Address &src);
382   void psubs(Type Ty, XmmRegister dst, XmmRegister src);
383   void psubs(Type Ty, XmmRegister dst, const Address &src);
384   void psubus(Type Ty, XmmRegister dst, XmmRegister src);
385   void psubus(Type Ty, XmmRegister dst, const Address &src);
386   void pxor(Type Ty, XmmRegister dst, XmmRegister src);
387   void pxor(Type Ty, XmmRegister dst, const Address &src);
388 
389   void psll(Type Ty, XmmRegister dst, XmmRegister src);
390   void psll(Type Ty, XmmRegister dst, const Address &src);
391   void psll(Type Ty, XmmRegister dst, const Immediate &src);
392 
393   void psra(Type Ty, XmmRegister dst, XmmRegister src);
394   void psra(Type Ty, XmmRegister dst, const Address &src);
395   void psra(Type Ty, XmmRegister dst, const Immediate &src);
396   void psrl(Type Ty, XmmRegister dst, XmmRegister src);
397   void psrl(Type Ty, XmmRegister dst, const Address &src);
398   void psrl(Type Ty, XmmRegister dst, const Immediate &src);
399 
400   void addps(Type Ty, XmmRegister dst, XmmRegister src);
401   void addps(Type Ty, XmmRegister dst, const Address &src);
402   void subps(Type Ty, XmmRegister dst, XmmRegister src);
403   void subps(Type Ty, XmmRegister dst, const Address &src);
404   void divps(Type Ty, XmmRegister dst, XmmRegister src);
405   void divps(Type Ty, XmmRegister dst, const Address &src);
406   void mulps(Type Ty, XmmRegister dst, XmmRegister src);
407   void mulps(Type Ty, XmmRegister dst, const Address &src);
408   void minps(Type Ty, XmmRegister dst, const Address &src);
409   void minps(Type Ty, XmmRegister dst, XmmRegister src);
410   void minss(Type Ty, XmmRegister dst, const Address &src);
411   void minss(Type Ty, XmmRegister dst, XmmRegister src);
412   void maxps(Type Ty, XmmRegister dst, const Address &src);
413   void maxps(Type Ty, XmmRegister dst, XmmRegister src);
414   void maxss(Type Ty, XmmRegister dst, const Address &src);
415   void maxss(Type Ty, XmmRegister dst, XmmRegister src);
416   void andnps(Type Ty, XmmRegister dst, const Address &src);
417   void andnps(Type Ty, XmmRegister dst, XmmRegister src);
418   void andps(Type Ty, XmmRegister dst, const Address &src);
419   void andps(Type Ty, XmmRegister dst, XmmRegister src);
420   void orps(Type Ty, XmmRegister dst, const Address &src);
421   void orps(Type Ty, XmmRegister dst, XmmRegister src);
422 
423   void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
424   void blendvps(Type Ty, XmmRegister dst, const Address &src);
425   void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
426   void pblendvb(Type Ty, XmmRegister dst, const Address &src);
427 
428   void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
429   void cmpps(Type Ty, XmmRegister dst, const Address &src,
430              CmppsCond CmpCondition);
431 
432   void sqrtps(XmmRegister dst);
433   void rsqrtps(XmmRegister dst);
434   void reciprocalps(XmmRegister dst);
435 
436   void movhlps(XmmRegister dst, XmmRegister src);
437   void movlhps(XmmRegister dst, XmmRegister src);
438   void unpcklps(XmmRegister dst, XmmRegister src);
439   void unpckhps(XmmRegister dst, XmmRegister src);
440   void unpcklpd(XmmRegister dst, XmmRegister src);
441   void unpckhpd(XmmRegister dst, XmmRegister src);
442 
443   void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
444 
445   void sqrtpd(XmmRegister dst);
446 
447   void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
448   void pshufb(Type Ty, XmmRegister dst, const Address &src);
449   void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
450   void pshufd(Type Ty, XmmRegister dst, const Address &src,
451               const Immediate &mask);
452   void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
453   void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
454   void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
455   void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
456   void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
457   void packss(Type Ty, XmmRegister Dst, const Address &Src);
458   void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
459   void packus(Type Ty, XmmRegister Dst, const Address &Src);
460   void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
461   void shufps(Type Ty, XmmRegister dst, const Address &src,
462               const Immediate &mask);
463 
464   void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
465   void cvtdq2ps(Type, XmmRegister dst, const Address &src);
466 
467   void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
468   void cvttps2dq(Type, XmmRegister dst, const Address &src);
469 
470   void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
471   void cvtps2dq(Type, XmmRegister dst, const Address &src);
472 
473   void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
474   void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
475 
476   void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
477   void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
478 
479   void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
480   void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
481 
482   void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
483   void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
484 
485   void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
486   void ucomiss(Type Ty, XmmRegister a, const Address &b);
487 
488   void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
489 
490   void sqrt(Type Ty, XmmRegister dst, const Address &src);
491   void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
492 
493   void xorps(Type Ty, XmmRegister dst, const Address &src);
494   void xorps(Type Ty, XmmRegister dst, XmmRegister src);
495 
496   void insertps(Type Ty, XmmRegister dst, XmmRegister src,
497                 const Immediate &imm);
498   void insertps(Type Ty, XmmRegister dst, const Address &src,
499                 const Immediate &imm);
500 
501   void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
502   void pinsr(Type Ty, XmmRegister dst, const Address &src,
503              const Immediate &imm);
504 
505   void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
506 
507   void pmovsxdq(XmmRegister dst, XmmRegister src);
508 
509   void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
510   void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
511   void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
512   void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
513 
514   enum RoundingMode {
515     kRoundToNearest = 0x0,
516     kRoundDown = 0x1,
517     kRoundUp = 0x2,
518     kRoundToZero = 0x3
519   };
520   void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
521   void round(Type Ty, XmmRegister dst, const Address &src,
522              const Immediate &mode);
523 
524   //----------------------------------------------------------------------------
525   //
526   // Begin: X87 instructions. Only available when Traits::UsesX87.
527   //
528   //----------------------------------------------------------------------------
529   template <typename T = Traits,
530             typename = typename std::enable_if<T::UsesX87>::type>
531   void fld(Type Ty, const typename T::Address &src);
532   template <typename T = Traits,
533             typename = typename std::enable_if<T::UsesX87>::type>
534   void fstp(Type Ty, const typename T::Address &dst);
535   template <typename T = Traits,
536             typename = typename std::enable_if<T::UsesX87>::type>
537   void fstp(typename T::X87STRegister st);
538 
539   template <typename T = Traits,
540             typename = typename std::enable_if<T::UsesX87>::type>
541   void fnstcw(const typename T::Address &dst);
542   template <typename T = Traits,
543             typename = typename std::enable_if<T::UsesX87>::type>
544   void fldcw(const typename T::Address &src);
545 
546   template <typename T = Traits,
547             typename = typename std::enable_if<T::UsesX87>::type>
548   void fistpl(const typename T::Address &dst);
549   template <typename T = Traits,
550             typename = typename std::enable_if<T::UsesX87>::type>
551   void fistps(const typename T::Address &dst);
552   template <typename T = Traits,
553             typename = typename std::enable_if<T::UsesX87>::type>
554   void fildl(const typename T::Address &src);
555   template <typename T = Traits,
556             typename = typename std::enable_if<T::UsesX87>::type>
557   void filds(const typename T::Address &src);
558 
559   template <typename T = Traits,
560             typename = typename std::enable_if<T::UsesX87>::type>
561   void fincstp();
562   //----------------------------------------------------------------------------
563   //
564   // End: X87 instructions.
565   //
566   //----------------------------------------------------------------------------
567 
568   void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
569   void cmp(Type Ty, GPRRegister reg, const Address &address);
570   void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
571   void cmp(Type Ty, const Address &address, GPRRegister reg);
572   void cmp(Type Ty, const Address &address, const Immediate &imm);
573 
574   void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
575   void test(Type Ty, GPRRegister reg, const Immediate &imm);
576   void test(Type Ty, const Address &address, GPRRegister reg);
577   void test(Type Ty, const Address &address, const Immediate &imm);
578 
579   void And(Type Ty, GPRRegister dst, GPRRegister src);
580   void And(Type Ty, GPRRegister dst, const Address &address);
581   void And(Type Ty, GPRRegister dst, const Immediate &imm);
582   void And(Type Ty, const Address &address, GPRRegister reg);
583   void And(Type Ty, const Address &address, const Immediate &imm);
584 
585   void Or(Type Ty, GPRRegister dst, GPRRegister src);
586   void Or(Type Ty, GPRRegister dst, const Address &address);
587   void Or(Type Ty, GPRRegister dst, const Immediate &imm);
588   void Or(Type Ty, const Address &address, GPRRegister reg);
589   void Or(Type Ty, const Address &address, const Immediate &imm);
590 
591   void Xor(Type Ty, GPRRegister dst, GPRRegister src);
592   void Xor(Type Ty, GPRRegister dst, const Address &address);
593   void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
594   void Xor(Type Ty, const Address &address, GPRRegister reg);
595   void Xor(Type Ty, const Address &address, const Immediate &imm);
596 
597   void add(Type Ty, GPRRegister dst, GPRRegister src);
598   void add(Type Ty, GPRRegister reg, const Address &address);
599   void add(Type Ty, GPRRegister reg, const Immediate &imm);
600   void add(Type Ty, const Address &address, GPRRegister reg);
601   void add(Type Ty, const Address &address, const Immediate &imm);
602 
603   void adc(Type Ty, GPRRegister dst, GPRRegister src);
604   void adc(Type Ty, GPRRegister dst, const Address &address);
605   void adc(Type Ty, GPRRegister reg, const Immediate &imm);
606   void adc(Type Ty, const Address &address, GPRRegister reg);
607   void adc(Type Ty, const Address &address, const Immediate &imm);
608 
609   void sub(Type Ty, GPRRegister dst, GPRRegister src);
610   void sub(Type Ty, GPRRegister reg, const Address &address);
611   void sub(Type Ty, GPRRegister reg, const Immediate &imm);
612   void sub(Type Ty, const Address &address, GPRRegister reg);
613   void sub(Type Ty, const Address &address, const Immediate &imm);
614 
615   void sbb(Type Ty, GPRRegister dst, GPRRegister src);
616   void sbb(Type Ty, GPRRegister reg, const Address &address);
617   void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
618   void sbb(Type Ty, const Address &address, GPRRegister reg);
619   void sbb(Type Ty, const Address &address, const Immediate &imm);
620 
621   void cbw();
622   void cwd();
623   void cdq();
624   template <typename T = Traits>
625   typename std::enable_if<T::Is64Bit, void>::type cqo();
626   template <typename T = Traits>
cqo()627   typename std::enable_if<!T::Is64Bit, void>::type cqo() {
628     llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
629   }
630 
631   void div(Type Ty, GPRRegister reg);
632   void div(Type Ty, const Address &address);
633 
634   void idiv(Type Ty, GPRRegister reg);
635   void idiv(Type Ty, const Address &address);
636 
637   void imul(Type Ty, GPRRegister dst, GPRRegister src);
638   void imul(Type Ty, GPRRegister reg, const Immediate &imm);
639   void imul(Type Ty, GPRRegister reg, const Address &address);
640 
641   void imul(Type Ty, GPRRegister reg);
642   void imul(Type Ty, const Address &address);
643 
644   void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
645   void imul(Type Ty, GPRRegister dst, const Address &address,
646             const Immediate &imm);
647 
648   void mul(Type Ty, GPRRegister reg);
649   void mul(Type Ty, const Address &address);
650 
651   template <class T = Traits,
652             typename = typename std::enable_if<!T::Is64Bit>::type>
653   void incl(GPRRegister reg);
654   void incl(const Address &address);
655 
656   template <class T = Traits,
657             typename = typename std::enable_if<!T::Is64Bit>::type>
658   void decl(GPRRegister reg);
659   void decl(const Address &address);
660 
661   void rol(Type Ty, GPRRegister reg, const Immediate &imm);
662   void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
663   void rol(Type Ty, const Address &operand, GPRRegister shifter);
664 
665   void shl(Type Ty, GPRRegister reg, const Immediate &imm);
666   void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
667   void shl(Type Ty, const Address &operand, GPRRegister shifter);
668 
669   void shr(Type Ty, GPRRegister reg, const Immediate &imm);
670   void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
671   void shr(Type Ty, const Address &operand, GPRRegister shifter);
672 
673   void sar(Type Ty, GPRRegister reg, const Immediate &imm);
674   void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
675   void sar(Type Ty, const Address &address, GPRRegister shifter);
676 
677   void shld(Type Ty, GPRRegister dst, GPRRegister src);
678   void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
679   void shld(Type Ty, const Address &operand, GPRRegister src);
680   void shrd(Type Ty, GPRRegister dst, GPRRegister src);
681   void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
682   void shrd(Type Ty, const Address &dst, GPRRegister src);
683 
684   void neg(Type Ty, GPRRegister reg);
685   void neg(Type Ty, const Address &addr);
686   void notl(GPRRegister reg);
687 
688   void bsf(Type Ty, GPRRegister dst, GPRRegister src);
689   void bsf(Type Ty, GPRRegister dst, const Address &src);
690   void bsr(Type Ty, GPRRegister dst, GPRRegister src);
691   void bsr(Type Ty, GPRRegister dst, const Address &src);
692 
693   void bswap(Type Ty, GPRRegister reg);
694 
695   void bt(GPRRegister base, GPRRegister offset);
696 
697   void ret();
698   void ret(const Immediate &imm);
699 
700   // 'size' indicates size in bytes and must be in the range 1..8.
701   void nop(int size = 1);
702   void int3();
703   void hlt();
704   void ud2();
705 
706   // j(Label) is fully tested.
707   void j(BrCond condition, Label *label, bool near = kFarJump);
708   void j(BrCond condition, const ConstantRelocatable *label); // not testable.
709 
710   void jmp(GPRRegister reg);
711   void jmp(Label *label, bool near = kFarJump);
712   void jmp(const ConstantRelocatable *label); // not testable.
713   void jmp(const Immediate &abs_address);
714 
715   void mfence();
716 
717   void lock();
718   void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
719   void cmpxchg8b(const Address &address, bool Locked);
720   void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
721   void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
722   void xchg(Type Ty, const Address &address, GPRRegister reg);
723 
/// \name Intel Architecture Code Analyzer markers.
/// @{
void iaca_start();
void iaca_end();
/// @}

// Emits a segment-override prefix; `prefix` is the prefix byte. Defined out
// of line.
void emitSegmentOverride(uint8_t prefix);
731 
// Returns the alignment this assembler prefers for loops; always 16.
// NOTE(review): the unit is presumably bytes -- confirm with callers.
intptr_t preferredLoopAlignment() {
  constexpr intptr_t PreferredAlignment = 16;
  return PreferredAlignment;
}
733   void align(intptr_t alignment, intptr_t offset);
734   void bind(Label *label);
735 
CodeSize()736   intptr_t CodeSize() const { return Buffer.size(); }
737 
738 protected:
// Appends a single byte to the assembler buffer. Defined inline after the
// class body.
inline void emitUint8(uint8_t value);
740 
741 private:
742   ENABLE_MAKE_UNIQUE;
743 
744   // EmidAddrSizeOverridePrefix directs the emission of the 0x67 prefix to
745   // force 32-bit registers when accessing memory. This is only used in native
746   // 64-bit.
747   const bool EmitAddrSizeOverridePrefix;
748 
749   static constexpr Type RexTypeIrrelevant = IceType_i32;
750   static constexpr Type RexTypeForceRexW = IceType_i64;
751   static constexpr GPRRegister RexRegIrrelevant =
752       Traits::GPRRegister::Encoded_Reg_eax;
753 
// Low-level inline emit helpers; their definitions follow the class body.
// emitInt16/emitInt32 append a value of that width to the buffer.
inline void emitInt16(int16_t value);
inline void emitInt32(int32_t value);
// Emits a register-direct ModR/M byte. The parameter order is (reg, rm),
// matching the definition: `reg` goes into bits 5..3 and `rm` into bits 2..0.
inline void emitRegisterOperand(int reg, int rm);
// Same as emitRegisterOperand, after reducing both registers with
// gprEncoding().
template <typename RegType, typename RmType>
inline void emitXmmRegisterOperand(RegType reg, RmType rm);
// Emits the 0x66 operand-size override prefix.
inline void emitOperandSizeOverride();
760 
761   void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0);
762   void emitImmediate(Type ty, const Immediate &imm);
763   void emitComplexI8(int rm, const Operand &operand,
764                      const Immediate &immediate);
765   void emitComplex(Type Ty, int rm, const Operand &operand,
766                    const Immediate &immediate);
767   void emitLabel(Label *label, intptr_t instruction_size);
768   void emitLabelLink(Label *label);
769   void emitNearLabelLink(Label *label);
770 
771   void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
772   void emitGenericShift(int rm, Type Ty, const Operand &operand,
773                         GPRRegister shifter);
774 
775   using LabelVector = std::vector<Label *>;
776   // A vector of pool-allocated x86 labels for CFG nodes.
777   LabelVector CfgNodeLabels;
778   // A vector of pool-allocated x86 labels for Local labels.
779   LabelVector LocalLabels;
780 
781   Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
782 
emitAddrSizeOverridePrefix()783   void emitAddrSizeOverridePrefix() {
784     if (!Traits::Is64Bit || !EmitAddrSizeOverridePrefix) {
785       return;
786     }
787     static constexpr uint8_t AddrSizeOverridePrefix = 0x67;
788     emitUint8(AddrSizeOverridePrefix);
789   }
790 
791   // The arith_int() methods factor out the commonality between the encodings
792   // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
793   // parameter is statically asserted to be less than 8.
794   template <uint32_t Tag>
795   void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
796 
797   template <uint32_t Tag>
798   void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
799 
800   template <uint32_t Tag>
801   void arith_int(Type Ty, GPRRegister reg, const Address &address);
802 
803   template <uint32_t Tag>
804   void arith_int(Type Ty, const Address &address, GPRRegister reg);
805 
806   template <uint32_t Tag>
807   void arith_int(Type Ty, const Address &address, const Immediate &imm);
808 
809   // gprEncoding returns Reg encoding for operand emission. For x86-64 we mask
810   // out the 4th bit as it is encoded in the REX.[RXB] bits. No other bits are
811   // touched because we don't want to mask errors.
812   template <typename RegType, typename T = Traits>
813   typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
gprEncoding(const RegType Reg)814   gprEncoding(const RegType Reg) {
815     return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
816   }
817 
818   template <typename RegType, typename T = Traits>
819   typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
gprEncoding(const RegType Reg)820   gprEncoding(const RegType Reg) {
821     return static_cast<typename T::GPRRegister>(Reg);
822   }
823 
824   template <typename RegType>
is8BitRegisterRequiringRex(const Type Ty,const RegType Reg)825   bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
826     static constexpr bool IsGPR =
827         std::is_same<typename std::decay<RegType>::type, ByteRegister>::value ||
828         std::is_same<typename std::decay<RegType>::type, GPRRegister>::value;
829 
830     // At this point in the assembler, we have encoded regs, so it is not
831     // possible to distinguish between the "new" low byte registers introduced
832     // in x86-64 and the legacy [abcd]h registers. Because x86, we may still
833     // see ah (div) in the assembler, so we whitelist it here.
834     //
835     // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an
836     // enum that is not necessarily the same type of
837     // Traits::RegisterSet::Encoded_Reg_ah.
838     constexpr uint32_t Encoded_Reg_ah = Traits::RegisterSet::Encoded_Reg_ah;
839     return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
840            isByteSizedType(Ty) && (Reg != Encoded_Reg_ah);
841   }
842 
843   // assembleAndEmitRex is used for determining which (if any) rex prefix
844   // should be emitted for the current instruction. It allows different types
845   // for Reg and Rm because they could be of different types (e.g., in
846   // mov[sz]x instructions.) If Addr is not nullptr, then Rm is ignored, and
847   // Rex.B is determined by Addr instead. TyRm is still used to determine
848   // Addr's size.
849   template <typename RegType, typename RmType, typename T = Traits>
850   typename std::enable_if<T::Is64Bit, void>::type
851   assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
852                      const RmType Rm,
853                      const typename T::Address *Addr = nullptr) {
854     const uint8_t W = (TyReg == IceType_i64 || TyRm == IceType_i64)
855                           ? T::Operand::RexW
856                           : T::Operand::RexNone;
857     const uint8_t R = (Reg & 0x08) ? T::Operand::RexR : T::Operand::RexNone;
858     const uint8_t X = (Addr != nullptr)
859                           ? (typename T::Operand::RexBits)Addr->rexX()
860                           : T::Operand::RexNone;
861     const uint8_t B =
862         (Addr != nullptr)
863             ? (typename T::Operand::RexBits)Addr->rexB()
864             : (Rm & 0x08) ? T::Operand::RexB : T::Operand::RexNone;
865     const uint8_t Prefix = W | R | X | B;
866     if (Prefix != T::Operand::RexNone) {
867       emitUint8(Prefix);
868     } else if (is8BitRegisterRequiringRex(TyReg, Reg) ||
869                (Addr == nullptr && is8BitRegisterRequiringRex(TyRm, Rm))) {
870       emitUint8(T::Operand::RexBase);
871     }
872   }
873 
874   template <typename RegType, typename RmType, typename T = Traits>
875   typename std::enable_if<!T::Is64Bit, void>::type
876   assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
877                      const typename T::Address * = nullptr) {}
878 
879   // emitRexRB is used for emitting a Rex prefix instructions with two
880   // explicit register operands in its mod-rm byte.
881   template <typename RegType, typename RmType>
emitRexRB(const Type Ty,const RegType Reg,const RmType Rm)882   void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
883     assembleAndEmitRex(Ty, Reg, Ty, Rm);
884   }
885 
886   template <typename RegType, typename RmType>
emitRexRB(const Type TyReg,const RegType Reg,const Type TyRm,const RmType Rm)887   void emitRexRB(const Type TyReg, const RegType Reg, const Type TyRm,
888                  const RmType Rm) {
889     assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
890   }
891 
892   // emitRexB is used for emitting a Rex prefix if one is needed on encoding
893   // the Reg field in an x86 instruction. It is invoked by the template when
894   // Reg is the single register operand in the instruction (e.g., push Reg.)
emitRexB(const Type Ty,const RmType Rm)895   template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
896     emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
897   }
898 
899   // emitRex is used for emitting a Rex prefix for an address and a GPR. The
900   // address may contain zero, one, or two registers.
901   template <typename RegType>
emitRex(const Type Ty,const Address & Addr,const RegType Reg)902   void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
903     assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr);
904   }
905 
906   template <typename RegType>
emitRex(const Type AddrTy,const Address & Addr,const Type TyReg,const RegType Reg)907   void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
908                const RegType Reg) {
909     assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
910   }
911 };
912 
913 template <typename TraitsType>
emitUint8(uint8_t value)914 inline void AssemblerX86Base<TraitsType>::emitUint8(uint8_t value) {
915   Buffer.emit<uint8_t>(value);
916 }
917 
918 template <typename TraitsType>
emitInt16(int16_t value)919 inline void AssemblerX86Base<TraitsType>::emitInt16(int16_t value) {
920   Buffer.emit<int16_t>(value);
921 }
922 
923 template <typename TraitsType>
emitInt32(int32_t value)924 inline void AssemblerX86Base<TraitsType>::emitInt32(int32_t value) {
925   Buffer.emit<int32_t>(value);
926 }
927 
928 template <typename TraitsType>
emitRegisterOperand(int reg,int rm)929 inline void AssemblerX86Base<TraitsType>::emitRegisterOperand(int reg, int rm) {
930   assert(reg >= 0 && reg < 8);
931   assert(rm >= 0 && rm < 8);
932   Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm);
933 }
934 
935 template <typename TraitsType>
936 template <typename RegType, typename RmType>
emitXmmRegisterOperand(RegType reg,RmType rm)937 inline void AssemblerX86Base<TraitsType>::emitXmmRegisterOperand(RegType reg,
938                                                                  RmType rm) {
939   emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
940 }
941 
942 template <typename TraitsType>
emitOperandSizeOverride()943 inline void AssemblerX86Base<TraitsType>::emitOperandSizeOverride() {
944   emitUint8(0x66);
945 }
946 
947 } // end of namespace X86NAMESPACE
948 
949 } // end of namespace Ice
950 
951 #include "IceAssemblerX86BaseImpl.h"
952 
953 #endif // SUBZERO_SRC_ICEASSEMBLERX86BASE_H
954