1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
19 
20 #include <vector>
21 
22 #include "arch/x86_64/instruction_set_features_x86_64.h"
23 #include "base/arena_containers.h"
24 #include "base/array_ref.h"
25 #include "base/bit_utils.h"
26 #include "base/globals.h"
27 #include "base/macros.h"
28 #include "constants_x86_64.h"
29 #include "heap_poisoning.h"
30 #include "managed_register_x86_64.h"
31 #include "offsets.h"
32 #include "utils/assembler.h"
33 
34 namespace art HIDDEN {
35 namespace x86_64 {
36 
37 // Encodes an immediate value for operands.
38 //
39 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
40 // to 32b.
41 //
42 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
43 // conversion rules in expressions regarding negation, especially size_t on 32b.
44 class Immediate : public ValueObject {
45  public:
Immediate(int64_t value_in)46   explicit Immediate(int64_t value_in) : value_(value_in) {}
47 
value()48   int64_t value() const { return value_; }
49 
is_int8()50   bool is_int8() const { return IsInt<8>(value_); }
is_uint8()51   bool is_uint8() const { return IsUint<8>(value_); }
is_int16()52   bool is_int16() const { return IsInt<16>(value_); }
is_uint16()53   bool is_uint16() const { return IsUint<16>(value_); }
is_int32()54   bool is_int32() const { return IsInt<32>(value_); }
55 
56  private:
57   const int64_t value_;
58 };
59 
60 
61 class Operand : public ValueObject {
62  public:
mod()63   uint8_t mod() const {
64     return (encoding_at(0) >> 6) & 3;
65   }
66 
rm()67   Register rm() const {
68     return static_cast<Register>(encoding_at(0) & 7);
69   }
70 
scale()71   ScaleFactor scale() const {
72     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
73   }
74 
index()75   Register index() const {
76     return static_cast<Register>((encoding_at(1) >> 3) & 7);
77   }
78 
base()79   Register base() const {
80     return static_cast<Register>(encoding_at(1) & 7);
81   }
82 
cpu_rm()83   CpuRegister cpu_rm() const {
84     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
85     return static_cast<CpuRegister>(rm() + ext);
86   }
87 
cpu_index()88   CpuRegister cpu_index() const {
89     int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
90     return static_cast<CpuRegister>(index() + ext);
91   }
92 
cpu_base()93   CpuRegister cpu_base() const {
94     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
95     return static_cast<CpuRegister>(base() + ext);
96   }
97 
rex()98   uint8_t rex() const {
99     return rex_;
100   }
101 
disp8()102   int8_t disp8() const {
103     CHECK_GE(length_, 2);
104     return static_cast<int8_t>(encoding_[length_ - 1]);
105   }
106 
disp32()107   int32_t disp32() const {
108     CHECK_GE(length_, 5);
109     int32_t value;
110     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
111     return value;
112   }
113 
disp()114   int32_t disp() const {
115     switch (mod()) {
116       case 0:
117         // With mod 00b RBP is special and means disp32 (either in r/m or in SIB base).
118         return (rm() == RBP || (rm() == RSP && base() == RBP)) ? disp32() : 0;
119       case 1:
120         return disp8();
121       case 2:
122         return disp32();
123       default:
124         // Mod 11b means reg/reg, so there is no address and consequently no displacement.
125         LOG(FATAL) << "there is no displacement in x86_64 reg/reg operand";
126         UNREACHABLE();
127     }
128   }
129 
IsRegister(CpuRegister reg)130   bool IsRegister(CpuRegister reg) const {
131     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
132         && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
133         && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
134   }
135 
GetFixup()136   AssemblerFixup* GetFixup() const {
137     return fixup_;
138   }
139 
140   inline bool operator==(const Operand &op) const {
141     return rex_ == op.rex_ &&
142         length_ == op.length_ &&
143         memcmp(encoding_, op.encoding_, length_) == 0 &&
144         fixup_ == op.fixup_;
145   }
146 
147  protected:
148   // Operand can be sub classed (e.g: Address).
Operand()149   Operand() : rex_(0), length_(0), fixup_(nullptr) { }
150 
SetModRM(uint8_t mod_in,CpuRegister rm_in)151   void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
152     CHECK_EQ(mod_in & ~3, 0);
153     if (rm_in.NeedsRex()) {
154       rex_ |= 0x41;  // REX.000B
155     }
156     encoding_[0] = (mod_in << 6) | rm_in.LowBits();
157     length_ = 1;
158   }
159 
SetSIB(ScaleFactor scale_in,CpuRegister index_in,CpuRegister base_in)160   void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
161     CHECK_EQ(length_, 1);
162     CHECK_EQ(scale_in & ~3, 0);
163     if (base_in.NeedsRex()) {
164       rex_ |= 0x41;  // REX.000B
165     }
166     if (index_in.NeedsRex()) {
167       rex_ |= 0x42;  // REX.00X0
168     }
169     encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
170         static_cast<uint8_t>(base_in.LowBits());
171     length_ = 2;
172   }
173 
SetDisp8(int8_t disp)174   void SetDisp8(int8_t disp) {
175     CHECK(length_ == 1 || length_ == 2);
176     encoding_[length_++] = static_cast<uint8_t>(disp);
177   }
178 
SetDisp32(int32_t disp)179   void SetDisp32(int32_t disp) {
180     CHECK(length_ == 1 || length_ == 2);
181     int disp_size = sizeof(disp);
182     memmove(&encoding_[length_], &disp, disp_size);
183     length_ += disp_size;
184   }
185 
SetFixup(AssemblerFixup * fixup)186   void SetFixup(AssemblerFixup* fixup) {
187     fixup_ = fixup;
188   }
189 
190  private:
191   uint8_t rex_;
192   uint8_t length_;
193   uint8_t encoding_[6];
194   AssemblerFixup* fixup_;
195 
Operand(CpuRegister reg)196   explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
197 
198   // Get the operand encoding byte at the given index.
encoding_at(int index_in)199   uint8_t encoding_at(int index_in) const {
200     CHECK_GE(index_in, 0);
201     CHECK_LT(index_in, length_);
202     return encoding_[index_in];
203   }
204 
205   friend class X86_64Assembler;
206 };
207 
208 
209 class Address : public Operand {
210  public:
Address(CpuRegister base_in,int32_t disp)211   Address(CpuRegister base_in, int32_t disp) {
212     Init(base_in, disp);
213   }
214 
Address(CpuRegister base_in,Offset disp)215   Address(CpuRegister base_in, Offset disp) {
216     Init(base_in, disp.Int32Value());
217   }
218 
Address(CpuRegister base_in,FrameOffset disp)219   Address(CpuRegister base_in, FrameOffset disp) {
220     CHECK_EQ(base_in.AsRegister(), RSP);
221     Init(CpuRegister(RSP), disp.Int32Value());
222   }
223 
Address(CpuRegister base_in,MemberOffset disp)224   Address(CpuRegister base_in, MemberOffset disp) {
225     Init(base_in, disp.Int32Value());
226   }
227 
Init(CpuRegister base_in,int32_t disp)228   void Init(CpuRegister base_in, int32_t disp) {
229     if (disp == 0 && base_in.LowBits() != RBP) {
230       SetModRM(0, base_in);
231       if (base_in.LowBits() == RSP) {
232         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
233       }
234     } else if (disp >= -128 && disp <= 127) {
235       SetModRM(1, base_in);
236       if (base_in.LowBits() == RSP) {
237         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
238       }
239       SetDisp8(disp);
240     } else {
241       SetModRM(2, base_in);
242       if (base_in.LowBits() == RSP) {
243         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
244       }
245       SetDisp32(disp);
246     }
247   }
248 
Address(CpuRegister index_in,ScaleFactor scale_in,int32_t disp)249   Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
250     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
251     SetModRM(0, CpuRegister(RSP));
252     SetSIB(scale_in, index_in, CpuRegister(RBP));
253     SetDisp32(disp);
254   }
255 
Address(CpuRegister base_in,CpuRegister index_in,ScaleFactor scale_in,int32_t disp)256   Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
257     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
258     if (disp == 0 && base_in.LowBits() != RBP) {
259       SetModRM(0, CpuRegister(RSP));
260       SetSIB(scale_in, index_in, base_in);
261     } else if (disp >= -128 && disp <= 127) {
262       SetModRM(1, CpuRegister(RSP));
263       SetSIB(scale_in, index_in, base_in);
264       SetDisp8(disp);
265     } else {
266       SetModRM(2, CpuRegister(RSP));
267       SetSIB(scale_in, index_in, base_in);
268       SetDisp32(disp);
269     }
270   }
271 
272   // If no_rip is true then the Absolute address isn't RIP relative.
273   static Address Absolute(uintptr_t addr, bool no_rip = false) {
274     Address result;
275     if (no_rip) {
276       result.SetModRM(0, CpuRegister(RSP));
277       result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
278       result.SetDisp32(addr);
279     } else {
280       // RIP addressing is done using RBP as the base register.
281       // The value in RBP isn't used.  Instead the offset is added to RIP.
282       result.SetModRM(0, CpuRegister(RBP));
283       result.SetDisp32(addr);
284     }
285     return result;
286   }
287 
288   // An RIP relative address that will be fixed up later.
RIP(AssemblerFixup * fixup)289   static Address RIP(AssemblerFixup* fixup) {
290     Address result;
291     // RIP addressing is done using RBP as the base register.
292     // The value in RBP isn't used.  Instead the offset is added to RIP.
293     result.SetModRM(0, CpuRegister(RBP));
294     result.SetDisp32(0);
295     result.SetFixup(fixup);
296     return result;
297   }
298 
299   // If no_rip is true then the Absolute address isn't RIP relative.
300   static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
301     return Absolute(addr.Int32Value(), no_rip);
302   }
303 
304   // Break the address into pieces and reassemble it again with a new displacement.
305   // Note that it may require a new addressing mode if displacement size is changed.
displace(const Address & addr,int32_t disp)306   static Address displace(const Address &addr, int32_t disp) {
307     const int32_t new_disp = addr.disp() + disp;
308     const bool sib = addr.rm() == RSP;
309     const bool rbp = RBP == (sib ? addr.base() : addr.rm());
310     Address new_addr;
311     if (addr.mod() == 0 && rbp) {
312       // Special case: mod 00b and RBP in r/m or SIB base => 32-bit displacement.
313       // This case includes RIP-relative addressing.
314       new_addr.SetModRM(0, addr.cpu_rm());
315       if (sib) {
316         new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
317       }
318       new_addr.SetDisp32(new_disp);
319     } else if (new_disp == 0 && !rbp) {
320       // Mod 00b (excluding a special case for RBP) => no displacement.
321       new_addr.SetModRM(0, addr.cpu_rm());
322       if (sib) {
323         new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
324       }
325     } else if (new_disp >= -128 && new_disp <= 127) {
326       // Mod 01b => 8-bit displacement.
327       new_addr.SetModRM(1, addr.cpu_rm());
328       if (sib) {
329         new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
330       }
331       new_addr.SetDisp8(new_disp);
332     } else {
333       // Mod 10b => 32-bit displacement.
334       new_addr.SetModRM(2, addr.cpu_rm());
335       if (sib) {
336         new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
337       }
338       new_addr.SetDisp32(new_disp);
339     }
340     new_addr.SetFixup(addr.GetFixup());
341     return new_addr;
342   }
343 
344   inline bool operator==(const Address& addr) const {
345     return static_cast<const Operand&>(*this) == static_cast<const Operand&>(addr);
346   }
347 
348  private:
Address()349   Address() {}
350 };
351 
352 std::ostream& operator<<(std::ostream& os, const Address& addr);
353 
354 /**
355  * Class to handle constant area values.
356  */
357 class ConstantArea {
358  public:
ConstantArea(ArenaAllocator * allocator)359   explicit ConstantArea(ArenaAllocator* allocator)
360       : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
361 
362   // Add a double to the constant area, returning the offset into
363   // the constant area where the literal resides.
364   size_t AddDouble(double v);
365 
366   // Add a float to the constant area, returning the offset into
367   // the constant area where the literal resides.
368   size_t AddFloat(float v);
369 
370   // Add an int32_t to the constant area, returning the offset into
371   // the constant area where the literal resides.
372   size_t AddInt32(int32_t v);
373 
374   // Add an int32_t to the end of the constant area, returning the offset into
375   // the constant area where the literal resides.
376   size_t AppendInt32(int32_t v);
377 
378   // Add an int64_t to the constant area, returning the offset into
379   // the constant area where the literal resides.
380   size_t AddInt64(int64_t v);
381 
GetSize()382   size_t GetSize() const {
383     return buffer_.size() * elem_size_;
384   }
385 
GetBuffer()386   ArrayRef<const int32_t> GetBuffer() const {
387     return ArrayRef<const int32_t>(buffer_);
388   }
389 
390  private:
391   static constexpr size_t elem_size_ = sizeof(int32_t);
392   ArenaVector<int32_t> buffer_;
393 };
394 
395 
396 // This is equivalent to the Label class, used in a slightly different context. We
397 // inherit the functionality of the Label class, but prevent unintended
398 // derived-to-base conversions by making the base class private.
399 class NearLabel : private Label {
400  public:
NearLabel()401   NearLabel() : Label() {}
402 
403   // Expose the Label routines that we need.
404   using Label::Position;
405   using Label::LinkPosition;
406   using Label::IsBound;
407   using Label::IsUnused;
408   using Label::IsLinked;
409 
410  private:
411   using Label::BindTo;
412   using Label::LinkTo;
413 
414   friend class x86_64::X86_64Assembler;
415 
416   DISALLOW_COPY_AND_ASSIGN(NearLabel);
417 };
418 
419 
420 class X86_64Assembler final : public Assembler {
421  public:
422   explicit X86_64Assembler(ArenaAllocator* allocator,
423                            const X86_64InstructionSetFeatures* instruction_set_features = nullptr)
Assembler(allocator)424       : Assembler(allocator),
425         constant_area_(allocator),
426         has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX(): false),
427         has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() : false) {}
~X86_64Assembler()428   virtual ~X86_64Assembler() {}
429 
430   /*
431    * Emit Machine Instructions.
432    */
433   void call(CpuRegister reg);
434   void call(const Address& address);
435   void call(Label* label);
436 
437   void pushq(CpuRegister reg);
438   void pushq(const Address& address);
439   void pushq(const Immediate& imm);
440 
441   void popq(CpuRegister reg);
442   void popq(const Address& address);
443 
444   void movq(CpuRegister dst, const Immediate& src);
445   void movl(CpuRegister dst, const Immediate& src);
446   void movq(CpuRegister dst, CpuRegister src);
447   void movl(CpuRegister dst, CpuRegister src);
448 
449   void movntl(const Address& dst, CpuRegister src);
450   void movntq(const Address& dst, CpuRegister src);
451 
452   void movq(CpuRegister dst, const Address& src);
453   void movl(CpuRegister dst, const Address& src);
454   void movq(const Address& dst, CpuRegister src);
455   void movq(const Address& dst, const Immediate& imm);
456   void movl(const Address& dst, CpuRegister src);
457   void movl(const Address& dst, const Immediate& imm);
458 
459   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
460   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
461   void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
462 
463   void movzxb(CpuRegister dst, CpuRegister src);
464   void movzxb(CpuRegister dst, const Address& src);
465   void movsxb(CpuRegister dst, CpuRegister src);
466   void movsxb(CpuRegister dst, const Address& src);
467   void movb(CpuRegister dst, const Address& src);
468   void movb(const Address& dst, CpuRegister src);
469   void movb(const Address& dst, const Immediate& imm);
470 
471   void movzxw(CpuRegister dst, CpuRegister src);
472   void movzxw(CpuRegister dst, const Address& src);
473   void movsxw(CpuRegister dst, CpuRegister src);
474   void movsxw(CpuRegister dst, const Address& src);
475   void movw(CpuRegister dst, const Address& src);
476   void movw(const Address& dst, CpuRegister src);
477   void movw(const Address& dst, const Immediate& imm);
478 
479   void leaq(CpuRegister dst, const Address& src);
480   void leal(CpuRegister dst, const Address& src);
481 
482   void movaps(XmmRegister dst, XmmRegister src);     // move
483   void movaps(XmmRegister dst, const Address& src);  // load aligned
484   void movups(XmmRegister dst, const Address& src);  // load unaligned
485   void movaps(const Address& dst, XmmRegister src);  // store aligned
486   void movups(const Address& dst, XmmRegister src);  // store unaligned
487 
488   void vmovaps(XmmRegister dst, XmmRegister src);     // move
489   void vmovaps(XmmRegister dst, const Address& src);  // load aligned
490   void vmovaps(const Address& dst, XmmRegister src);  // store aligned
491   void vmovups(XmmRegister dst, const Address& src);  // load unaligned
492   void vmovups(const Address& dst, XmmRegister src);  // store unaligned
493 
494   void movss(XmmRegister dst, const Address& src);
495   void movss(const Address& dst, XmmRegister src);
496   void movss(XmmRegister dst, XmmRegister src);
497 
498   void movsxd(CpuRegister dst, CpuRegister src);
499   void movsxd(CpuRegister dst, const Address& src);
500 
501   void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
502   void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
503   void movd(XmmRegister dst, CpuRegister src, bool is64bit);
504   void movd(CpuRegister dst, XmmRegister src, bool is64bit);
505 
506   void addss(XmmRegister dst, XmmRegister src);
507   void addss(XmmRegister dst, const Address& src);
508   void subss(XmmRegister dst, XmmRegister src);
509   void subss(XmmRegister dst, const Address& src);
510   void mulss(XmmRegister dst, XmmRegister src);
511   void mulss(XmmRegister dst, const Address& src);
512   void divss(XmmRegister dst, XmmRegister src);
513   void divss(XmmRegister dst, const Address& src);
514 
515   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
516   void subps(XmmRegister dst, XmmRegister src);
517   void mulps(XmmRegister dst, XmmRegister src);
518   void divps(XmmRegister dst, XmmRegister src);
519 
520   void vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
521   void vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
522   void vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
523   void vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
524 
525   void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
526   void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
527   void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
528   void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
529 
530   void vfmadd213ss(XmmRegister accumulator, XmmRegister left, XmmRegister right);
531   void vfmadd213sd(XmmRegister accumulator, XmmRegister left, XmmRegister right);
532 
533   void movapd(XmmRegister dst, XmmRegister src);     // move
534   void movapd(XmmRegister dst, const Address& src);  // load aligned
535   void movupd(XmmRegister dst, const Address& src);  // load unaligned
536   void movapd(const Address& dst, XmmRegister src);  // store aligned
537   void movupd(const Address& dst, XmmRegister src);  // store unaligned
538 
539   void vmovapd(XmmRegister dst, XmmRegister src);     // move
540   void vmovapd(XmmRegister dst, const Address& src);  // load aligned
541   void vmovapd(const Address& dst, XmmRegister src);  // store aligned
542   void vmovupd(XmmRegister dst, const Address& src);  // load unaligned
543   void vmovupd(const Address& dst, XmmRegister src);  // store unaligned
544 
545   void movsd(XmmRegister dst, const Address& src);
546   void movsd(const Address& dst, XmmRegister src);
547   void movsd(XmmRegister dst, XmmRegister src);
548 
549   void addsd(XmmRegister dst, XmmRegister src);
550   void addsd(XmmRegister dst, const Address& src);
551   void subsd(XmmRegister dst, XmmRegister src);
552   void subsd(XmmRegister dst, const Address& src);
553   void mulsd(XmmRegister dst, XmmRegister src);
554   void mulsd(XmmRegister dst, const Address& src);
555   void divsd(XmmRegister dst, XmmRegister src);
556   void divsd(XmmRegister dst, const Address& src);
557 
558   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
559   void subpd(XmmRegister dst, XmmRegister src);
560   void mulpd(XmmRegister dst, XmmRegister src);
561   void divpd(XmmRegister dst, XmmRegister src);
562 
563   void movdqa(XmmRegister dst, XmmRegister src);     // move
564   void movdqa(XmmRegister dst, const Address& src);  // load aligned
565   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
566   void movdqa(const Address& dst, XmmRegister src);  // store aligned
567   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
568 
569   void vmovdqa(XmmRegister dst, XmmRegister src);     // move
570   void vmovdqa(XmmRegister dst, const Address& src);  // load aligned
571   void vmovdqa(const Address& dst, XmmRegister src);  // store aligned
572   void vmovdqu(XmmRegister dst, const Address& src);  // load unaligned
573   void vmovdqu(const Address& dst, XmmRegister src);  // store unaligned
574 
575   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
576   void psubb(XmmRegister dst, XmmRegister src);
577 
578   void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
579   void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
580 
581   void paddw(XmmRegister dst, XmmRegister src);
582   void psubw(XmmRegister dst, XmmRegister src);
583   void pmullw(XmmRegister dst, XmmRegister src);
584   void vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
585 
586   void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);
587   void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
588   void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
589 
590   void paddd(XmmRegister dst, XmmRegister src);
591   void psubd(XmmRegister dst, XmmRegister src);
592   void pmulld(XmmRegister dst, XmmRegister src);
593   void vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2);
594 
595   void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
596 
597   void paddq(XmmRegister dst, XmmRegister src);
598   void psubq(XmmRegister dst, XmmRegister src);
599 
600   void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
601   void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
602 
603   void paddusb(XmmRegister dst, XmmRegister src);
604   void paddsb(XmmRegister dst, XmmRegister src);
605   void paddusw(XmmRegister dst, XmmRegister src);
606   void paddsw(XmmRegister dst, XmmRegister src);
607   void psubusb(XmmRegister dst, XmmRegister src);
608   void psubsb(XmmRegister dst, XmmRegister src);
609   void psubusw(XmmRegister dst, XmmRegister src);
610   void psubsw(XmmRegister dst, XmmRegister src);
611 
612   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
613   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
614   void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
615   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
616   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
617   void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
618 
619   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
620   void cvtss2sd(XmmRegister dst, XmmRegister src);
621   void cvtss2sd(XmmRegister dst, const Address& src);
622 
623   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
624   void cvtsd2ss(XmmRegister dst, XmmRegister src);
625   void cvtsd2ss(XmmRegister dst, const Address& src);
626 
627   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
628   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
629   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
630   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
631 
632   void cvtdq2ps(XmmRegister dst, XmmRegister src);
633   void cvtdq2pd(XmmRegister dst, XmmRegister src);
634 
635   void comiss(XmmRegister a, XmmRegister b);
636   void comiss(XmmRegister a, const Address& b);
637   void comisd(XmmRegister a, XmmRegister b);
638   void comisd(XmmRegister a, const Address& b);
639   void ucomiss(XmmRegister a, XmmRegister b);
640   void ucomiss(XmmRegister a, const Address& b);
641   void ucomisd(XmmRegister a, XmmRegister b);
642   void ucomisd(XmmRegister a, const Address& b);
643 
644   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
645   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
646 
647   void sqrtsd(XmmRegister dst, XmmRegister src);
648   void sqrtss(XmmRegister dst, XmmRegister src);
649 
650   void xorpd(XmmRegister dst, const Address& src);
651   void xorpd(XmmRegister dst, XmmRegister src);
652   void xorps(XmmRegister dst, const Address& src);
653   void xorps(XmmRegister dst, XmmRegister src);
654   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
655   void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
656   void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
657   void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
658 
659   void andpd(XmmRegister dst, const Address& src);
660   void andpd(XmmRegister dst, XmmRegister src);
661   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
662   void pand(XmmRegister dst, XmmRegister src);
663   void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);
664   void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
665   void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
666 
667   void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
668   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
669   void andnps(XmmRegister dst, XmmRegister src);
670   void pandn(XmmRegister dst, XmmRegister src);
671   void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);
672   void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
673   void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
674 
675   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
676   void orps(XmmRegister dst, XmmRegister src);
677   void por(XmmRegister dst, XmmRegister src);
678   void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
679   void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
680   void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
681 
682   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
683   void pavgw(XmmRegister dst, XmmRegister src);
684   void psadbw(XmmRegister dst, XmmRegister src);
685   void pmaddwd(XmmRegister dst, XmmRegister src);
686   void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
687   void phaddw(XmmRegister dst, XmmRegister src);
688   void phaddd(XmmRegister dst, XmmRegister src);
689   void haddps(XmmRegister dst, XmmRegister src);
690   void haddpd(XmmRegister dst, XmmRegister src);
691   void phsubw(XmmRegister dst, XmmRegister src);
692   void phsubd(XmmRegister dst, XmmRegister src);
693   void hsubps(XmmRegister dst, XmmRegister src);
694   void hsubpd(XmmRegister dst, XmmRegister src);
695 
696   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
697   void pmaxsb(XmmRegister dst, XmmRegister src);
698   void pminsw(XmmRegister dst, XmmRegister src);
699   void pmaxsw(XmmRegister dst, XmmRegister src);
700   void pminsd(XmmRegister dst, XmmRegister src);
701   void pmaxsd(XmmRegister dst, XmmRegister src);
702 
703   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
704   void pmaxub(XmmRegister dst, XmmRegister src);
705   void pminuw(XmmRegister dst, XmmRegister src);
706   void pmaxuw(XmmRegister dst, XmmRegister src);
707   void pminud(XmmRegister dst, XmmRegister src);
708   void pmaxud(XmmRegister dst, XmmRegister src);
709 
710   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
711   void maxps(XmmRegister dst, XmmRegister src);
712   void minpd(XmmRegister dst, XmmRegister src);
713   void maxpd(XmmRegister dst, XmmRegister src);
714 
715   void pcmpeqb(XmmRegister dst, XmmRegister src);
716   void pcmpeqw(XmmRegister dst, XmmRegister src);
717   void pcmpeqd(XmmRegister dst, XmmRegister src);
718   void pcmpeqq(XmmRegister dst, XmmRegister src);
719 
720   void pcmpgtb(XmmRegister dst, XmmRegister src);
721   void pcmpgtw(XmmRegister dst, XmmRegister src);
722   void pcmpgtd(XmmRegister dst, XmmRegister src);
723   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
724 
725   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
726   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
727   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
728 
729   void punpcklbw(XmmRegister dst, XmmRegister src);
730   void punpcklwd(XmmRegister dst, XmmRegister src);
731   void punpckldq(XmmRegister dst, XmmRegister src);
732   void punpcklqdq(XmmRegister dst, XmmRegister src);
733 
734   void punpckhbw(XmmRegister dst, XmmRegister src);
735   void punpckhwd(XmmRegister dst, XmmRegister src);
736   void punpckhdq(XmmRegister dst, XmmRegister src);
737   void punpckhqdq(XmmRegister dst, XmmRegister src);
738 
739   void psllw(XmmRegister reg, const Immediate& shift_count);
740   void pslld(XmmRegister reg, const Immediate& shift_count);
741   void psllq(XmmRegister reg, const Immediate& shift_count);
742 
743   void psraw(XmmRegister reg, const Immediate& shift_count);
744   void psrad(XmmRegister reg, const Immediate& shift_count);
745   // no psraq
746 
747   void psrlw(XmmRegister reg, const Immediate& shift_count);
748   void psrld(XmmRegister reg, const Immediate& shift_count);
749   void psrlq(XmmRegister reg, const Immediate& shift_count);
750   void psrldq(XmmRegister reg, const Immediate& shift_count);
751 
  // x87 FPU declarations, named after the corresponding mnemonics. The trailing `s`/`l`
  // distinguishes memory operand sizes; the exact width each suffix maps to is fixed by the
  // encodings in the implementation, which is not visible here.
  // NOTE(review): do not assume the suffixes follow AT&T widths without checking the encodings.
752   void flds(const Address& src);
753   void fstps(const Address& dst);
754   void fsts(const Address& dst);
755 
756   void fldl(const Address& src);
757   void fstpl(const Address& dst);
758   void fstl(const Address& dst);
759 
  // Named after `fstsw` (store x87 status word); takes no operand, so the destination is
  // implied by the encoding chosen in the implementation.
760   void fstsw();
761 
762   void fucompp();
763 
  // Named after the x87 control-word store (`fnstcw`) and load (`fldcw`) instructions.
764   void fnstcw(const Address& dst);
765   void fldcw(const Address& src);
766 
  // Integer store-and-pop (`fistp*`) and integer load (`fild*`) forms.
767   void fistpl(const Address& dst);
768   void fistps(const Address& dst);
769   void fildl(const Address& src);
770   void filds(const Address& src);
771 
772   void fincstp();
  // `index` presumably selects the x87 stack register to free; confirm in the implementation.
773   void ffree(const Immediate& index);
774 
775   void fsin();
776   void fcos();
777   void fptan();
778   void fprem();
779 
  // Exchange (`xchg`) declarations. The suffix follows the b/w/l/q operand-size convention
  // used by the integer instructions in this class (presumably 8/16/32/64-bit). Each size
  // has a register-register form and a register-memory form.
780   void xchgb(CpuRegister dst, CpuRegister src);
781   void xchgb(CpuRegister reg, const Address& address);
782 
783   void xchgw(CpuRegister dst, CpuRegister src);
784   void xchgw(CpuRegister reg, const Address& address);
785 
786   void xchgl(CpuRegister dst, CpuRegister src);
787   void xchgl(CpuRegister reg, const Address& address);
788 
789   void xchgq(CpuRegister dst, CpuRegister src);
790   void xchgq(CpuRegister reg, const Address& address);
791 
  // Exchange-and-add (`xadd`) declarations, one register-register and one memory-register
  // form per operand size. Note the memory forms take the address first, unlike `xchg*`.
  // The `LockXadd*` helpers in this class call the memory forms.
792   void xaddb(CpuRegister dst, CpuRegister src);
793   void xaddb(const Address& address, CpuRegister reg);
794 
795   void xaddw(CpuRegister dst, CpuRegister src);
796   void xaddw(const Address& address, CpuRegister reg);
797 
798   void xaddl(CpuRegister dst, CpuRegister src);
799   void xaddl(const Address& address, CpuRegister reg);
800 
801   void xaddq(CpuRegister dst, CpuRegister src);
802   void xaddq(const Address& address, CpuRegister reg);
803 
  // Compare (`cmp`) declarations. Byte and word sizes are only declared in the
  // memory-vs-immediate form.
804   void cmpb(const Address& address, const Immediate& imm);
805   void cmpw(const Address& address, const Immediate& imm);
806 
  // `cmpl` overloads: every combination of register, memory and immediate operands that is
  // declared for this size.
807   void cmpl(CpuRegister reg, const Immediate& imm);
808   void cmpl(CpuRegister reg0, CpuRegister reg1);
809   void cmpl(CpuRegister reg, const Address& address);
810   void cmpl(const Address& address, CpuRegister reg);
811   void cmpl(const Address& address, const Immediate& imm);
812 
  // `cmpq` overloads. Unlike `cmpl`, no memory-vs-register form is declared.
813   void cmpq(CpuRegister reg0, CpuRegister reg1);
814   void cmpq(CpuRegister reg0, const Immediate& imm);
815   void cmpq(CpuRegister reg0, const Address& address);
816   void cmpq(const Address& address, const Immediate& imm);
817 
  // `test` declarations with a register as the first operand.
818   void testl(CpuRegister reg1, CpuRegister reg2);
819   void testl(CpuRegister reg, const Address& address);
820   void testl(CpuRegister reg, const Immediate& imm);
821 
  // No register-vs-immediate `testq` is declared.
822   void testq(CpuRegister reg1, CpuRegister reg2);
823   void testq(CpuRegister reg, const Address& address);
824 
  // `test` declarations with a memory operand tested against an immediate.
825   void testb(const Address& address, const Immediate& imm);
826   void testl(const Address& address, const Immediate& imm);
827 
  // Bitwise AND. The `l`/`q` forms take a register destination with an immediate, register
  // or memory source; `andw` is declared only as memory-destination with an immediate.
828   void andl(CpuRegister dst, const Immediate& imm);
829   void andl(CpuRegister dst, CpuRegister src);
830   void andl(CpuRegister reg, const Address& address);
831   void andq(CpuRegister dst, const Immediate& imm);
832   void andq(CpuRegister dst, CpuRegister src);
833   void andq(CpuRegister reg, const Address& address);
834   void andw(const Address& address, const Immediate& imm);
835 
  // Bitwise OR, register destination only.
836   void orl(CpuRegister dst, const Immediate& imm);
837   void orl(CpuRegister dst, CpuRegister src);
838   void orl(CpuRegister reg, const Address& address);
839   void orq(CpuRegister dst, CpuRegister src);
840   void orq(CpuRegister dst, const Immediate& imm);
841   void orq(CpuRegister reg, const Address& address);
842 
  // Bitwise XOR, register destination only.
843   void xorl(CpuRegister dst, CpuRegister src);
844   void xorl(CpuRegister dst, const Immediate& imm);
845   void xorl(CpuRegister reg, const Address& address);
846   void xorq(CpuRegister dst, const Immediate& imm);
847   void xorq(CpuRegister dst, CpuRegister src);
848   void xorq(CpuRegister reg, const Address& address);
849 
  // Addition. `addl` and `addw` also have memory-destination overloads (address first);
  // `addq` is declared with a register destination only.
850   void addl(CpuRegister dst, CpuRegister src);
851   void addl(CpuRegister reg, const Immediate& imm);
852   void addl(CpuRegister reg, const Address& address);
853   void addl(const Address& address, CpuRegister reg);
854   void addl(const Address& address, const Immediate& imm);
855   void addw(CpuRegister reg, const Immediate& imm);
856   void addw(const Address& address, const Immediate& imm);
857   void addw(const Address& address, CpuRegister reg);
858 
859   void addq(CpuRegister reg, const Immediate& imm);
860   void addq(CpuRegister dst, CpuRegister src);
861   void addq(CpuRegister dst, const Address& address);
862 
  // Subtraction, register destination only for both sizes.
863   void subl(CpuRegister dst, CpuRegister src);
864   void subl(CpuRegister reg, const Immediate& imm);
865   void subl(CpuRegister reg, const Address& address);
866 
867   void subq(CpuRegister reg, const Immediate& imm);
868   void subq(CpuRegister dst, CpuRegister src);
869   void subq(CpuRegister dst, const Address& address);
870 
  // Named after the x86 sign-extension instructions `cdq` and `cqo`, which conventionally
  // precede a signed division. They take no operands.
871   void cdq();
872   void cqo();
873 
  // Signed (`idiv*`) and unsigned (`div*`) division by `reg`. The dividend and result
  // registers are implicit in the x86 instruction and are not parameters here.
874   void idivl(CpuRegister reg);
875   void idivq(CpuRegister reg);
876   void divl(CpuRegister reg);
877   void divq(CpuRegister reg);
878 
  // Signed multiply, 32-bit forms with an explicit destination register.
879   void imull(CpuRegister dst, CpuRegister src);
880   void imull(CpuRegister reg, const Immediate& imm);
881   void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
882   void imull(CpuRegister reg, const Address& address);
883 
  // Signed multiply, 64-bit forms. The single-operand overload has no explicit destination,
  // so its other operand and result location are implicit in the encoding.
884   void imulq(CpuRegister src);
885   void imulq(CpuRegister dst, CpuRegister src);
886   void imulq(CpuRegister reg, const Immediate& imm);
887   void imulq(CpuRegister reg, const Address& address);
888   void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
889 
  // Single-operand 32-bit signed multiply (implicit second operand and destination).
890   void imull(CpuRegister reg);
891   void imull(const Address& address);
892 
  // Single-operand 32-bit unsigned multiply.
893   void mull(CpuRegister reg);
894   void mull(const Address& address);
895 
  // 32-bit shifts (shl: left, shr: logical right, sar: arithmetic right). Each has an
  // immediate-count form and a form whose count comes from the `shifter` register.
  // NOTE(review): x86 only allows CL as a shift-count register; presumably the
  // implementation checks `shifter` accordingly. Confirm.
896   void shll(CpuRegister reg, const Immediate& imm);
897   void shll(CpuRegister operand, CpuRegister shifter);
898   void shrl(CpuRegister reg, const Immediate& imm);
899   void shrl(CpuRegister operand, CpuRegister shifter);
900   void sarl(CpuRegister reg, const Immediate& imm);
901   void sarl(CpuRegister operand, CpuRegister shifter);
902 
  // 64-bit counterparts of the shifts above.
903   void shlq(CpuRegister reg, const Immediate& imm);
904   void shlq(CpuRegister operand, CpuRegister shifter);
905   void shrq(CpuRegister reg, const Immediate& imm);
906   void shrq(CpuRegister operand, CpuRegister shifter);
907   void sarq(CpuRegister reg, const Immediate& imm);
908   void sarq(CpuRegister operand, CpuRegister shifter);
909 
  // Arithmetic negation of `reg` in place. `negl` is what the heap-reference
  // poisoning helpers in this class call.
910   void negl(CpuRegister reg);
911   void negq(CpuRegister reg);
912 
  // Bitwise complement of `reg` in place.
913   void notl(CpuRegister reg);
914   void notq(CpuRegister reg);
915 
  // Stack-frame setup/teardown, named after the `enter`/`leave` instructions.
  // `imm` is presumably the frame size operand of `enter`; confirm in the implementation.
916   void enter(const Immediate& imm);
917   void leave();
918 
  // Return, optionally with an immediate operand (the x86 `ret imm16` form).
919   void ret();
920   void ret(const Immediate& imm);
921 
  // Operand-less instructions named after their mnemonics.
922   void nop();
923   void int3();
924   void hlt();
925 
  // Conditional jump to `label` when `condition` holds. The `NearLabel` overloads presumably
  // use a short displacement encoding; confirm in the implementation. `jrcxz` is declared
  // only for `NearLabel`.
926   void j(Condition condition, Label* label);
927   void j(Condition condition, NearLabel* label);
928   void jrcxz(NearLabel* label);
929 
  // Unconditional jump: indirect through a register or memory operand, or direct to a label.
  // `jmp(Label*)` is what `Jump(Label*)` forwards to.
930   void jmp(CpuRegister reg);
931   void jmp(const Address& address);
932   void jmp(Label* label);
933   void jmp(NearLabel* label);
934 
  // Returns an assembler pointer so that the next instruction can be chained, as the
  // `LockCmpxchg*`/`LockXadd*` helpers do with `lock()->cmpxchgb(...)`. Presumably emits the
  // LOCK prefix byte and returns `this`; confirm in the implementation.
935   X86_64Assembler* lock();
  // Compare-and-exchange of `reg` with memory at `address`, one declaration per operand size.
936   void cmpxchgb(const Address& address, CpuRegister reg);
937   void cmpxchgw(const Address& address, CpuRegister reg);
938   void cmpxchgl(const Address& address, CpuRegister reg);
939   void cmpxchgq(const Address& address, CpuRegister reg);
940 
  // Named after the `mfence` memory-fence instruction.
941   void mfence();
942 
  // Chainable like `lock()`. Presumably emits the GS segment-override prefix; confirm.
943   X86_64Assembler* gs();
944 
  // Named after the `setcc` family: sets `dst` according to `condition`.
945   void setcc(Condition condition, CpuRegister dst);
946 
  // Byte swap of `dst` in place (32-bit and 64-bit forms).
947   void bswapl(CpuRegister dst);
948   void bswapq(CpuRegister dst);
949 
  // Bit scan forward, with register or memory source.
950   void bsfl(CpuRegister dst, CpuRegister src);
951   void bsfl(CpuRegister dst, const Address& src);
952   void bsfq(CpuRegister dst, CpuRegister src);
953   void bsfq(CpuRegister dst, const Address& src);
954 
  // Named after the BMI1 lowest-set-bit instructions. No operand-size suffix is used here,
  // so the operand size is chosen by the implementation.
955   void blsi(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
956   void blsmsk(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
957   void blsr(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
958 
  // Bit scan reverse, with register or memory source.
959   void bsrl(CpuRegister dst, CpuRegister src);
960   void bsrl(CpuRegister dst, const Address& src);
961   void bsrq(CpuRegister dst, CpuRegister src);
962   void bsrq(CpuRegister dst, const Address& src);
963 
  // Population count, with register or memory source.
964   void popcntl(CpuRegister dst, CpuRegister src);
965   void popcntl(CpuRegister dst, const Address& src);
966   void popcntq(CpuRegister dst, CpuRegister src);
967   void popcntq(CpuRegister dst, const Address& src);
968 
  // Named after the `rdtsc` (read time-stamp counter) instruction; result registers are
  // implicit.
969   void rdtsc();
970 
  // 32-bit rotates (ror: right, rol: left), with either an immediate count or a count taken
  // from the `shifter` register. These mirror the shape of the shift declarations.
971   void rorl(CpuRegister reg, const Immediate& imm);
972   void rorl(CpuRegister operand, CpuRegister shifter);
973   void roll(CpuRegister reg, const Immediate& imm);
974   void roll(CpuRegister operand, CpuRegister shifter);
975 
  // 64-bit rotates.
976   void rorq(CpuRegister reg, const Immediate& imm);
977   void rorq(CpuRegister operand, CpuRegister shifter);
978   void rolq(CpuRegister reg, const Immediate& imm);
979   void rolq(CpuRegister operand, CpuRegister shifter);
980 
  // Repeat-prefixed string instructions, named `<prefix>_<instruction><size>`. They take no
  // parameters because the x86 string instructions use implicit registers.
981   void repne_scasb();
982   void repne_scasw();
983   void repe_cmpsw();
984   void repe_cmpsl();
985   void repe_cmpsq();
986   void rep_movsw();
987   void rep_movsb();
988   void rep_movsl();
989 
  // Named after `ud2`, the x86 instruction defined to raise an invalid-opcode exception.
990   void ud2();
991 
992   //
993   // Macros for High-level operations.
994   //
995 
  // Adds `imm` to `reg`. Which instruction(s) are emitted is decided in the implementation,
  // which is not visible here.
996   void AddImmediate(CpuRegister reg, const Immediate& imm);
997 
  // Loads the constant `value` into `dst`. How the constant is materialised is decided in
  // the implementation.
998   void LoadDoubleConstant(XmmRegister dst, double value);
999 
LockCmpxchgb(const Address & address,CpuRegister reg)1000   void LockCmpxchgb(const Address& address, CpuRegister reg) {
1001     lock()->cmpxchgb(address, reg);
1002   }
1003 
LockCmpxchgw(const Address & address,CpuRegister reg)1004   void LockCmpxchgw(const Address& address, CpuRegister reg) {
1005     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1006     // We make sure that the operand size override bytecode is emited before the lock bytecode.
1007     // We test against clang which enforces this bytecode order.
1008     EmitOperandSizeOverride();
1009     EmitUint8(0xF0);
1010     EmitOptionalRex32(reg, address);
1011     EmitUint8(0x0F);
1012     EmitUint8(0xB1);
1013     EmitOperand(reg.LowBits(), address);
1014   }
1015 
LockCmpxchgl(const Address & address,CpuRegister reg)1016   void LockCmpxchgl(const Address& address, CpuRegister reg) {
1017     lock()->cmpxchgl(address, reg);
1018   }
1019 
LockCmpxchgq(const Address & address,CpuRegister reg)1020   void LockCmpxchgq(const Address& address, CpuRegister reg) {
1021     lock()->cmpxchgq(address, reg);
1022   }
1023 
LockXaddb(const Address & address,CpuRegister reg)1024   void LockXaddb(const Address& address, CpuRegister reg) {
1025     lock()->xaddb(address, reg);
1026   }
1027 
LockXaddw(const Address & address,CpuRegister reg)1028   void LockXaddw(const Address& address, CpuRegister reg) {
1029     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1030     // We make sure that the operand size override bytecode is emited before the lock bytecode.
1031     // We test against clang which enforces this bytecode order.
1032     EmitOperandSizeOverride();
1033     EmitUint8(0xF0);
1034     EmitOptionalRex32(reg, address);
1035     EmitUint8(0x0F);
1036     EmitUint8(0xC1);
1037     EmitOperand(reg.LowBits(), address);
1038   }
1039 
LockXaddl(const Address & address,CpuRegister reg)1040   void LockXaddl(const Address& address, CpuRegister reg) {
1041     lock()->xaddl(address, reg);
1042   }
1043 
LockXaddq(const Address & address,CpuRegister reg)1044   void LockXaddq(const Address& address, CpuRegister reg) {
1045     lock()->xaddq(address, reg);
1046   }
1047 
1048   //
1049   // Misc. functionality
1050   //
PreferredLoopAlignment()1051   int PreferredLoopAlignment() { return 16; }
  // Declared here, defined elsewhere. Presumably pads the instruction stream so that the
  // position `offset` ends up `alignment`-aligned; confirm against the definition.
1052   void Align(int alignment, int offset);
  // Overrides the base-class `Bind`; binds `label` (presumably to the current buffer position).
1053   void Bind(Label* label) override;
Jump(Label * label)1054   void Jump(Label* label) override {
1055     jmp(label);
1056   }
  // `NearLabel` overload of `Bind`. It is not marked `override`, unlike `Bind(Label*)`.
1057   void Bind(NearLabel* label);
1058 
1059   // Add a double to the constant area, returning the offset into
1060   // the constant area where the literal resides.
AddDouble(double v)1061   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
1062 
1063   // Add a float to the constant area, returning the offset into
1064   // the constant area where the literal resides.
AddFloat(float v)1065   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
1066 
1067   // Add an int32_t to the constant area, returning the offset into
1068   // the constant area where the literal resides.
AddInt32(int32_t v)1069   size_t AddInt32(int32_t v) {
1070     return constant_area_.AddInt32(v);
1071   }
1072 
1073   // Add an int32_t to the end of the constant area, returning the offset into
1074   // the constant area where the literal resides.
AppendInt32(int32_t v)1075   size_t AppendInt32(int32_t v) {
1076     return constant_area_.AppendInt32(v);
1077   }
1078 
1079   // Add an int64_t to the constant area, returning the offset into
1080   // the constant area where the literal resides.
AddInt64(int64_t v)1081   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
1082 
1083   // Add the contents of the constant area to the assembler buffer.
  // Declared here, defined elsewhere; where in the buffer the contents go is decided there.
1084   void AddConstantArea();
1085 
1086   // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()1087   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
1088 
1089   // Return the current size of the constant area.
ConstantAreaSize()1090   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
1091 
1092   //
1093   // Heap poisoning.
1094   //
1095 
1096   // Poison a heap reference contained in `reg`.
PoisonHeapReference(CpuRegister reg)1097   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
1098   // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(CpuRegister reg)1099   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
1100   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(CpuRegister reg)1101   void MaybePoisonHeapReference(CpuRegister reg) {
1102     if (kPoisonHeapReferences) {
1103       PoisonHeapReference(reg);
1104     }
1105   }
1106   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(CpuRegister reg)1107   void MaybeUnpoisonHeapReference(CpuRegister reg) {
1108     if (kPoisonHeapReferences) {
1109       UnpoisonHeapReference(reg);
1110     }
1111   }
1112 
  // Declared here, defined elsewhere. Presumably reports whether either of the `has_AVX_` /
  // `has_AVX2_` members is set; confirm against the definition.
1113   bool CpuHasAVXorAVX2FeatureFlag();
1114 
1115  private:
  // Lowest-level emitters. All seven are defined inline after the class definition and
  // write to `buffer_` directly or through one another.
1116   void EmitUint8(uint8_t value);
1117   void EmitInt32(int32_t value);
1118   void EmitInt64(int64_t value);
  // Emits a register-direct operand byte; see the definition for how `rm` and `reg` are
  // packed (the parameter names do not match the bit fields one might expect).
1119   void EmitRegisterOperand(uint8_t rm, uint8_t reg);
1120   void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
1121   void EmitFixup(AssemblerFixup* fixup);
  // Emits the 0x66 prefix byte.
1122   void EmitOperandSizeOverride();
1123 
  // Operand, immediate and label emitters; defined outside this part of the header.
  // `EmitOperand(rm, operand)` is called by the hand-written `LockCmpxchgw`/`LockXaddw`
  // with `reg.LowBits()` as `rm`.
1124   void EmitOperand(uint8_t rm, const Operand& operand);
  // `is_16_op` defaults to false; presumably it selects a 16-bit immediate encoding. Confirm.
1125   void EmitImmediate(const Immediate& imm, bool is_16_op = false);
1126   void EmitComplex(
1127       uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
1128   void EmitLabel(Label* label, int instruction_size);
1129   void EmitLabelLink(Label* label);
1130   void EmitLabelLink(NearLabel* label);
1131 
  // Shared helpers for shift-style instructions, one for an immediate count and one for a
  // register count. `wide` presumably selects the 64-bit form and `rm` the operation; confirm.
1132   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
1133   void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
1134 
1135   // If any input is not false, output the necessary rex prefix.
  // The parameter names `w`, `r`, `x`, `b` match the names of the REX prefix bits.
1136   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
1137 
1138   // Emit a rex prefix byte if necessary for reg. ie if reg is a register in the range R8 to R15.
  // The overloads below cover the register/operand combinations used by the instruction
  // emitters. The (CpuRegister, const Operand&) form is the one used by
  // `LockCmpxchgw`/`LockXaddw`.
1139   void EmitOptionalRex32(CpuRegister reg);
1140   void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
1141   void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
1142   void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
1143   void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
1144   void EmitOptionalRex32(const Operand& operand);
1145   void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
1146   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
1147 
1148   // Emit a REX.W prefix plus necessary register bit encodings.
  // The overload set parallels `EmitOptionalRex32`, plus a no-argument form for
  // instructions with no explicit register or memory operand.
1149   void EmitRex64();
1150   void EmitRex64(CpuRegister reg);
1151   void EmitRex64(const Operand& operand);
1152   void EmitRex64(CpuRegister dst, CpuRegister src);
1153   void EmitRex64(CpuRegister dst, const Operand& operand);
1154   void EmitRex64(XmmRegister dst, const Operand& operand);
1155   void EmitRex64(XmmRegister dst, CpuRegister src);
1156   void EmitRex64(CpuRegister dst, XmmRegister src);
1157 
1158   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
1159   // `normalize_both` parameter controls if the REX prefix is checked only for the `src` register
1160   // (which is the case for instructions like `movzxb rax, bpl`), or for both `src` and `dst`
1161   // registers (which is the case of instructions like `xchg bpl, al`). By default only `src` is
1162   // used to decide if REX is needed.
1163   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
1164                                            CpuRegister src,
1165                                            bool normalize_both = false);
  // Register/memory-operand overload; it has no `normalize_both` parameter.
1166   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
1167 
  // Helpers for the bytes of a VEX prefix. Each returns a `uint8_t`.
  // NOTE(review): despite the `Emit` name, the return type suggests these compute the byte
  // for the caller to emit rather than writing it to the buffer; confirm in the definitions.
  // `is_twobyte_form` selects between the two VEX prefix lengths.
1168   uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);
  // The `SET_VEX_*` parameters are named after the VEX fields they presumably populate
  // (M: opcode map, L: vector length, PP: implied prefix).
1169   uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);
1170   uint8_t EmitVexPrefixByteOne(bool R,
1171                                X86_64ManagedRegister operand,
1172                                int SET_VEX_L,
1173                                int SET_VEX_PP);
1174   uint8_t EmitVexPrefixByteTwo(bool W,
1175                                X86_64ManagedRegister operand,
1176                                int SET_VEX_L,
1177                                int SET_VEX_PP);
  // Overload without a register operand.
1178   uint8_t EmitVexPrefixByteTwo(bool W,
1179                                int SET_VEX_L,
1180                                int SET_VEX_PP);
1181 
1182   // Helper function to emit a shorter variant of XCHG if at least one operand is RAX/EAX/AX.
  // `prefix_fn` is a pointer to a member function taking one `CpuRegister`; presumably it
  // emits the size-specific prefix for the short form. The bool result presumably reports
  // whether the short form was emitted. Confirm both against the definition.
1183   bool try_xchg_rax(CpuRegister dst,
1184                     CpuRegister src,
1185                     void (X86_64Assembler::*prefix_fn)(CpuRegister));
1186 
  // Backing store for the literals added through AddDouble/AddFloat/AddInt32/AppendInt32/
  // AddInt64, and queried by IsConstantAreaEmpty/ConstantAreaSize.
1187   ConstantArea constant_area_;
1188   bool has_AVX_;     // x86 256bit SIMD AVX.
1189   bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
1190 
  // Project macro; by its name it removes the copy constructor and copy assignment.
1191   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
1192 };
1193 
EmitUint8(uint8_t value)1194 inline void X86_64Assembler::EmitUint8(uint8_t value) {
1195   buffer_.Emit<uint8_t>(value);
1196 }
1197 
EmitInt32(int32_t value)1198 inline void X86_64Assembler::EmitInt32(int32_t value) {
1199   buffer_.Emit<int32_t>(value);
1200 }
1201 
EmitInt64(int64_t value)1202 inline void X86_64Assembler::EmitInt64(int64_t value) {
1203   // Write this 64-bit value as two 32-bit words for alignment reasons
1204   // (this is essentially when running on ARM, which does not allow
1205   // 64-bit unaligned accesses).  We assume little-endianness here.
1206   EmitInt32(Low32Bits(value));
1207   EmitInt32(High32Bits(value));
1208 }
1209 
EmitRegisterOperand(uint8_t rm,uint8_t reg)1210 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
1211   CHECK_GE(rm, 0);
1212   CHECK_LT(rm, 8);
1213   buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
1214 }
1215 
EmitXmmRegisterOperand(uint8_t rm,XmmRegister reg)1216 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
1217   EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
1218 }
1219 
EmitFixup(AssemblerFixup * fixup)1220 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
1221   buffer_.EmitFixup(fixup);
1222 }
1223 
EmitOperandSizeOverride()1224 inline void X86_64Assembler::EmitOperandSizeOverride() {
1225   EmitUint8(0x66);
1226 }
1227 
1228 }  // namespace x86_64
1229 }  // namespace art
1230 
1231 #endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
1232