1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
19 
20 #include <vector>
21 
22 #include "base/arena_containers.h"
23 #include "base/array_ref.h"
24 #include "base/bit_utils.h"
25 #include "base/globals.h"
26 #include "base/macros.h"
27 #include "constants_x86_64.h"
28 #include "heap_poisoning.h"
29 #include "managed_register_x86_64.h"
30 #include "offsets.h"
31 #include "utils/assembler.h"
32 #include "utils/jni_macro_assembler.h"
33 
34 namespace art {
35 namespace x86_64 {
36 
37 // Encodes an immediate value for operands.
38 //
39 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
40 // to 32b.
41 //
42 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
43 // conversion rules in expressions regarding negation, especially size_t on 32b.
44 class Immediate : public ValueObject {
45  public:
Immediate(int64_t value_in)46   explicit Immediate(int64_t value_in) : value_(value_in) {}
47 
value()48   int64_t value() const { return value_; }
49 
is_int8()50   bool is_int8() const { return IsInt<8>(value_); }
is_uint8()51   bool is_uint8() const { return IsUint<8>(value_); }
is_int16()52   bool is_int16() const { return IsInt<16>(value_); }
is_uint16()53   bool is_uint16() const { return IsUint<16>(value_); }
is_int32()54   bool is_int32() const { return IsInt<32>(value_); }
55 
56  private:
57   const int64_t value_;
58 };
59 
60 
61 class Operand : public ValueObject {
62  public:
mod()63   uint8_t mod() const {
64     return (encoding_at(0) >> 6) & 3;
65   }
66 
rm()67   Register rm() const {
68     return static_cast<Register>(encoding_at(0) & 7);
69   }
70 
scale()71   ScaleFactor scale() const {
72     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
73   }
74 
index()75   Register index() const {
76     return static_cast<Register>((encoding_at(1) >> 3) & 7);
77   }
78 
base()79   Register base() const {
80     return static_cast<Register>(encoding_at(1) & 7);
81   }
82 
cpu_rm()83   CpuRegister cpu_rm() const {
84     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
85     return static_cast<CpuRegister>(rm() + ext);
86   }
87 
cpu_index()88   CpuRegister cpu_index() const {
89     int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
90     return static_cast<CpuRegister>(index() + ext);
91   }
92 
cpu_base()93   CpuRegister cpu_base() const {
94     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
95     return static_cast<CpuRegister>(base() + ext);
96   }
97 
rex()98   uint8_t rex() const {
99     return rex_;
100   }
101 
disp8()102   int8_t disp8() const {
103     CHECK_GE(length_, 2);
104     return static_cast<int8_t>(encoding_[length_ - 1]);
105   }
106 
disp32()107   int32_t disp32() const {
108     CHECK_GE(length_, 5);
109     int32_t value;
110     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
111     return value;
112   }
113 
IsRegister(CpuRegister reg)114   bool IsRegister(CpuRegister reg) const {
115     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
116         && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
117         && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
118   }
119 
GetFixup()120   AssemblerFixup* GetFixup() const {
121     return fixup_;
122   }
123 
124  protected:
125   // Operand can be sub classed (e.g: Address).
Operand()126   Operand() : rex_(0), length_(0), fixup_(nullptr) { }
127 
SetModRM(uint8_t mod_in,CpuRegister rm_in)128   void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
129     CHECK_EQ(mod_in & ~3, 0);
130     if (rm_in.NeedsRex()) {
131       rex_ |= 0x41;  // REX.000B
132     }
133     encoding_[0] = (mod_in << 6) | rm_in.LowBits();
134     length_ = 1;
135   }
136 
SetSIB(ScaleFactor scale_in,CpuRegister index_in,CpuRegister base_in)137   void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
138     CHECK_EQ(length_, 1);
139     CHECK_EQ(scale_in & ~3, 0);
140     if (base_in.NeedsRex()) {
141       rex_ |= 0x41;  // REX.000B
142     }
143     if (index_in.NeedsRex()) {
144       rex_ |= 0x42;  // REX.00X0
145     }
146     encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
147         static_cast<uint8_t>(base_in.LowBits());
148     length_ = 2;
149   }
150 
SetDisp8(int8_t disp)151   void SetDisp8(int8_t disp) {
152     CHECK(length_ == 1 || length_ == 2);
153     encoding_[length_++] = static_cast<uint8_t>(disp);
154   }
155 
SetDisp32(int32_t disp)156   void SetDisp32(int32_t disp) {
157     CHECK(length_ == 1 || length_ == 2);
158     int disp_size = sizeof(disp);
159     memmove(&encoding_[length_], &disp, disp_size);
160     length_ += disp_size;
161   }
162 
SetFixup(AssemblerFixup * fixup)163   void SetFixup(AssemblerFixup* fixup) {
164     fixup_ = fixup;
165   }
166 
167  private:
168   uint8_t rex_;
169   uint8_t length_;
170   uint8_t encoding_[6];
171   AssemblerFixup* fixup_;
172 
Operand(CpuRegister reg)173   explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
174 
175   // Get the operand encoding byte at the given index.
encoding_at(int index_in)176   uint8_t encoding_at(int index_in) const {
177     CHECK_GE(index_in, 0);
178     CHECK_LT(index_in, length_);
179     return encoding_[index_in];
180   }
181 
182   friend class X86_64Assembler;
183 };
184 
185 
186 class Address : public Operand {
187  public:
Address(CpuRegister base_in,int32_t disp)188   Address(CpuRegister base_in, int32_t disp) {
189     Init(base_in, disp);
190   }
191 
Address(CpuRegister base_in,Offset disp)192   Address(CpuRegister base_in, Offset disp) {
193     Init(base_in, disp.Int32Value());
194   }
195 
Address(CpuRegister base_in,FrameOffset disp)196   Address(CpuRegister base_in, FrameOffset disp) {
197     CHECK_EQ(base_in.AsRegister(), RSP);
198     Init(CpuRegister(RSP), disp.Int32Value());
199   }
200 
Address(CpuRegister base_in,MemberOffset disp)201   Address(CpuRegister base_in, MemberOffset disp) {
202     Init(base_in, disp.Int32Value());
203   }
204 
Init(CpuRegister base_in,int32_t disp)205   void Init(CpuRegister base_in, int32_t disp) {
206     if (disp == 0 && base_in.LowBits() != RBP) {
207       SetModRM(0, base_in);
208       if (base_in.LowBits() == RSP) {
209         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
210       }
211     } else if (disp >= -128 && disp <= 127) {
212       SetModRM(1, base_in);
213       if (base_in.LowBits() == RSP) {
214         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
215       }
216       SetDisp8(disp);
217     } else {
218       SetModRM(2, base_in);
219       if (base_in.LowBits() == RSP) {
220         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
221       }
222       SetDisp32(disp);
223     }
224   }
225 
226 
Address(CpuRegister index_in,ScaleFactor scale_in,int32_t disp)227   Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
228     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
229     SetModRM(0, CpuRegister(RSP));
230     SetSIB(scale_in, index_in, CpuRegister(RBP));
231     SetDisp32(disp);
232   }
233 
Address(CpuRegister base_in,CpuRegister index_in,ScaleFactor scale_in,int32_t disp)234   Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
235     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
236     if (disp == 0 && base_in.LowBits() != RBP) {
237       SetModRM(0, CpuRegister(RSP));
238       SetSIB(scale_in, index_in, base_in);
239     } else if (disp >= -128 && disp <= 127) {
240       SetModRM(1, CpuRegister(RSP));
241       SetSIB(scale_in, index_in, base_in);
242       SetDisp8(disp);
243     } else {
244       SetModRM(2, CpuRegister(RSP));
245       SetSIB(scale_in, index_in, base_in);
246       SetDisp32(disp);
247     }
248   }
249 
250   // If no_rip is true then the Absolute address isn't RIP relative.
251   static Address Absolute(uintptr_t addr, bool no_rip = false) {
252     Address result;
253     if (no_rip) {
254       result.SetModRM(0, CpuRegister(RSP));
255       result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
256       result.SetDisp32(addr);
257     } else {
258       // RIP addressing is done using RBP as the base register.
259       // The value in RBP isn't used.  Instead the offset is added to RIP.
260       result.SetModRM(0, CpuRegister(RBP));
261       result.SetDisp32(addr);
262     }
263     return result;
264   }
265 
266   // An RIP relative address that will be fixed up later.
RIP(AssemblerFixup * fixup)267   static Address RIP(AssemblerFixup* fixup) {
268     Address result;
269     // RIP addressing is done using RBP as the base register.
270     // The value in RBP isn't used.  Instead the offset is added to RIP.
271     result.SetModRM(0, CpuRegister(RBP));
272     result.SetDisp32(0);
273     result.SetFixup(fixup);
274     return result;
275   }
276 
277   // If no_rip is true then the Absolute address isn't RIP relative.
278   static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
279     return Absolute(addr.Int32Value(), no_rip);
280   }
281 
282  private:
Address()283   Address() {}
284 };
285 
286 std::ostream& operator<<(std::ostream& os, const Address& addr);
287 
288 /**
289  * Class to handle constant area values.
290  */
291 class ConstantArea {
292  public:
ConstantArea(ArenaAllocator * allocator)293   explicit ConstantArea(ArenaAllocator* allocator)
294       : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
295 
296   // Add a double to the constant area, returning the offset into
297   // the constant area where the literal resides.
298   size_t AddDouble(double v);
299 
300   // Add a float to the constant area, returning the offset into
301   // the constant area where the literal resides.
302   size_t AddFloat(float v);
303 
304   // Add an int32_t to the constant area, returning the offset into
305   // the constant area where the literal resides.
306   size_t AddInt32(int32_t v);
307 
308   // Add an int32_t to the end of the constant area, returning the offset into
309   // the constant area where the literal resides.
310   size_t AppendInt32(int32_t v);
311 
312   // Add an int64_t to the constant area, returning the offset into
313   // the constant area where the literal resides.
314   size_t AddInt64(int64_t v);
315 
GetSize()316   size_t GetSize() const {
317     return buffer_.size() * elem_size_;
318   }
319 
GetBuffer()320   ArrayRef<const int32_t> GetBuffer() const {
321     return ArrayRef<const int32_t>(buffer_);
322   }
323 
324  private:
325   static constexpr size_t elem_size_ = sizeof(int32_t);
326   ArenaVector<int32_t> buffer_;
327 };
328 
329 
330 // This is equivalent to the Label class, used in a slightly different context. We
331 // inherit the functionality of the Label class, but prevent unintended
332 // derived-to-base conversions by making the base class private.
333 class NearLabel : private Label {
334  public:
NearLabel()335   NearLabel() : Label() {}
336 
337   // Expose the Label routines that we need.
338   using Label::Position;
339   using Label::LinkPosition;
340   using Label::IsBound;
341   using Label::IsUnused;
342   using Label::IsLinked;
343 
344  private:
345   using Label::BindTo;
346   using Label::LinkTo;
347 
348   friend class x86_64::X86_64Assembler;
349 
350   DISALLOW_COPY_AND_ASSIGN(NearLabel);
351 };
352 
353 
354 class X86_64Assembler final : public Assembler {
355  public:
X86_64Assembler(ArenaAllocator * allocator)356   explicit X86_64Assembler(ArenaAllocator* allocator)
357       : Assembler(allocator), constant_area_(allocator) {}
~X86_64Assembler()358   virtual ~X86_64Assembler() {}
359 
360   /*
361    * Emit Machine Instructions.
362    */
363   void call(CpuRegister reg);
364   void call(const Address& address);
365   void call(Label* label);
366 
367   void pushq(CpuRegister reg);
368   void pushq(const Address& address);
369   void pushq(const Immediate& imm);
370 
371   void popq(CpuRegister reg);
372   void popq(const Address& address);
373 
374   void movq(CpuRegister dst, const Immediate& src);
375   void movl(CpuRegister dst, const Immediate& src);
376   void movq(CpuRegister dst, CpuRegister src);
377   void movl(CpuRegister dst, CpuRegister src);
378 
379   void movntl(const Address& dst, CpuRegister src);
380   void movntq(const Address& dst, CpuRegister src);
381 
382   void movq(CpuRegister dst, const Address& src);
383   void movl(CpuRegister dst, const Address& src);
384   void movq(const Address& dst, CpuRegister src);
385   void movq(const Address& dst, const Immediate& imm);
386   void movl(const Address& dst, CpuRegister src);
387   void movl(const Address& dst, const Immediate& imm);
388 
389   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
390   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
391   void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
392 
393   void movzxb(CpuRegister dst, CpuRegister src);
394   void movzxb(CpuRegister dst, const Address& src);
395   void movsxb(CpuRegister dst, CpuRegister src);
396   void movsxb(CpuRegister dst, const Address& src);
397   void movb(CpuRegister dst, const Address& src);
398   void movb(const Address& dst, CpuRegister src);
399   void movb(const Address& dst, const Immediate& imm);
400 
401   void movzxw(CpuRegister dst, CpuRegister src);
402   void movzxw(CpuRegister dst, const Address& src);
403   void movsxw(CpuRegister dst, CpuRegister src);
404   void movsxw(CpuRegister dst, const Address& src);
405   void movw(CpuRegister dst, const Address& src);
406   void movw(const Address& dst, CpuRegister src);
407   void movw(const Address& dst, const Immediate& imm);
408 
409   void leaq(CpuRegister dst, const Address& src);
410   void leal(CpuRegister dst, const Address& src);
411 
412   void movaps(XmmRegister dst, XmmRegister src);     // move
413   void movaps(XmmRegister dst, const Address& src);  // load aligned
414   void movups(XmmRegister dst, const Address& src);  // load unaligned
415   void movaps(const Address& dst, XmmRegister src);  // store aligned
416   void movups(const Address& dst, XmmRegister src);  // store unaligned
417 
418   void movss(XmmRegister dst, const Address& src);
419   void movss(const Address& dst, XmmRegister src);
420   void movss(XmmRegister dst, XmmRegister src);
421 
422   void movsxd(CpuRegister dst, CpuRegister src);
423   void movsxd(CpuRegister dst, const Address& src);
424 
425   void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
426   void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
427   void movd(XmmRegister dst, CpuRegister src, bool is64bit);
428   void movd(CpuRegister dst, XmmRegister src, bool is64bit);
429 
430   void addss(XmmRegister dst, XmmRegister src);
431   void addss(XmmRegister dst, const Address& src);
432   void subss(XmmRegister dst, XmmRegister src);
433   void subss(XmmRegister dst, const Address& src);
434   void mulss(XmmRegister dst, XmmRegister src);
435   void mulss(XmmRegister dst, const Address& src);
436   void divss(XmmRegister dst, XmmRegister src);
437   void divss(XmmRegister dst, const Address& src);
438 
439   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
440   void subps(XmmRegister dst, XmmRegister src);
441   void mulps(XmmRegister dst, XmmRegister src);
442   void divps(XmmRegister dst, XmmRegister src);
443 
444   void movapd(XmmRegister dst, XmmRegister src);     // move
445   void movapd(XmmRegister dst, const Address& src);  // load aligned
446   void movupd(XmmRegister dst, const Address& src);  // load unaligned
447   void movapd(const Address& dst, XmmRegister src);  // store aligned
448   void movupd(const Address& dst, XmmRegister src);  // store unaligned
449 
450   void movsd(XmmRegister dst, const Address& src);
451   void movsd(const Address& dst, XmmRegister src);
452   void movsd(XmmRegister dst, XmmRegister src);
453 
454   void addsd(XmmRegister dst, XmmRegister src);
455   void addsd(XmmRegister dst, const Address& src);
456   void subsd(XmmRegister dst, XmmRegister src);
457   void subsd(XmmRegister dst, const Address& src);
458   void mulsd(XmmRegister dst, XmmRegister src);
459   void mulsd(XmmRegister dst, const Address& src);
460   void divsd(XmmRegister dst, XmmRegister src);
461   void divsd(XmmRegister dst, const Address& src);
462 
463   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
464   void subpd(XmmRegister dst, XmmRegister src);
465   void mulpd(XmmRegister dst, XmmRegister src);
466   void divpd(XmmRegister dst, XmmRegister src);
467 
468   void movdqa(XmmRegister dst, XmmRegister src);     // move
469   void movdqa(XmmRegister dst, const Address& src);  // load aligned
470   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
471   void movdqa(const Address& dst, XmmRegister src);  // store aligned
472   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
473 
474   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
475   void psubb(XmmRegister dst, XmmRegister src);
476 
477   void paddw(XmmRegister dst, XmmRegister src);
478   void psubw(XmmRegister dst, XmmRegister src);
479   void pmullw(XmmRegister dst, XmmRegister src);
480 
481   void paddd(XmmRegister dst, XmmRegister src);
482   void psubd(XmmRegister dst, XmmRegister src);
483   void pmulld(XmmRegister dst, XmmRegister src);
484 
485   void paddq(XmmRegister dst, XmmRegister src);
486   void psubq(XmmRegister dst, XmmRegister src);
487 
488   void paddusb(XmmRegister dst, XmmRegister src);
489   void paddsb(XmmRegister dst, XmmRegister src);
490   void paddusw(XmmRegister dst, XmmRegister src);
491   void paddsw(XmmRegister dst, XmmRegister src);
492   void psubusb(XmmRegister dst, XmmRegister src);
493   void psubsb(XmmRegister dst, XmmRegister src);
494   void psubusw(XmmRegister dst, XmmRegister src);
495   void psubsw(XmmRegister dst, XmmRegister src);
496 
497   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
498   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
499   void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
500   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
501   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
502   void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
503 
504   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
505   void cvtss2sd(XmmRegister dst, XmmRegister src);
506   void cvtss2sd(XmmRegister dst, const Address& src);
507 
508   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
509   void cvtsd2ss(XmmRegister dst, XmmRegister src);
510   void cvtsd2ss(XmmRegister dst, const Address& src);
511 
512   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
513   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
514   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
515   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
516 
517   void cvtdq2ps(XmmRegister dst, XmmRegister src);
518   void cvtdq2pd(XmmRegister dst, XmmRegister src);
519 
520   void comiss(XmmRegister a, XmmRegister b);
521   void comiss(XmmRegister a, const Address& b);
522   void comisd(XmmRegister a, XmmRegister b);
523   void comisd(XmmRegister a, const Address& b);
524   void ucomiss(XmmRegister a, XmmRegister b);
525   void ucomiss(XmmRegister a, const Address& b);
526   void ucomisd(XmmRegister a, XmmRegister b);
527   void ucomisd(XmmRegister a, const Address& b);
528 
529   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
530   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
531 
532   void sqrtsd(XmmRegister dst, XmmRegister src);
533   void sqrtss(XmmRegister dst, XmmRegister src);
534 
535   void xorpd(XmmRegister dst, const Address& src);
536   void xorpd(XmmRegister dst, XmmRegister src);
537   void xorps(XmmRegister dst, const Address& src);
538   void xorps(XmmRegister dst, XmmRegister src);
539   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
540 
541   void andpd(XmmRegister dst, const Address& src);
542   void andpd(XmmRegister dst, XmmRegister src);
543   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
544   void pand(XmmRegister dst, XmmRegister src);
545 
546   void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
547   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
548   void andnps(XmmRegister dst, XmmRegister src);
549   void pandn(XmmRegister dst, XmmRegister src);
550 
551   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
552   void orps(XmmRegister dst, XmmRegister src);
553   void por(XmmRegister dst, XmmRegister src);
554 
555   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
556   void pavgw(XmmRegister dst, XmmRegister src);
557   void psadbw(XmmRegister dst, XmmRegister src);
558   void pmaddwd(XmmRegister dst, XmmRegister src);
559   void phaddw(XmmRegister dst, XmmRegister src);
560   void phaddd(XmmRegister dst, XmmRegister src);
561   void haddps(XmmRegister dst, XmmRegister src);
562   void haddpd(XmmRegister dst, XmmRegister src);
563   void phsubw(XmmRegister dst, XmmRegister src);
564   void phsubd(XmmRegister dst, XmmRegister src);
565   void hsubps(XmmRegister dst, XmmRegister src);
566   void hsubpd(XmmRegister dst, XmmRegister src);
567 
568   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
569   void pmaxsb(XmmRegister dst, XmmRegister src);
570   void pminsw(XmmRegister dst, XmmRegister src);
571   void pmaxsw(XmmRegister dst, XmmRegister src);
572   void pminsd(XmmRegister dst, XmmRegister src);
573   void pmaxsd(XmmRegister dst, XmmRegister src);
574 
575   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
576   void pmaxub(XmmRegister dst, XmmRegister src);
577   void pminuw(XmmRegister dst, XmmRegister src);
578   void pmaxuw(XmmRegister dst, XmmRegister src);
579   void pminud(XmmRegister dst, XmmRegister src);
580   void pmaxud(XmmRegister dst, XmmRegister src);
581 
582   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
583   void maxps(XmmRegister dst, XmmRegister src);
584   void minpd(XmmRegister dst, XmmRegister src);
585   void maxpd(XmmRegister dst, XmmRegister src);
586 
587   void pcmpeqb(XmmRegister dst, XmmRegister src);
588   void pcmpeqw(XmmRegister dst, XmmRegister src);
589   void pcmpeqd(XmmRegister dst, XmmRegister src);
590   void pcmpeqq(XmmRegister dst, XmmRegister src);
591 
592   void pcmpgtb(XmmRegister dst, XmmRegister src);
593   void pcmpgtw(XmmRegister dst, XmmRegister src);
594   void pcmpgtd(XmmRegister dst, XmmRegister src);
595   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
596 
597   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
598   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
599   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
600 
601   void punpcklbw(XmmRegister dst, XmmRegister src);
602   void punpcklwd(XmmRegister dst, XmmRegister src);
603   void punpckldq(XmmRegister dst, XmmRegister src);
604   void punpcklqdq(XmmRegister dst, XmmRegister src);
605 
606   void punpckhbw(XmmRegister dst, XmmRegister src);
607   void punpckhwd(XmmRegister dst, XmmRegister src);
608   void punpckhdq(XmmRegister dst, XmmRegister src);
609   void punpckhqdq(XmmRegister dst, XmmRegister src);
610 
611   void psllw(XmmRegister reg, const Immediate& shift_count);
612   void pslld(XmmRegister reg, const Immediate& shift_count);
613   void psllq(XmmRegister reg, const Immediate& shift_count);
614 
615   void psraw(XmmRegister reg, const Immediate& shift_count);
616   void psrad(XmmRegister reg, const Immediate& shift_count);
617   // no psraq
618 
619   void psrlw(XmmRegister reg, const Immediate& shift_count);
620   void psrld(XmmRegister reg, const Immediate& shift_count);
621   void psrlq(XmmRegister reg, const Immediate& shift_count);
622   void psrldq(XmmRegister reg, const Immediate& shift_count);
623 
624   void flds(const Address& src);
625   void fstps(const Address& dst);
626   void fsts(const Address& dst);
627 
628   void fldl(const Address& src);
629   void fstpl(const Address& dst);
630   void fstl(const Address& dst);
631 
632   void fstsw();
633 
634   void fucompp();
635 
636   void fnstcw(const Address& dst);
637   void fldcw(const Address& src);
638 
639   void fistpl(const Address& dst);
640   void fistps(const Address& dst);
641   void fildl(const Address& src);
642   void filds(const Address& src);
643 
644   void fincstp();
645   void ffree(const Immediate& index);
646 
647   void fsin();
648   void fcos();
649   void fptan();
650   void fprem();
651 
652   void xchgl(CpuRegister dst, CpuRegister src);
653   void xchgq(CpuRegister dst, CpuRegister src);
654   void xchgl(CpuRegister reg, const Address& address);
655 
656   void cmpb(const Address& address, const Immediate& imm);
657   void cmpw(const Address& address, const Immediate& imm);
658 
659   void cmpl(CpuRegister reg, const Immediate& imm);
660   void cmpl(CpuRegister reg0, CpuRegister reg1);
661   void cmpl(CpuRegister reg, const Address& address);
662   void cmpl(const Address& address, CpuRegister reg);
663   void cmpl(const Address& address, const Immediate& imm);
664 
665   void cmpq(CpuRegister reg0, CpuRegister reg1);
666   void cmpq(CpuRegister reg0, const Immediate& imm);
667   void cmpq(CpuRegister reg0, const Address& address);
668   void cmpq(const Address& address, const Immediate& imm);
669 
670   void testl(CpuRegister reg1, CpuRegister reg2);
671   void testl(CpuRegister reg, const Address& address);
672   void testl(CpuRegister reg, const Immediate& imm);
673 
674   void testq(CpuRegister reg1, CpuRegister reg2);
675   void testq(CpuRegister reg, const Address& address);
676 
677   void testb(const Address& address, const Immediate& imm);
678   void testl(const Address& address, const Immediate& imm);
679 
680   void andl(CpuRegister dst, const Immediate& imm);
681   void andl(CpuRegister dst, CpuRegister src);
682   void andl(CpuRegister reg, const Address& address);
683   void andq(CpuRegister dst, const Immediate& imm);
684   void andq(CpuRegister dst, CpuRegister src);
685   void andq(CpuRegister reg, const Address& address);
686 
687   void orl(CpuRegister dst, const Immediate& imm);
688   void orl(CpuRegister dst, CpuRegister src);
689   void orl(CpuRegister reg, const Address& address);
690   void orq(CpuRegister dst, CpuRegister src);
691   void orq(CpuRegister dst, const Immediate& imm);
692   void orq(CpuRegister reg, const Address& address);
693 
694   void xorl(CpuRegister dst, CpuRegister src);
695   void xorl(CpuRegister dst, const Immediate& imm);
696   void xorl(CpuRegister reg, const Address& address);
697   void xorq(CpuRegister dst, const Immediate& imm);
698   void xorq(CpuRegister dst, CpuRegister src);
699   void xorq(CpuRegister reg, const Address& address);
700 
701   void addl(CpuRegister dst, CpuRegister src);
702   void addl(CpuRegister reg, const Immediate& imm);
703   void addl(CpuRegister reg, const Address& address);
704   void addl(const Address& address, CpuRegister reg);
705   void addl(const Address& address, const Immediate& imm);
706   void addw(const Address& address, const Immediate& imm);
707 
708   void addq(CpuRegister reg, const Immediate& imm);
709   void addq(CpuRegister dst, CpuRegister src);
710   void addq(CpuRegister dst, const Address& address);
711 
712   void subl(CpuRegister dst, CpuRegister src);
713   void subl(CpuRegister reg, const Immediate& imm);
714   void subl(CpuRegister reg, const Address& address);
715 
716   void subq(CpuRegister reg, const Immediate& imm);
717   void subq(CpuRegister dst, CpuRegister src);
718   void subq(CpuRegister dst, const Address& address);
719 
720   void cdq();
721   void cqo();
722 
723   void idivl(CpuRegister reg);
724   void idivq(CpuRegister reg);
725 
726   void imull(CpuRegister dst, CpuRegister src);
727   void imull(CpuRegister reg, const Immediate& imm);
728   void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
729   void imull(CpuRegister reg, const Address& address);
730 
731   void imulq(CpuRegister src);
732   void imulq(CpuRegister dst, CpuRegister src);
733   void imulq(CpuRegister reg, const Immediate& imm);
734   void imulq(CpuRegister reg, const Address& address);
735   void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
736 
737   void imull(CpuRegister reg);
738   void imull(const Address& address);
739 
740   void mull(CpuRegister reg);
741   void mull(const Address& address);
742 
743   void shll(CpuRegister reg, const Immediate& imm);
744   void shll(CpuRegister operand, CpuRegister shifter);
745   void shrl(CpuRegister reg, const Immediate& imm);
746   void shrl(CpuRegister operand, CpuRegister shifter);
747   void sarl(CpuRegister reg, const Immediate& imm);
748   void sarl(CpuRegister operand, CpuRegister shifter);
749 
750   void shlq(CpuRegister reg, const Immediate& imm);
751   void shlq(CpuRegister operand, CpuRegister shifter);
752   void shrq(CpuRegister reg, const Immediate& imm);
753   void shrq(CpuRegister operand, CpuRegister shifter);
754   void sarq(CpuRegister reg, const Immediate& imm);
755   void sarq(CpuRegister operand, CpuRegister shifter);
756 
757   void negl(CpuRegister reg);
758   void negq(CpuRegister reg);
759 
760   void notl(CpuRegister reg);
761   void notq(CpuRegister reg);
762 
763   void enter(const Immediate& imm);
764   void leave();
765 
766   void ret();
767   void ret(const Immediate& imm);
768 
769   void nop();
770   void int3();
771   void hlt();
772 
773   void j(Condition condition, Label* label);
774   void j(Condition condition, NearLabel* label);
775   void jrcxz(NearLabel* label);
776 
777   void jmp(CpuRegister reg);
778   void jmp(const Address& address);
779   void jmp(Label* label);
780   void jmp(NearLabel* label);
781 
782   X86_64Assembler* lock();
783   void cmpxchgl(const Address& address, CpuRegister reg);
784   void cmpxchgq(const Address& address, CpuRegister reg);
785 
786   void mfence();
787 
788   X86_64Assembler* gs();
789 
790   void setcc(Condition condition, CpuRegister dst);
791 
792   void bswapl(CpuRegister dst);
793   void bswapq(CpuRegister dst);
794 
  // Bit-scan-forward mnemonics; the source may be a register or an address operand.
  void bsfl(CpuRegister dst, CpuRegister src);
  void bsfl(CpuRegister dst, const Address& src);
  void bsfq(CpuRegister dst, CpuRegister src);
  void bsfq(CpuRegister dst, const Address& src);

  // BLSI / BLSMSK / BLSR mnemonics; only register-to-register forms are declared.
  void blsi(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
  void blsmsk(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
  void blsr(CpuRegister dst, CpuRegister src);  // no addr variant (for now)

  // Bit-scan-reverse mnemonics; the source may be a register or an address operand.
  void bsrl(CpuRegister dst, CpuRegister src);
  void bsrl(CpuRegister dst, const Address& src);
  void bsrq(CpuRegister dst, CpuRegister src);
  void bsrq(CpuRegister dst, const Address& src);

  // Population-count mnemonics; the source may be a register or an address operand.
  void popcntl(CpuRegister dst, CpuRegister src);
  void popcntl(CpuRegister dst, const Address& src);
  void popcntq(CpuRegister dst, CpuRegister src);
  void popcntq(CpuRegister dst, const Address& src);
813 
  // Rotate mnemonics. As with the shifts, each is declared in two overloads: rotate count
  // as an `Immediate`, or held in the `shifter` register.
  void rorl(CpuRegister reg, const Immediate& imm);
  void rorl(CpuRegister operand, CpuRegister shifter);
  void roll(CpuRegister reg, const Immediate& imm);
  void roll(CpuRegister operand, CpuRegister shifter);

  void rorq(CpuRegister reg, const Immediate& imm);
  void rorq(CpuRegister operand, CpuRegister shifter);
  void rolq(CpuRegister reg, const Immediate& imm);
  void rolq(CpuRegister operand, CpuRegister shifter);

  // Repeat-prefixed string instructions; none of them takes explicit operands.
  // NOTE(review): the trailing b/w/l/q presumably names the element size - confirm.
  void repne_scasb();
  void repne_scasw();
  void repe_cmpsw();
  void repe_cmpsl();
  void repe_cmpsq();
  void rep_movsw();
830 
  //
  // Macros for High-level operations.
  //

  // NOTE(review): presumably adds `imm` to `reg`; the exact instruction selection is in
  // the definition, which is not visible here - confirm.
  void AddImmediate(CpuRegister reg, const Immediate& imm);

  // NOTE(review): presumably materializes the double `value` in `dst`; how the constant
  // is produced is decided by the definition - confirm.
  void LoadDoubleConstant(XmmRegister dst, double value);
838 
LockCmpxchgl(const Address & address,CpuRegister reg)839   void LockCmpxchgl(const Address& address, CpuRegister reg) {
840     lock()->cmpxchgl(address, reg);
841   }
842 
LockCmpxchgq(const Address & address,CpuRegister reg)843   void LockCmpxchgq(const Address& address, CpuRegister reg) {
844     lock()->cmpxchgq(address, reg);
845   }
846 
847   //
848   // Misc. functionality
849   //
PreferredLoopAlignment()850   int PreferredLoopAlignment() { return 16; }
  // NOTE(review): presumably pads the output so that the position, adjusted by `offset`,
  // satisfies `alignment` - the exact contract is in the definition; confirm there.
  void Align(int alignment, int offset);
  // Overrides the base-class Bind() for regular labels.
  void Bind(Label* label) override;
Jump(Label * label)853   void Jump(Label* label) override {
854     jmp(label);
855   }
  // NearLabel counterpart of Bind(Label*); unlike that overload it is not marked `override`.
  void Bind(NearLabel* label);
857 
858   // Add a double to the constant area, returning the offset into
859   // the constant area where the literal resides.
AddDouble(double v)860   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
861 
862   // Add a float to the constant area, returning the offset into
863   // the constant area where the literal resides.
AddFloat(float v)864   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
865 
866   // Add an int32_t to the constant area, returning the offset into
867   // the constant area where the literal resides.
AddInt32(int32_t v)868   size_t AddInt32(int32_t v) {
869     return constant_area_.AddInt32(v);
870   }
871 
872   // Add an int32_t to the end of the constant area, returning the offset into
873   // the constant area where the literal resides.
AppendInt32(int32_t v)874   size_t AppendInt32(int32_t v) {
875     return constant_area_.AppendInt32(v);
876   }
877 
878   // Add an int64_t to the constant area, returning the offset into
879   // the constant area where the literal resides.
AddInt64(int64_t v)880   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
881 
  // Add the contents of the constant area to the assembler buffer.
  // Declared here only; the body is not part of the class definition.
  void AddConstantArea();
884 
885   // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()886   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
887 
888   // Return the current size of the constant area.
ConstantAreaSize()889   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
890 
891   //
892   // Heap poisoning.
893   //
894 
895   // Poison a heap reference contained in `reg`.
PoisonHeapReference(CpuRegister reg)896   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
897   // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(CpuRegister reg)898   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
899   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(CpuRegister reg)900   void MaybePoisonHeapReference(CpuRegister reg) {
901     if (kPoisonHeapReferences) {
902       PoisonHeapReference(reg);
903     }
904   }
905   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(CpuRegister reg)906   void MaybeUnpoisonHeapReference(CpuRegister reg) {
907     if (kPoisonHeapReferences) {
908       UnpoisonHeapReference(reg);
909     }
910   }
911 
912  private:
  // Low-level emitters. These seven are defined inline right after the class body:
  // EmitUint8/EmitInt32 write one value into the buffer, EmitInt64 writes its low and
  // then its high 32-bit half, and EmitOperandSizeOverride emits the byte 0x66.
  void EmitUint8(uint8_t value);
  void EmitInt32(int32_t value);
  void EmitInt64(int64_t value);
  // Emits a single byte combining 0xC0, `rm` shifted left by 3, and the low 3 bits of `reg`.
  void EmitRegisterOperand(uint8_t rm, uint8_t reg);
  // Same as EmitRegisterOperand, with the register taken from `reg.AsFloatRegister()`.
  void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
  // Forwards `fixup` to the buffer's EmitFixup().
  void EmitFixup(AssemblerFixup* fixup);
  void EmitOperandSizeOverride();

  // Operand / immediate / label encoding helpers (declarations only).
  // `is_16_op` defaults to false on both helpers that accept it.
  void EmitOperand(uint8_t rm, const Operand& operand);
  void EmitImmediate(const Immediate& imm, bool is_16_op = false);
  void EmitComplex(
      uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
  void EmitLabel(Label* label, int instruction_size);
  void EmitLabelLink(Label* label);
  void EmitLabelLink(NearLabel* label);

  // Shared helper for the shift/rotate-style declarations: one overload takes the count
  // as an Immediate, the other in the `shifter` register.
  // NOTE(review): `wide` presumably selects the 64-bit form - confirm in the definition.
  void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
931 
  // If any input is not false, output the necessary rex prefix.
  void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);

  // Emit a rex prefix byte if necessary for reg, i.e. if reg is a register in the range
  // R8 to R15. Overloads cover the CPU/XMM register and Operand combinations listed below.
  void EmitOptionalRex32(CpuRegister reg);
  void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
  void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
  void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
  void EmitOptionalRex32(const Operand& operand);
  void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
  void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
944 
  // Emit a REX.W prefix plus necessary register bit encodings.
  // The overload set mirrors EmitOptionalRex32, plus an argument-less form.
  void EmitRex64();
  void EmitRex64(CpuRegister reg);
  void EmitRex64(const Operand& operand);
  void EmitRex64(CpuRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, XmmRegister src);

  // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
  // Declared for a register source and for an Operand source.
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
958 
  // Emit a 3 byte VEX Prefix
  // NOTE(review): each helper returns a uint8_t rather than void, so they may compute
  // the prefix byte for the caller to emit instead of emitting it themselves; also,
  // EmitVexByteZero takes `is_two_byte`, which suggests the two-byte form is handled
  // as well - confirm against the definitions.
  uint8_t EmitVexByteZero(bool is_two_byte);
  uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
  uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp);
963 
  // Backing store for the literals registered through AddDouble/AddFloat/AddInt32/
  // AppendInt32/AddInt64; its size is what ConstantAreaSize() reports.
  ConstantArea constant_area_;

  // Project macro; by its name it disables copy construction and copy assignment.
  DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
967 };
968 
EmitUint8(uint8_t value)969 inline void X86_64Assembler::EmitUint8(uint8_t value) {
970   buffer_.Emit<uint8_t>(value);
971 }
972 
EmitInt32(int32_t value)973 inline void X86_64Assembler::EmitInt32(int32_t value) {
974   buffer_.Emit<int32_t>(value);
975 }
976 
EmitInt64(int64_t value)977 inline void X86_64Assembler::EmitInt64(int64_t value) {
978   // Write this 64-bit value as two 32-bit words for alignment reasons
979   // (this is essentially when running on ARM, which does not allow
980   // 64-bit unaligned accesses).  We assume little-endianness here.
981   EmitInt32(Low32Bits(value));
982   EmitInt32(High32Bits(value));
983 }
984 
EmitRegisterOperand(uint8_t rm,uint8_t reg)985 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
986   CHECK_GE(rm, 0);
987   CHECK_LT(rm, 8);
988   buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
989 }
990 
EmitXmmRegisterOperand(uint8_t rm,XmmRegister reg)991 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
992   EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
993 }
994 
EmitFixup(AssemblerFixup * fixup)995 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
996   buffer_.EmitFixup(fixup);
997 }
998 
EmitOperandSizeOverride()999 inline void X86_64Assembler::EmitOperandSizeOverride() {
1000   EmitUint8(0x66);
1001 }
1002 
1003 }  // namespace x86_64
1004 }  // namespace art
1005 
1006 #endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
1007