1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
19
20 #include <vector>
21
22 #include "base/arena_containers.h"
23 #include "base/array_ref.h"
24 #include "base/bit_utils.h"
25 #include "base/globals.h"
26 #include "base/macros.h"
27 #include "constants_x86_64.h"
28 #include "heap_poisoning.h"
29 #include "managed_register_x86_64.h"
30 #include "offsets.h"
31 #include "utils/assembler.h"
32 #include "utils/jni_macro_assembler.h"
33
34 namespace art {
35 namespace x86_64 {
36
37 // Encodes an immediate value for operands.
38 //
39 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
40 // to 32b.
41 //
42 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
43 // conversion rules in expressions regarding negation, especially size_t on 32b.
44 class Immediate : public ValueObject {
45 public:
Immediate(int64_t value_in)46 explicit Immediate(int64_t value_in) : value_(value_in) {}
47
value()48 int64_t value() const { return value_; }
49
is_int8()50 bool is_int8() const { return IsInt<8>(value_); }
is_uint8()51 bool is_uint8() const { return IsUint<8>(value_); }
is_int16()52 bool is_int16() const { return IsInt<16>(value_); }
is_uint16()53 bool is_uint16() const { return IsUint<16>(value_); }
is_int32()54 bool is_int32() const { return IsInt<32>(value_); }
55
56 private:
57 const int64_t value_;
58 };
59
60
61 class Operand : public ValueObject {
62 public:
mod()63 uint8_t mod() const {
64 return (encoding_at(0) >> 6) & 3;
65 }
66
rm()67 Register rm() const {
68 return static_cast<Register>(encoding_at(0) & 7);
69 }
70
scale()71 ScaleFactor scale() const {
72 return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
73 }
74
index()75 Register index() const {
76 return static_cast<Register>((encoding_at(1) >> 3) & 7);
77 }
78
base()79 Register base() const {
80 return static_cast<Register>(encoding_at(1) & 7);
81 }
82
cpu_rm()83 CpuRegister cpu_rm() const {
84 int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
85 return static_cast<CpuRegister>(rm() + ext);
86 }
87
cpu_index()88 CpuRegister cpu_index() const {
89 int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
90 return static_cast<CpuRegister>(index() + ext);
91 }
92
cpu_base()93 CpuRegister cpu_base() const {
94 int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
95 return static_cast<CpuRegister>(base() + ext);
96 }
97
rex()98 uint8_t rex() const {
99 return rex_;
100 }
101
disp8()102 int8_t disp8() const {
103 CHECK_GE(length_, 2);
104 return static_cast<int8_t>(encoding_[length_ - 1]);
105 }
106
disp32()107 int32_t disp32() const {
108 CHECK_GE(length_, 5);
109 int32_t value;
110 memcpy(&value, &encoding_[length_ - 4], sizeof(value));
111 return value;
112 }
113
IsRegister(CpuRegister reg)114 bool IsRegister(CpuRegister reg) const {
115 return ((encoding_[0] & 0xF8) == 0xC0) // Addressing mode is register only.
116 && ((encoding_[0] & 0x07) == reg.LowBits()) // Register codes match.
117 && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match.
118 }
119
GetFixup()120 AssemblerFixup* GetFixup() const {
121 return fixup_;
122 }
123
124 protected:
125 // Operand can be sub classed (e.g: Address).
Operand()126 Operand() : rex_(0), length_(0), fixup_(nullptr) { }
127
SetModRM(uint8_t mod_in,CpuRegister rm_in)128 void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
129 CHECK_EQ(mod_in & ~3, 0);
130 if (rm_in.NeedsRex()) {
131 rex_ |= 0x41; // REX.000B
132 }
133 encoding_[0] = (mod_in << 6) | rm_in.LowBits();
134 length_ = 1;
135 }
136
SetSIB(ScaleFactor scale_in,CpuRegister index_in,CpuRegister base_in)137 void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
138 CHECK_EQ(length_, 1);
139 CHECK_EQ(scale_in & ~3, 0);
140 if (base_in.NeedsRex()) {
141 rex_ |= 0x41; // REX.000B
142 }
143 if (index_in.NeedsRex()) {
144 rex_ |= 0x42; // REX.00X0
145 }
146 encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
147 static_cast<uint8_t>(base_in.LowBits());
148 length_ = 2;
149 }
150
SetDisp8(int8_t disp)151 void SetDisp8(int8_t disp) {
152 CHECK(length_ == 1 || length_ == 2);
153 encoding_[length_++] = static_cast<uint8_t>(disp);
154 }
155
SetDisp32(int32_t disp)156 void SetDisp32(int32_t disp) {
157 CHECK(length_ == 1 || length_ == 2);
158 int disp_size = sizeof(disp);
159 memmove(&encoding_[length_], &disp, disp_size);
160 length_ += disp_size;
161 }
162
SetFixup(AssemblerFixup * fixup)163 void SetFixup(AssemblerFixup* fixup) {
164 fixup_ = fixup;
165 }
166
167 private:
168 uint8_t rex_;
169 uint8_t length_;
170 uint8_t encoding_[6];
171 AssemblerFixup* fixup_;
172
Operand(CpuRegister reg)173 explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
174
175 // Get the operand encoding byte at the given index.
encoding_at(int index_in)176 uint8_t encoding_at(int index_in) const {
177 CHECK_GE(index_in, 0);
178 CHECK_LT(index_in, length_);
179 return encoding_[index_in];
180 }
181
182 friend class X86_64Assembler;
183 };
184
185
186 class Address : public Operand {
187 public:
Address(CpuRegister base_in,int32_t disp)188 Address(CpuRegister base_in, int32_t disp) {
189 Init(base_in, disp);
190 }
191
Address(CpuRegister base_in,Offset disp)192 Address(CpuRegister base_in, Offset disp) {
193 Init(base_in, disp.Int32Value());
194 }
195
Address(CpuRegister base_in,FrameOffset disp)196 Address(CpuRegister base_in, FrameOffset disp) {
197 CHECK_EQ(base_in.AsRegister(), RSP);
198 Init(CpuRegister(RSP), disp.Int32Value());
199 }
200
Address(CpuRegister base_in,MemberOffset disp)201 Address(CpuRegister base_in, MemberOffset disp) {
202 Init(base_in, disp.Int32Value());
203 }
204
Init(CpuRegister base_in,int32_t disp)205 void Init(CpuRegister base_in, int32_t disp) {
206 if (disp == 0 && base_in.LowBits() != RBP) {
207 SetModRM(0, base_in);
208 if (base_in.LowBits() == RSP) {
209 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
210 }
211 } else if (disp >= -128 && disp <= 127) {
212 SetModRM(1, base_in);
213 if (base_in.LowBits() == RSP) {
214 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
215 }
216 SetDisp8(disp);
217 } else {
218 SetModRM(2, base_in);
219 if (base_in.LowBits() == RSP) {
220 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
221 }
222 SetDisp32(disp);
223 }
224 }
225
226
Address(CpuRegister index_in,ScaleFactor scale_in,int32_t disp)227 Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
228 CHECK_NE(index_in.AsRegister(), RSP); // Illegal addressing mode.
229 SetModRM(0, CpuRegister(RSP));
230 SetSIB(scale_in, index_in, CpuRegister(RBP));
231 SetDisp32(disp);
232 }
233
Address(CpuRegister base_in,CpuRegister index_in,ScaleFactor scale_in,int32_t disp)234 Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
235 CHECK_NE(index_in.AsRegister(), RSP); // Illegal addressing mode.
236 if (disp == 0 && base_in.LowBits() != RBP) {
237 SetModRM(0, CpuRegister(RSP));
238 SetSIB(scale_in, index_in, base_in);
239 } else if (disp >= -128 && disp <= 127) {
240 SetModRM(1, CpuRegister(RSP));
241 SetSIB(scale_in, index_in, base_in);
242 SetDisp8(disp);
243 } else {
244 SetModRM(2, CpuRegister(RSP));
245 SetSIB(scale_in, index_in, base_in);
246 SetDisp32(disp);
247 }
248 }
249
250 // If no_rip is true then the Absolute address isn't RIP relative.
251 static Address Absolute(uintptr_t addr, bool no_rip = false) {
252 Address result;
253 if (no_rip) {
254 result.SetModRM(0, CpuRegister(RSP));
255 result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
256 result.SetDisp32(addr);
257 } else {
258 // RIP addressing is done using RBP as the base register.
259 // The value in RBP isn't used. Instead the offset is added to RIP.
260 result.SetModRM(0, CpuRegister(RBP));
261 result.SetDisp32(addr);
262 }
263 return result;
264 }
265
266 // An RIP relative address that will be fixed up later.
RIP(AssemblerFixup * fixup)267 static Address RIP(AssemblerFixup* fixup) {
268 Address result;
269 // RIP addressing is done using RBP as the base register.
270 // The value in RBP isn't used. Instead the offset is added to RIP.
271 result.SetModRM(0, CpuRegister(RBP));
272 result.SetDisp32(0);
273 result.SetFixup(fixup);
274 return result;
275 }
276
277 // If no_rip is true then the Absolute address isn't RIP relative.
278 static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
279 return Absolute(addr.Int32Value(), no_rip);
280 }
281
282 private:
Address()283 Address() {}
284 };
285
286 std::ostream& operator<<(std::ostream& os, const Address& addr);
287
288 /**
289 * Class to handle constant area values.
290 */
291 class ConstantArea {
292 public:
ConstantArea(ArenaAllocator * allocator)293 explicit ConstantArea(ArenaAllocator* allocator)
294 : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
295
296 // Add a double to the constant area, returning the offset into
297 // the constant area where the literal resides.
298 size_t AddDouble(double v);
299
300 // Add a float to the constant area, returning the offset into
301 // the constant area where the literal resides.
302 size_t AddFloat(float v);
303
304 // Add an int32_t to the constant area, returning the offset into
305 // the constant area where the literal resides.
306 size_t AddInt32(int32_t v);
307
308 // Add an int32_t to the end of the constant area, returning the offset into
309 // the constant area where the literal resides.
310 size_t AppendInt32(int32_t v);
311
312 // Add an int64_t to the constant area, returning the offset into
313 // the constant area where the literal resides.
314 size_t AddInt64(int64_t v);
315
GetSize()316 size_t GetSize() const {
317 return buffer_.size() * elem_size_;
318 }
319
GetBuffer()320 ArrayRef<const int32_t> GetBuffer() const {
321 return ArrayRef<const int32_t>(buffer_);
322 }
323
324 private:
325 static constexpr size_t elem_size_ = sizeof(int32_t);
326 ArenaVector<int32_t> buffer_;
327 };
328
329
330 // This is equivalent to the Label class, used in a slightly different context. We
331 // inherit the functionality of the Label class, but prevent unintended
332 // derived-to-base conversions by making the base class private.
333 class NearLabel : private Label {
334 public:
NearLabel()335 NearLabel() : Label() {}
336
337 // Expose the Label routines that we need.
338 using Label::Position;
339 using Label::LinkPosition;
340 using Label::IsBound;
341 using Label::IsUnused;
342 using Label::IsLinked;
343
344 private:
345 using Label::BindTo;
346 using Label::LinkTo;
347
348 friend class x86_64::X86_64Assembler;
349
350 DISALLOW_COPY_AND_ASSIGN(NearLabel);
351 };
352
353
354 class X86_64Assembler final : public Assembler {
355 public:
X86_64Assembler(ArenaAllocator * allocator)356 explicit X86_64Assembler(ArenaAllocator* allocator)
357 : Assembler(allocator), constant_area_(allocator) {}
// Nothing to release explicitly; members clean up after themselves.
virtual ~X86_64Assembler() = default;
359
360 /*
361 * Emit Machine Instructions.
362 */
363 void call(CpuRegister reg);
364 void call(const Address& address);
365 void call(Label* label);
366
367 void pushq(CpuRegister reg);
368 void pushq(const Address& address);
369 void pushq(const Immediate& imm);
370
371 void popq(CpuRegister reg);
372 void popq(const Address& address);
373
374 void movq(CpuRegister dst, const Immediate& src);
375 void movl(CpuRegister dst, const Immediate& src);
376 void movq(CpuRegister dst, CpuRegister src);
377 void movl(CpuRegister dst, CpuRegister src);
378
379 void movntl(const Address& dst, CpuRegister src);
380 void movntq(const Address& dst, CpuRegister src);
381
382 void movq(CpuRegister dst, const Address& src);
383 void movl(CpuRegister dst, const Address& src);
384 void movq(const Address& dst, CpuRegister src);
385 void movq(const Address& dst, const Immediate& imm);
386 void movl(const Address& dst, CpuRegister src);
387 void movl(const Address& dst, const Immediate& imm);
388
389 void cmov(Condition c, CpuRegister dst, CpuRegister src); // This is the 64b version.
390 void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
391 void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
392
393 void movzxb(CpuRegister dst, CpuRegister src);
394 void movzxb(CpuRegister dst, const Address& src);
395 void movsxb(CpuRegister dst, CpuRegister src);
396 void movsxb(CpuRegister dst, const Address& src);
397 void movb(CpuRegister dst, const Address& src);
398 void movb(const Address& dst, CpuRegister src);
399 void movb(const Address& dst, const Immediate& imm);
400
401 void movzxw(CpuRegister dst, CpuRegister src);
402 void movzxw(CpuRegister dst, const Address& src);
403 void movsxw(CpuRegister dst, CpuRegister src);
404 void movsxw(CpuRegister dst, const Address& src);
405 void movw(CpuRegister dst, const Address& src);
406 void movw(const Address& dst, CpuRegister src);
407 void movw(const Address& dst, const Immediate& imm);
408
409 void leaq(CpuRegister dst, const Address& src);
410 void leal(CpuRegister dst, const Address& src);
411
412 void movaps(XmmRegister dst, XmmRegister src); // move
413 void movaps(XmmRegister dst, const Address& src); // load aligned
414 void movups(XmmRegister dst, const Address& src); // load unaligned
415 void movaps(const Address& dst, XmmRegister src); // store aligned
416 void movups(const Address& dst, XmmRegister src); // store unaligned
417
418 void movss(XmmRegister dst, const Address& src);
419 void movss(const Address& dst, XmmRegister src);
420 void movss(XmmRegister dst, XmmRegister src);
421
422 void movsxd(CpuRegister dst, CpuRegister src);
423 void movsxd(CpuRegister dst, const Address& src);
424
425 void movd(XmmRegister dst, CpuRegister src); // Note: this is the r64 version, formally movq.
426 void movd(CpuRegister dst, XmmRegister src); // Note: this is the r64 version, formally movq.
427 void movd(XmmRegister dst, CpuRegister src, bool is64bit);
428 void movd(CpuRegister dst, XmmRegister src, bool is64bit);
429
430 void addss(XmmRegister dst, XmmRegister src);
431 void addss(XmmRegister dst, const Address& src);
432 void subss(XmmRegister dst, XmmRegister src);
433 void subss(XmmRegister dst, const Address& src);
434 void mulss(XmmRegister dst, XmmRegister src);
435 void mulss(XmmRegister dst, const Address& src);
436 void divss(XmmRegister dst, XmmRegister src);
437 void divss(XmmRegister dst, const Address& src);
438
439 void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
440 void subps(XmmRegister dst, XmmRegister src);
441 void mulps(XmmRegister dst, XmmRegister src);
442 void divps(XmmRegister dst, XmmRegister src);
443
444 void movapd(XmmRegister dst, XmmRegister src); // move
445 void movapd(XmmRegister dst, const Address& src); // load aligned
446 void movupd(XmmRegister dst, const Address& src); // load unaligned
447 void movapd(const Address& dst, XmmRegister src); // store aligned
448 void movupd(const Address& dst, XmmRegister src); // store unaligned
449
450 void movsd(XmmRegister dst, const Address& src);
451 void movsd(const Address& dst, XmmRegister src);
452 void movsd(XmmRegister dst, XmmRegister src);
453
454 void addsd(XmmRegister dst, XmmRegister src);
455 void addsd(XmmRegister dst, const Address& src);
456 void subsd(XmmRegister dst, XmmRegister src);
457 void subsd(XmmRegister dst, const Address& src);
458 void mulsd(XmmRegister dst, XmmRegister src);
459 void mulsd(XmmRegister dst, const Address& src);
460 void divsd(XmmRegister dst, XmmRegister src);
461 void divsd(XmmRegister dst, const Address& src);
462
463 void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
464 void subpd(XmmRegister dst, XmmRegister src);
465 void mulpd(XmmRegister dst, XmmRegister src);
466 void divpd(XmmRegister dst, XmmRegister src);
467
468 void movdqa(XmmRegister dst, XmmRegister src); // move
469 void movdqa(XmmRegister dst, const Address& src); // load aligned
470 void movdqu(XmmRegister dst, const Address& src); // load unaligned
471 void movdqa(const Address& dst, XmmRegister src); // store aligned
472 void movdqu(const Address& dst, XmmRegister src); // store unaligned
473
474 void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
475 void psubb(XmmRegister dst, XmmRegister src);
476
477 void paddw(XmmRegister dst, XmmRegister src);
478 void psubw(XmmRegister dst, XmmRegister src);
479 void pmullw(XmmRegister dst, XmmRegister src);
480
481 void paddd(XmmRegister dst, XmmRegister src);
482 void psubd(XmmRegister dst, XmmRegister src);
483 void pmulld(XmmRegister dst, XmmRegister src);
484
485 void paddq(XmmRegister dst, XmmRegister src);
486 void psubq(XmmRegister dst, XmmRegister src);
487
488 void paddusb(XmmRegister dst, XmmRegister src);
489 void paddsb(XmmRegister dst, XmmRegister src);
490 void paddusw(XmmRegister dst, XmmRegister src);
491 void paddsw(XmmRegister dst, XmmRegister src);
492 void psubusb(XmmRegister dst, XmmRegister src);
493 void psubsb(XmmRegister dst, XmmRegister src);
494 void psubusw(XmmRegister dst, XmmRegister src);
495 void psubsw(XmmRegister dst, XmmRegister src);
496
497 void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
498 void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
499 void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
500 void cvtsi2sd(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
501 void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
502 void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
503
504 void cvtss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
505 void cvtss2sd(XmmRegister dst, XmmRegister src);
506 void cvtss2sd(XmmRegister dst, const Address& src);
507
508 void cvtsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
509 void cvtsd2ss(XmmRegister dst, XmmRegister src);
510 void cvtsd2ss(XmmRegister dst, const Address& src);
511
512 void cvttss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
513 void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
514 void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
515 void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
516
517 void cvtdq2ps(XmmRegister dst, XmmRegister src);
518 void cvtdq2pd(XmmRegister dst, XmmRegister src);
519
520 void comiss(XmmRegister a, XmmRegister b);
521 void comiss(XmmRegister a, const Address& b);
522 void comisd(XmmRegister a, XmmRegister b);
523 void comisd(XmmRegister a, const Address& b);
524 void ucomiss(XmmRegister a, XmmRegister b);
525 void ucomiss(XmmRegister a, const Address& b);
526 void ucomisd(XmmRegister a, XmmRegister b);
527 void ucomisd(XmmRegister a, const Address& b);
528
529 void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
530 void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
531
532 void sqrtsd(XmmRegister dst, XmmRegister src);
533 void sqrtss(XmmRegister dst, XmmRegister src);
534
535 void xorpd(XmmRegister dst, const Address& src);
536 void xorpd(XmmRegister dst, XmmRegister src);
537 void xorps(XmmRegister dst, const Address& src);
538 void xorps(XmmRegister dst, XmmRegister src);
539 void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now)
540
541 void andpd(XmmRegister dst, const Address& src);
542 void andpd(XmmRegister dst, XmmRegister src);
543 void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
544 void pand(XmmRegister dst, XmmRegister src);
545
546 void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
547 void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
548 void andnps(XmmRegister dst, XmmRegister src);
549 void pandn(XmmRegister dst, XmmRegister src);
550
551 void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
552 void orps(XmmRegister dst, XmmRegister src);
553 void por(XmmRegister dst, XmmRegister src);
554
555 void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
556 void pavgw(XmmRegister dst, XmmRegister src);
557 void psadbw(XmmRegister dst, XmmRegister src);
558 void pmaddwd(XmmRegister dst, XmmRegister src);
559 void phaddw(XmmRegister dst, XmmRegister src);
560 void phaddd(XmmRegister dst, XmmRegister src);
561 void haddps(XmmRegister dst, XmmRegister src);
562 void haddpd(XmmRegister dst, XmmRegister src);
563 void phsubw(XmmRegister dst, XmmRegister src);
564 void phsubd(XmmRegister dst, XmmRegister src);
565 void hsubps(XmmRegister dst, XmmRegister src);
566 void hsubpd(XmmRegister dst, XmmRegister src);
567
568 void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
569 void pmaxsb(XmmRegister dst, XmmRegister src);
570 void pminsw(XmmRegister dst, XmmRegister src);
571 void pmaxsw(XmmRegister dst, XmmRegister src);
572 void pminsd(XmmRegister dst, XmmRegister src);
573 void pmaxsd(XmmRegister dst, XmmRegister src);
574
575 void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
576 void pmaxub(XmmRegister dst, XmmRegister src);
577 void pminuw(XmmRegister dst, XmmRegister src);
578 void pmaxuw(XmmRegister dst, XmmRegister src);
579 void pminud(XmmRegister dst, XmmRegister src);
580 void pmaxud(XmmRegister dst, XmmRegister src);
581
582 void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
583 void maxps(XmmRegister dst, XmmRegister src);
584 void minpd(XmmRegister dst, XmmRegister src);
585 void maxpd(XmmRegister dst, XmmRegister src);
586
587 void pcmpeqb(XmmRegister dst, XmmRegister src);
588 void pcmpeqw(XmmRegister dst, XmmRegister src);
589 void pcmpeqd(XmmRegister dst, XmmRegister src);
590 void pcmpeqq(XmmRegister dst, XmmRegister src);
591
592 void pcmpgtb(XmmRegister dst, XmmRegister src);
593 void pcmpgtw(XmmRegister dst, XmmRegister src);
594 void pcmpgtd(XmmRegister dst, XmmRegister src);
595 void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2
596
597 void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
598 void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
599 void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
600
601 void punpcklbw(XmmRegister dst, XmmRegister src);
602 void punpcklwd(XmmRegister dst, XmmRegister src);
603 void punpckldq(XmmRegister dst, XmmRegister src);
604 void punpcklqdq(XmmRegister dst, XmmRegister src);
605
606 void punpckhbw(XmmRegister dst, XmmRegister src);
607 void punpckhwd(XmmRegister dst, XmmRegister src);
608 void punpckhdq(XmmRegister dst, XmmRegister src);
609 void punpckhqdq(XmmRegister dst, XmmRegister src);
610
611 void psllw(XmmRegister reg, const Immediate& shift_count);
612 void pslld(XmmRegister reg, const Immediate& shift_count);
613 void psllq(XmmRegister reg, const Immediate& shift_count);
614
615 void psraw(XmmRegister reg, const Immediate& shift_count);
616 void psrad(XmmRegister reg, const Immediate& shift_count);
617 // no psraq
618
619 void psrlw(XmmRegister reg, const Immediate& shift_count);
620 void psrld(XmmRegister reg, const Immediate& shift_count);
621 void psrlq(XmmRegister reg, const Immediate& shift_count);
622 void psrldq(XmmRegister reg, const Immediate& shift_count);
623
624 void flds(const Address& src);
625 void fstps(const Address& dst);
626 void fsts(const Address& dst);
627
628 void fldl(const Address& src);
629 void fstpl(const Address& dst);
630 void fstl(const Address& dst);
631
632 void fstsw();
633
634 void fucompp();
635
636 void fnstcw(const Address& dst);
637 void fldcw(const Address& src);
638
639 void fistpl(const Address& dst);
640 void fistps(const Address& dst);
641 void fildl(const Address& src);
642 void filds(const Address& src);
643
644 void fincstp();
645 void ffree(const Immediate& index);
646
647 void fsin();
648 void fcos();
649 void fptan();
650 void fprem();
651
652 void xchgl(CpuRegister dst, CpuRegister src);
653 void xchgq(CpuRegister dst, CpuRegister src);
654 void xchgl(CpuRegister reg, const Address& address);
655
656 void cmpb(const Address& address, const Immediate& imm);
657 void cmpw(const Address& address, const Immediate& imm);
658
659 void cmpl(CpuRegister reg, const Immediate& imm);
660 void cmpl(CpuRegister reg0, CpuRegister reg1);
661 void cmpl(CpuRegister reg, const Address& address);
662 void cmpl(const Address& address, CpuRegister reg);
663 void cmpl(const Address& address, const Immediate& imm);
664
665 void cmpq(CpuRegister reg0, CpuRegister reg1);
666 void cmpq(CpuRegister reg0, const Immediate& imm);
667 void cmpq(CpuRegister reg0, const Address& address);
668 void cmpq(const Address& address, const Immediate& imm);
669
670 void testl(CpuRegister reg1, CpuRegister reg2);
671 void testl(CpuRegister reg, const Address& address);
672 void testl(CpuRegister reg, const Immediate& imm);
673
674 void testq(CpuRegister reg1, CpuRegister reg2);
675 void testq(CpuRegister reg, const Address& address);
676
677 void testb(const Address& address, const Immediate& imm);
678 void testl(const Address& address, const Immediate& imm);
679
680 void andl(CpuRegister dst, const Immediate& imm);
681 void andl(CpuRegister dst, CpuRegister src);
682 void andl(CpuRegister reg, const Address& address);
683 void andq(CpuRegister dst, const Immediate& imm);
684 void andq(CpuRegister dst, CpuRegister src);
685 void andq(CpuRegister reg, const Address& address);
686
687 void orl(CpuRegister dst, const Immediate& imm);
688 void orl(CpuRegister dst, CpuRegister src);
689 void orl(CpuRegister reg, const Address& address);
690 void orq(CpuRegister dst, CpuRegister src);
691 void orq(CpuRegister dst, const Immediate& imm);
692 void orq(CpuRegister reg, const Address& address);
693
694 void xorl(CpuRegister dst, CpuRegister src);
695 void xorl(CpuRegister dst, const Immediate& imm);
696 void xorl(CpuRegister reg, const Address& address);
697 void xorq(CpuRegister dst, const Immediate& imm);
698 void xorq(CpuRegister dst, CpuRegister src);
699 void xorq(CpuRegister reg, const Address& address);
700
701 void addl(CpuRegister dst, CpuRegister src);
702 void addl(CpuRegister reg, const Immediate& imm);
703 void addl(CpuRegister reg, const Address& address);
704 void addl(const Address& address, CpuRegister reg);
705 void addl(const Address& address, const Immediate& imm);
706 void addw(const Address& address, const Immediate& imm);
707
708 void addq(CpuRegister reg, const Immediate& imm);
709 void addq(CpuRegister dst, CpuRegister src);
710 void addq(CpuRegister dst, const Address& address);
711
712 void subl(CpuRegister dst, CpuRegister src);
713 void subl(CpuRegister reg, const Immediate& imm);
714 void subl(CpuRegister reg, const Address& address);
715
716 void subq(CpuRegister reg, const Immediate& imm);
717 void subq(CpuRegister dst, CpuRegister src);
718 void subq(CpuRegister dst, const Address& address);
719
720 void cdq();
721 void cqo();
722
723 void idivl(CpuRegister reg);
724 void idivq(CpuRegister reg);
725
726 void imull(CpuRegister dst, CpuRegister src);
727 void imull(CpuRegister reg, const Immediate& imm);
728 void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
729 void imull(CpuRegister reg, const Address& address);
730
731 void imulq(CpuRegister src);
732 void imulq(CpuRegister dst, CpuRegister src);
733 void imulq(CpuRegister reg, const Immediate& imm);
734 void imulq(CpuRegister reg, const Address& address);
735 void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
736
737 void imull(CpuRegister reg);
738 void imull(const Address& address);
739
740 void mull(CpuRegister reg);
741 void mull(const Address& address);
742
743 void shll(CpuRegister reg, const Immediate& imm);
744 void shll(CpuRegister operand, CpuRegister shifter);
745 void shrl(CpuRegister reg, const Immediate& imm);
746 void shrl(CpuRegister operand, CpuRegister shifter);
747 void sarl(CpuRegister reg, const Immediate& imm);
748 void sarl(CpuRegister operand, CpuRegister shifter);
749
750 void shlq(CpuRegister reg, const Immediate& imm);
751 void shlq(CpuRegister operand, CpuRegister shifter);
752 void shrq(CpuRegister reg, const Immediate& imm);
753 void shrq(CpuRegister operand, CpuRegister shifter);
754 void sarq(CpuRegister reg, const Immediate& imm);
755 void sarq(CpuRegister operand, CpuRegister shifter);
756
757 void negl(CpuRegister reg);
758 void negq(CpuRegister reg);
759
760 void notl(CpuRegister reg);
761 void notq(CpuRegister reg);
762
763 void enter(const Immediate& imm);
764 void leave();
765
766 void ret();
767 void ret(const Immediate& imm);
768
769 void nop();
770 void int3();
771 void hlt();
772
773 void j(Condition condition, Label* label);
774 void j(Condition condition, NearLabel* label);
775 void jrcxz(NearLabel* label);
776
777 void jmp(CpuRegister reg);
778 void jmp(const Address& address);
779 void jmp(Label* label);
780 void jmp(NearLabel* label);
781
782 X86_64Assembler* lock();
783 void cmpxchgl(const Address& address, CpuRegister reg);
784 void cmpxchgq(const Address& address, CpuRegister reg);
785
786 void mfence();
787
788 X86_64Assembler* gs();
789
790 void setcc(Condition condition, CpuRegister dst);
791
792 void bswapl(CpuRegister dst);
793 void bswapq(CpuRegister dst);
794
795 void bsfl(CpuRegister dst, CpuRegister src);
796 void bsfl(CpuRegister dst, const Address& src);
797 void bsfq(CpuRegister dst, CpuRegister src);
798 void bsfq(CpuRegister dst, const Address& src);
799
800 void blsi(CpuRegister dst, CpuRegister src); // no addr variant (for now)
801 void blsmsk(CpuRegister dst, CpuRegister src); // no addr variant (for now)
802 void blsr(CpuRegister dst, CpuRegister src); // no addr variant (for now)
803
804 void bsrl(CpuRegister dst, CpuRegister src);
805 void bsrl(CpuRegister dst, const Address& src);
806 void bsrq(CpuRegister dst, CpuRegister src);
807 void bsrq(CpuRegister dst, const Address& src);
808
809 void popcntl(CpuRegister dst, CpuRegister src);
810 void popcntl(CpuRegister dst, const Address& src);
811 void popcntq(CpuRegister dst, CpuRegister src);
812 void popcntq(CpuRegister dst, const Address& src);
813
814 void rorl(CpuRegister reg, const Immediate& imm);
815 void rorl(CpuRegister operand, CpuRegister shifter);
816 void roll(CpuRegister reg, const Immediate& imm);
817 void roll(CpuRegister operand, CpuRegister shifter);
818
819 void rorq(CpuRegister reg, const Immediate& imm);
820 void rorq(CpuRegister operand, CpuRegister shifter);
821 void rolq(CpuRegister reg, const Immediate& imm);
822 void rolq(CpuRegister operand, CpuRegister shifter);
823
824 void repne_scasb();
825 void repne_scasw();
826 void repe_cmpsw();
827 void repe_cmpsl();
828 void repe_cmpsq();
829 void rep_movsw();
830
831 //
832 // Macros for High-level operations.
833 //
834
835 void AddImmediate(CpuRegister reg, const Immediate& imm);
836
837 void LoadDoubleConstant(XmmRegister dst, double value);
838
LockCmpxchgl(const Address & address,CpuRegister reg)839 void LockCmpxchgl(const Address& address, CpuRegister reg) {
840 lock()->cmpxchgl(address, reg);
841 }
842
LockCmpxchgq(const Address & address,CpuRegister reg)843 void LockCmpxchgq(const Address& address, CpuRegister reg) {
844 lock()->cmpxchgq(address, reg);
845 }
846
847 //
848 // Misc. functionality
849 //
// Preferred alignment for loops; always 16 (presumably bytes, to be confirmed against
// Align()).
int PreferredLoopAlignment() {
  constexpr int kLoopAlignment = 16;
  return kLoopAlignment;
}
// Declaration only. NOTE(review): how `offset` interacts with `alignment` is not visible here —
// confirm against the definition.
851 void Align(int alignment, int offset);
// Marked `override`, so this replaces a virtual declared in a base class; body not visible here.
852 void Bind(Label* label) override;
Jump(Label * label)853 void Jump(Label* label) override {
854 jmp(label);
855 }
// Overload of Bind taking a NearLabel; unlike the Label overload it is not marked `override`.
856 void Bind(NearLabel* label);
857
858 // Add a double to the constant area, returning the offset into
859 // the constant area where the literal resides.
AddDouble(double v)860 size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
861
862 // Add a float to the constant area, returning the offset into
863 // the constant area where the literal resides.
AddFloat(float v)864 size_t AddFloat(float v) { return constant_area_.AddFloat(v); }
865
866 // Add an int32_t to the constant area, returning the offset into
867 // the constant area where the literal resides.
AddInt32(int32_t v)868 size_t AddInt32(int32_t v) {
869 return constant_area_.AddInt32(v);
870 }
871
872 // Add an int32_t to the end of the constant area, returning the offset into
873 // the constant area where the literal resides.
AppendInt32(int32_t v)874 size_t AppendInt32(int32_t v) {
875 return constant_area_.AppendInt32(v);
876 }
877
878 // Add an int64_t to the constant area, returning the offset into
879 // the constant area where the literal resides.
AddInt64(int64_t v)880 size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
881
882 // Add the contents of the constant area to the assembler buffer.
// NOTE(review): declaration only; whether this must be the last thing emitted, or may be called
// more than once, is not visible here — confirm against the definition.
883 void AddConstantArea();
884
885 // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()886 bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
887
888 // Return the current size of the constant area.
ConstantAreaSize()889 size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
890
891 //
892 // Heap poisoning.
893 //
894
895 // Poison a heap reference contained in `reg`.
PoisonHeapReference(CpuRegister reg)896 void PoisonHeapReference(CpuRegister reg) { negl(reg); }
897 // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(CpuRegister reg)898 void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
899 // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(CpuRegister reg)900 void MaybePoisonHeapReference(CpuRegister reg) {
901 if (kPoisonHeapReferences) {
902 PoisonHeapReference(reg);
903 }
904 }
905 // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(CpuRegister reg)906 void MaybeUnpoisonHeapReference(CpuRegister reg) {
907 if (kPoisonHeapReferences) {
908 UnpoisonHeapReference(reg);
909 }
910 }
911
912 private:
// Low-level emitters. All seven declarations in this group are defined inline at the bottom of
// this header, where they append to buffer_ (directly, or via EmitUint8/EmitInt32).
913 void EmitUint8(uint8_t value);
914 void EmitInt32(int32_t value);
915 void EmitInt64(int64_t value);
916 void EmitRegisterOperand(uint8_t rm, uint8_t reg);
917 void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
918 void EmitFixup(AssemblerFixup* fixup);
919 void EmitOperandSizeOverride();
920
// Operand, immediate and label encoders (declarations only; bodies are not visible here).
// NOTE(review): `is_16_op` defaults to false; presumably it selects a 16-bit immediate
// encoding — confirm against the definitions.
921 void EmitOperand(uint8_t rm, const Operand& operand);
922 void EmitImmediate(const Immediate& imm, bool is_16_op = false);
923 void EmitComplex(
924 uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
925 void EmitLabel(Label* label, int instruction_size);
926 void EmitLabelLink(Label* label);
927 void EmitLabelLink(NearLabel* label);
928
// Two overloads: the count is either an Immediate or held in a `shifter` register.
// NOTE(review): `wide` presumably selects the 64-bit form and `rm` the opcode-extension
// field — confirm against the definitions.
929 void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
930 void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
931
932 // If any input is not false, output the necessary rex prefix.
933 void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
934
935 // Emit a rex prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
// The overloads below cover the CpuRegister/XmmRegister/Operand combinations declared here.
936 void EmitOptionalRex32(CpuRegister reg);
937 void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
938 void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
939 void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
940 void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
941 void EmitOptionalRex32(const Operand& operand);
942 void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
943 void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
944
945 // Emit a REX.W prefix plus necessary register bit encodings.
// Note: no (XmmRegister, XmmRegister) overload is declared in this group.
946 void EmitRex64();
947 void EmitRex64(CpuRegister reg);
948 void EmitRex64(const Operand& operand);
949 void EmitRex64(CpuRegister dst, CpuRegister src);
950 void EmitRex64(CpuRegister dst, const Operand& operand);
951 void EmitRex64(XmmRegister dst, const Operand& operand);
952 void EmitRex64(XmmRegister dst, CpuRegister src);
953 void EmitRex64(CpuRegister dst, XmmRegister src);
954
955 // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
956 void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
957 void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
958
959 // Emit a 3 byte VEX Prefix
// NOTE(review): each helper returns a uint8_t, which suggests it computes one prefix byte for
// the caller to emit rather than writing to the buffer itself — confirm. EmitVexByteZero takes
// `is_two_byte`, so presumably the 2-byte VEX form is covered too, despite the "3 byte" wording.
960 uint8_t EmitVexByteZero(bool is_two_byte);
961 uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
962 uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp);
963
// Backing store for the literal helpers (AddDouble, AddFloat, AddInt32, AppendInt32, AddInt64)
// and for the IsConstantAreaEmpty/ConstantAreaSize queries.
964 ConstantArea constant_area_;
965
966 DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
967 };
968
EmitUint8(uint8_t value)969 inline void X86_64Assembler::EmitUint8(uint8_t value) {
970 buffer_.Emit<uint8_t>(value);
971 }
972
EmitInt32(int32_t value)973 inline void X86_64Assembler::EmitInt32(int32_t value) {
974 buffer_.Emit<int32_t>(value);
975 }
976
EmitInt64(int64_t value)977 inline void X86_64Assembler::EmitInt64(int64_t value) {
978 // Write this 64-bit value as two 32-bit words for alignment reasons
979 // (this is essentially when running on ARM, which does not allow
980 // 64-bit unaligned accesses). We assume little-endianness here.
981 EmitInt32(Low32Bits(value));
982 EmitInt32(High32Bits(value));
983 }
984
EmitRegisterOperand(uint8_t rm,uint8_t reg)985 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
986 CHECK_GE(rm, 0);
987 CHECK_LT(rm, 8);
988 buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
989 }
990
EmitXmmRegisterOperand(uint8_t rm,XmmRegister reg)991 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
992 EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
993 }
994
EmitFixup(AssemblerFixup * fixup)995 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
996 buffer_.EmitFixup(fixup);
997 }
998
EmitOperandSizeOverride()999 inline void X86_64Assembler::EmitOperandSizeOverride() {
1000 EmitUint8(0x66);
1001 }
1002
1003 } // namespace x86_64
1004 } // namespace art
1005
1006 #endif // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
1007