1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 //
15 // - Neither the name of Sun Microsystems or the names of contributors may
16 // be used to endorse or promote products derived from this software without
17 // specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // The original source code covered by the above license has been
32 // modified significantly by Google Inc.
33 // Copyright 2012 the V8 project authors. All rights reserved.
34
35 // A lightweight X64 Assembler.
36
37 #ifndef V8_X64_ASSEMBLER_X64_H_
38 #define V8_X64_ASSEMBLER_X64_H_
39
40 #include <deque>
41
42 #include "src/assembler.h"
43 #include "src/x64/sse-instr.h"
44
45 namespace v8 {
46 namespace internal {
47
48 // Utility functions
49
50 #define GENERAL_REGISTERS(V) \
51 V(rax) \
52 V(rcx) \
53 V(rdx) \
54 V(rbx) \
55 V(rsp) \
56 V(rbp) \
57 V(rsi) \
58 V(rdi) \
59 V(r8) \
60 V(r9) \
61 V(r10) \
62 V(r11) \
63 V(r12) \
64 V(r13) \
65 V(r14) \
66 V(r15)
67
68 #define ALLOCATABLE_GENERAL_REGISTERS(V) \
69 V(rax) \
70 V(rbx) \
71 V(rdx) \
72 V(rcx) \
73 V(rsi) \
74 V(rdi) \
75 V(r8) \
76 V(r9) \
77 V(r11) \
78 V(r12) \
79 V(r14) \
80 V(r15)
81
82
83 // CPU Registers.
84 //
85 // 1) We would prefer to use an enum, but enum values are assignment-
86 // compatible with int, which has caused code-generation bugs.
87 //
88 // 2) We would prefer to use a class instead of a struct but we don't like
89 // the register initialization to depend on the particular initialization
90 // order (which appears to be different on OS X, Linux, and Windows for the
91 // installed versions of C++ we tried). Using a struct permits C-style
92 // "initialization". Also, the Register objects cannot be const as this
93 // forces initialization stubs in MSVC, making us dependent on initialization
94 // order.
95 //
96 // 3) By not using an enum, we are possibly preventing the compiler from
97 // doing certain constant folds, which may significantly reduce the
98 // code generated for some assembly instructions (because they boil down
99 // to a few constants). If this is a problem, we could change the code
100 // such that we use an enum in optimized mode, and the struct in debug
101 // mode. This way we get the compile-time error checking in debug mode
102 // and best performance in optimized code.
103 //
104 struct Register {
105 enum Code {
106 #define REGISTER_CODE(R) kCode_##R,
107 GENERAL_REGISTERS(REGISTER_CODE)
108 #undef REGISTER_CODE
109 kAfterLast,
110 kCode_no_reg = -1
111 };
112
113 static const int kNumRegisters = Code::kAfterLast;
114
115 static Register from_code(int code) {
116 DCHECK(code >= 0);
117 DCHECK(code < kNumRegisters);
118 Register r = {code};
119 return r;
120 }
121 bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }
122 bool is(Register reg) const { return reg_code == reg.reg_code; }
123 int code() const {
124 DCHECK(is_valid());
125 return reg_code;
126 }
127 int bit() const {
128 DCHECK(is_valid());
129 return 1 << reg_code;
130 }
131
132 bool is_byte_register() const { return reg_code <= 3; }
133 // Return the high bit of the register code as a 0 or 1. Used often
134 // when constructing the REX prefix byte.
135 int high_bit() const { return reg_code >> 3; }
136 // Return the 3 low bits of the register code. Used when encoding registers
137 // in modR/M, SIB, and opcode bytes.
138 int low_bits() const { return reg_code & 0x7; }
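// For example, r9 has code 9, so high_bit() is 1 (it becomes the REX.B,
// REX.R or REX.X bit of the REX prefix) and low_bits() is 1 (it goes into
// the ModR/M, SIB or opcode byte).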
139
140 // Unfortunately we can't make this private in a struct when initializing
141 // by assignment.
142 int reg_code;
143 };
144
145
146 #define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R};
147 GENERAL_REGISTERS(DECLARE_REGISTER)
148 #undef DECLARE_REGISTER
149 const Register no_reg = {Register::kCode_no_reg};
150
151
152 #ifdef _WIN64
153 // Windows calling convention
154 const Register arg_reg_1 = {Register::kCode_rcx};
155 const Register arg_reg_2 = {Register::kCode_rdx};
156 const Register arg_reg_3 = {Register::kCode_r8};
157 const Register arg_reg_4 = {Register::kCode_r9};
158 #else
159 // AMD64 calling convention
160 const Register arg_reg_1 = {Register::kCode_rdi};
161 const Register arg_reg_2 = {Register::kCode_rsi};
162 const Register arg_reg_3 = {Register::kCode_rdx};
163 const Register arg_reg_4 = {Register::kCode_rcx};
164 #endif // _WIN64
165
166
167 #define DOUBLE_REGISTERS(V) \
168 V(xmm0) \
169 V(xmm1) \
170 V(xmm2) \
171 V(xmm3) \
172 V(xmm4) \
173 V(xmm5) \
174 V(xmm6) \
175 V(xmm7) \
176 V(xmm8) \
177 V(xmm9) \
178 V(xmm10) \
179 V(xmm11) \
180 V(xmm12) \
181 V(xmm13) \
182 V(xmm14) \
183 V(xmm15)
184
185 #define FLOAT_REGISTERS DOUBLE_REGISTERS
186 #define SIMD128_REGISTERS DOUBLE_REGISTERS
187
188 #define ALLOCATABLE_DOUBLE_REGISTERS(V) \
189 V(xmm0) \
190 V(xmm1) \
191 V(xmm2) \
192 V(xmm3) \
193 V(xmm4) \
194 V(xmm5) \
195 V(xmm6) \
196 V(xmm7) \
197 V(xmm8) \
198 V(xmm9) \
199 V(xmm10) \
200 V(xmm11) \
201 V(xmm12) \
202 V(xmm13) \
203 V(xmm14)
204
205 static const bool kSimpleFPAliasing = true;
206
207 struct XMMRegister {
208 enum Code {
209 #define REGISTER_CODE(R) kCode_##R,
210 DOUBLE_REGISTERS(REGISTER_CODE)
211 #undef REGISTER_CODE
212 kAfterLast,
213 kCode_no_reg = -1
214 };
215
216 static const int kMaxNumRegisters = Code::kAfterLast;
217
218 static XMMRegister from_code(int code) {
219 XMMRegister result = {code};
220 return result;
221 }
222
223 bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
224 bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
225 int code() const {
226 DCHECK(is_valid());
227 return reg_code;
228 }
229
230 // Return the high bit of the register code as a 0 or 1. Used often
231 // when constructing the REX prefix byte.
232 int high_bit() const { return reg_code >> 3; }
233 // Return the 3 low bits of the register code. Used when encoding registers
234 // in modR/M, SIB, and opcode bytes.
235 int low_bits() const { return reg_code & 0x7; }
236
237 // Unfortunately we can't make this private in a struct when initializing
238 // by assignment.
239 int reg_code;
240 };
241
242 typedef XMMRegister FloatRegister;
243
244 typedef XMMRegister DoubleRegister;
245
246 typedef XMMRegister Simd128Register;
247
248 #define DECLARE_REGISTER(R) \
249 const DoubleRegister R = {DoubleRegister::kCode_##R};
250 DOUBLE_REGISTERS(DECLARE_REGISTER)
251 #undef DECLARE_REGISTER
252 const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};
253
254 enum Condition {
255 // any value < 0 is considered no_condition
256 no_condition = -1,
257
258 overflow = 0,
259 no_overflow = 1,
260 below = 2,
261 above_equal = 3,
262 equal = 4,
263 not_equal = 5,
264 below_equal = 6,
265 above = 7,
266 negative = 8,
267 positive = 9,
268 parity_even = 10,
269 parity_odd = 11,
270 less = 12,
271 greater_equal = 13,
272 less_equal = 14,
273 greater = 15,
274
275 // Fake conditions that are handled by the
276 // opcodes using them.
277 always = 16,
278 never = 17,
279 // aliases
280 carry = below,
281 not_carry = above_equal,
282 zero = equal,
283 not_zero = not_equal,
284 sign = negative,
285 not_sign = positive,
286 last_condition = greater
287 };
288
289
290 // Returns the equivalent of !cc.
291 // Negation of the default no_condition (-1) results in a non-default
292 // no_condition value (-2). As long as tests for no_condition check
293 // for condition < 0, this will work as expected.
294 inline Condition NegateCondition(Condition cc) {
295 return static_cast<Condition>(cc ^ 1);
296 }
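// Example: the condition codes are encoded so that each predicate and its
// negation differ only in the low bit, e.g. NegateCondition(equal /*4*/) ==
// not_equal /*5*/ and NegateCondition(below /*2*/) == above_equal /*3*/.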
297
298
299 // Commute a condition such that {a cond b == b cond' a}.
300 inline Condition CommuteCondition(Condition cc) {
301 switch (cc) {
302 case below:
303 return above;
304 case above:
305 return below;
306 case above_equal:
307 return below_equal;
308 case below_equal:
309 return above_equal;
310 case less:
311 return greater;
312 case greater:
313 return less;
314 case greater_equal:
315 return less_equal;
316 case less_equal:
317 return greater_equal;
318 default:
319 return cc;
320 }
321 }
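// A usage sketch (assuming the usual code-generation context around an
// Assembler):
//   cmpq(rax, rbx); j(less, &target);                     // taken when rax < rbx
// is equivalent to
//   cmpq(rbx, rax); j(CommuteCondition(less), &target);   // i.e. j(greater, ...)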
322
323
324 enum RoundingMode {
325 kRoundToNearest = 0x0,
326 kRoundDown = 0x1,
327 kRoundUp = 0x2,
328 kRoundToZero = 0x3
329 };
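// These values match the rounding-control field of the immediate taken by the
// SSE4.1 roundss/roundsd instructions (see roundss()/roundsd() below), e.g.
// roundsd(xmm1, xmm2, kRoundToZero) truncates toward zero.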
330
331
332 // -----------------------------------------------------------------------------
333 // Machine instruction Immediates
334
335 class Immediate BASE_EMBEDDED {
336 public:
337 explicit Immediate(int32_t value) : value_(value) {}
338 explicit Immediate(int32_t value, RelocInfo::Mode rmode)
339 : value_(value), rmode_(rmode) {}
340 explicit Immediate(Smi* value) {
341 DCHECK(SmiValuesAre31Bits()); // Only available for 31-bit SMI.
342 value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
343 }
344
345 private:
346 int32_t value_;
347 RelocInfo::Mode rmode_ = RelocInfo::NONE32;
348
349 friend class Assembler;
350 };
351
352
353 // -----------------------------------------------------------------------------
354 // Machine instruction Operands
355
356 enum ScaleFactor {
357 times_1 = 0,
358 times_2 = 1,
359 times_4 = 2,
360 times_8 = 3,
361 times_int_size = times_4,
362 times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
363 };
364
365
366 class Operand BASE_EMBEDDED {
367 public:
368 // [base + disp/r]
369 Operand(Register base, int32_t disp);
370
371 // [base + index*scale + disp/r]
372 Operand(Register base,
373 Register index,
374 ScaleFactor scale,
375 int32_t disp);
376
377 // [index*scale + disp/r]
378 Operand(Register index,
379 ScaleFactor scale,
380 int32_t disp);
381
382 // Offset from existing memory operand.
383 // Offset is added to existing displacement as 32-bit signed values and
384 // this must not overflow.
385 Operand(const Operand& base, int32_t offset);
386
387 // [rip + disp/r]
388 explicit Operand(Label* label);
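// For example, Operand(rsp, 0) is the word at the top of the stack, and
// Operand(rbx, rcx, times_4, 8) addresses rbx + rcx*4 + 8, i.e. element rcx
// of an array of 32-bit values starting 8 bytes past rbx.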
389
390 // Checks whether either base or index register is the given register.
391 // Does not check the "reg" part of the Operand.
392 bool AddressUsesRegister(Register reg) const;
393
394 // Queries related to the size of the generated instruction.
395 // Whether the generated instruction will have a REX prefix.
396 bool requires_rex() const { return rex_ != 0; }
397 // Size of the ModR/M, SIB and displacement parts of the generated
398 // instruction.
399 int operand_size() const { return len_; }
400
401 private:
402 byte rex_;
403 byte buf_[9];
404 // The number of bytes of buf_ in use.
405 byte len_;
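// buf_ needs at most 9 bytes: presumably a ModR/M byte followed either by a
// SIB byte and a 32-bit displacement, or by the 8-byte displacement used for
// label-based operands (see set_disp64 below).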
406
407 // Set the ModR/M byte without an encoded 'reg' register. The
408 // register is encoded later as part of the emit_operand operation.
409 // set_modrm can be called before or after set_sib and set_disp*.
410 inline void set_modrm(int mod, Register rm);
411
412 // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
413 inline void set_sib(ScaleFactor scale, Register index, Register base);
414
415 // Adds operand displacement fields (offsets added to the memory address).
416 // Needs to be called after set_sib, not before it.
417 inline void set_disp8(int disp);
418 inline void set_disp32(int disp);
419 inline void set_disp64(int64_t disp); // for labels.
420
421 friend class Assembler;
422 };
423
424 #define ASSEMBLER_INSTRUCTION_LIST(V) \
425 V(add) \
426 V(and) \
427 V(cmp) \
428 V(cmpxchg) \
429 V(dec) \
430 V(idiv) \
431 V(div) \
432 V(imul) \
433 V(inc) \
434 V(lea) \
435 V(mov) \
436 V(movzxb) \
437 V(movzxw) \
438 V(neg) \
439 V(not) \
440 V(or) \
441 V(repmovs) \
442 V(sbb) \
443 V(sub) \
444 V(test) \
445 V(xchg) \
446 V(xor)
447
448 // Shift instructions on operands/registers with kPointerSize, kInt32Size and
449 // kInt64Size.
450 #define SHIFT_INSTRUCTION_LIST(V) \
451 V(rol, 0x0) \
452 V(ror, 0x1) \
453 V(rcl, 0x2) \
454 V(rcr, 0x3) \
455 V(shl, 0x4) \
456 V(shr, 0x5) \
457 V(sar, 0x7) \
458
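// The hex value is the opcode extension (the /digit, i.e. the 'reg' field of
// the ModR/M byte) used by the shift/rotate opcode group; the macros further
// down pass it straight through to shift() as the subcode.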
459
460 class Assembler : public AssemblerBase {
461 private:
462 // We check before assembling an instruction that there is sufficient
463 // space to write an instruction and its relocation information.
464 // The relocation writer's position must be kGap bytes above the end of
465 // the generated instructions. This leaves enough space for the
466 // longest possible x64 instruction, 15 bytes, and the longest possible
467 // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
468 // (There is a 15 byte limit on x64 instruction length that rules out some
469 // otherwise valid instructions.)
470 // This allows for a single, fast space check per instruction.
471 static const int kGap = 32;
472
473 public:
474 // Create an assembler. Instructions and relocation information are emitted
475 // into a buffer, with the instructions starting from the beginning and the
476 // relocation information starting from the end of the buffer. See CodeDesc
477 // for a detailed comment on the layout (globals.h).
478 //
479 // If the provided buffer is NULL, the assembler allocates and grows its own
480 // buffer, and buffer_size determines the initial buffer size. The buffer is
481 // owned by the assembler and deallocated upon destruction of the assembler.
482 //
483 // If the provided buffer is not NULL, the assembler uses the provided buffer
484 // for code generation and assumes its size to be buffer_size. If the buffer
485 // is too small, a fatal error occurs. No deallocation of the buffer is done
486 // upon destruction of the assembler.
487 Assembler(Isolate* isolate, void* buffer, int buffer_size);
488 virtual ~Assembler() { }
489
490 // GetCode emits any pending (non-emitted) code and fills the descriptor
491 // desc. GetCode() is idempotent; it returns the same result if no other
492 // Assembler functions are invoked in between GetCode() calls.
493 void GetCode(CodeDesc* desc);
494
495 // Read/Modify the code target in the relative branch/call instruction at pc.
496 // On the x64 architecture, we use relative jumps with a 32-bit displacement
497 // to jump to other Code objects in the Code space in the heap.
498 // Jumps to C functions are done indirectly through a 64-bit register holding
499 // the absolute address of the target.
500 // These functions convert between absolute Addresses of Code objects and
501 // the relative displacements stored in the code.
502 static inline Address target_address_at(Address pc, Address constant_pool);
503 static inline void set_target_address_at(
504 Isolate* isolate, Address pc, Address constant_pool, Address target,
505 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
506 static inline Address target_address_at(Address pc, Code* code) {
507 Address constant_pool = code ? code->constant_pool() : NULL;
508 return target_address_at(pc, constant_pool);
509 }
510 static inline void set_target_address_at(
511 Isolate* isolate, Address pc, Code* code, Address target,
512 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
513 Address constant_pool = code ? code->constant_pool() : NULL;
514 set_target_address_at(isolate, pc, constant_pool, target,
515 icache_flush_mode);
516 }
517
518 // Return the code target address at a call site from the return address
519 // of that call in the instruction stream.
520 static inline Address target_address_from_return_address(Address pc);
521
522 // This sets the branch destination (which is in the instruction on x64).
523 // This is for calls and branches within generated code.
524 inline static void deserialization_set_special_target_at(
525 Isolate* isolate, Address instruction_payload, Code* code,
526 Address target) {
527 set_target_address_at(isolate, instruction_payload, code, target);
528 }
529
530 // This sets the internal reference at the pc.
531 inline static void deserialization_set_target_internal_reference_at(
532 Isolate* isolate, Address pc, Address target,
533 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
534
535 static inline RelocInfo::Mode RelocInfoNone() {
536 if (kPointerSize == kInt64Size) {
537 return RelocInfo::NONE64;
538 } else {
539 DCHECK(kPointerSize == kInt32Size);
540 return RelocInfo::NONE32;
541 }
542 }
543
544 inline Handle<Object> code_target_object_handle_at(Address pc);
545 inline Address runtime_entry_at(Address pc);
546 // Number of bytes taken up by the branch target in the code.
547 static const int kSpecialTargetSize = 4; // Use 32-bit displacement.
548 // Distance between the address of the code target in the call instruction
549 // and the return address pushed on the stack.
550 static const int kCallTargetAddressOffset = 4; // Use 32-bit displacement.
551 // The length of call(kScratchRegister).
552 static const int kCallScratchRegisterInstructionLength = 3;
553 // The length of call(Immediate32).
554 static const int kShortCallInstructionLength = 5;
555 // The length of movq(kScratchRegister, address).
556 static const int kMoveAddressIntoScratchRegisterInstructionLength =
557 2 + kPointerSize;
558 // The length of movq(kScratchRegister, address) and call(kScratchRegister).
559 static const int kCallSequenceLength =
560 kMoveAddressIntoScratchRegisterInstructionLength +
561 kCallScratchRegisterInstructionLength;
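// With kPointerSize == 8 this works out to 10 + 3 = 13 bytes: the movq
// encodes as a REX.W prefix, one opcode byte and an 8-byte immediate, and the
// indirect call through the (REX-extended) scratch register takes 3 bytes.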
562
563 // The debug break slot must be able to contain an indirect call sequence.
564 static const int kDebugBreakSlotLength = kCallSequenceLength;
565 // Distance between start of patched debug break slot and the emitted address
566 // to jump to.
567 static const int kPatchDebugBreakSlotAddressOffset =
568 kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
569
570 // One byte opcode for test eax,0xXXXXXXXX.
571 static const byte kTestEaxByte = 0xA9;
572 // One byte opcode for test al, 0xXX.
573 static const byte kTestAlByte = 0xA8;
574 // One byte opcode for nop.
575 static const byte kNopByte = 0x90;
576
577 // One byte prefix for a short conditional jump.
578 static const byte kJccShortPrefix = 0x70;
579 static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
580 static const byte kJcShortOpcode = kJccShortPrefix | carry;
581 static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
582 static const byte kJzShortOpcode = kJccShortPrefix | zero;
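// The short Jcc encodings are 0x70 + condition code, so e.g. kJzShortOpcode
// is 0x74 and kJnzShortOpcode is 0x75.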
583
584 // VEX prefix encodings.
585 enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
586 enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
587 enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
588 enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
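// These values are laid out so they can be OR-ed directly into the VEX prefix
// fields: SIMDPrefix is the 2-bit 'pp' field, VectorLength the 'L' bit (0x4),
// VexW the 'W' bit (0x80), and LeadingOpcode the 'mmmmm' field selecting the
// 0F, 0F 38 or 0F 3A opcode maps.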
589
590 // ---------------------------------------------------------------------------
591 // Code generation
592 //
593 // Function names correspond one-to-one to x64 instruction mnemonics.
594 // Unless specified otherwise, instructions operate on 64-bit operands.
595 //
596 // If we need versions of an assembly instruction that operate on different
597 // width arguments, we add a single-letter suffix specifying the width.
598 // This is done for the following instructions: mov, cmp, inc, dec,
599 // add, sub, and test.
600 // There are no versions of these instructions without the suffix.
601 // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
602 // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
603 // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
604 // - Instructions on 64-bit (quadword) operands/registers use 'q'.
605 // - Instructions on operands/registers with pointer size use 'p'.
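// For example, movq(rax, rbx) copies all 64 bits, while movl(rax, rbx) writes
// the low 32 bits (which, per the usual x64 rules, also clears the upper 32
// bits of rax); byte and word instructions touch only the low 8/16 bits.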
606
607 STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);
608
609 #define DECLARE_INSTRUCTION(instruction) \
610 template<class P1> \
611 void instruction##p(P1 p1) { \
612 emit_##instruction(p1, kPointerSize); \
613 } \
614 \
615 template<class P1> \
616 void instruction##l(P1 p1) { \
617 emit_##instruction(p1, kInt32Size); \
618 } \
619 \
620 template<class P1> \
621 void instruction##q(P1 p1) { \
622 emit_##instruction(p1, kInt64Size); \
623 } \
624 \
625 template<class P1, class P2> \
626 void instruction##p(P1 p1, P2 p2) { \
627 emit_##instruction(p1, p2, kPointerSize); \
628 } \
629 \
630 template<class P1, class P2> \
631 void instruction##l(P1 p1, P2 p2) { \
632 emit_##instruction(p1, p2, kInt32Size); \
633 } \
634 \
635 template<class P1, class P2> \
636 void instruction##q(P1 p1, P2 p2) { \
637 emit_##instruction(p1, p2, kInt64Size); \
638 } \
639 \
640 template<class P1, class P2, class P3> \
641 void instruction##p(P1 p1, P2 p2, P3 p3) { \
642 emit_##instruction(p1, p2, p3, kPointerSize); \
643 } \
644 \
645 template<class P1, class P2, class P3> \
646 void instruction##l(P1 p1, P2 p2, P3 p3) { \
647 emit_##instruction(p1, p2, p3, kInt32Size); \
648 } \
649 \
650 template<class P1, class P2, class P3> \
651 void instruction##q(P1 p1, P2 p2, P3 p3) { \
652 emit_##instruction(p1, p2, p3, kInt64Size); \
653 }
654 ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
655 #undef DECLARE_INSTRUCTION
656
657 // Insert the smallest number of nop instructions
658 // possible to align the pc offset to a multiple
659 // of m, where m must be a power of 2.
660 void Align(int m);
661 // Insert the smallest number of zero bytes possible to align the pc offset
662 // to a multiple of m. m must be a power of 2 (>= 2).
663 void DataAlign(int m);
664 void Nop(int bytes = 1);
665 // Aligns code to something that's optimal for a jump target for the platform.
666 void CodeTargetAlign();
667
668 // Stack
669 void pushfq();
670 void popfq();
671
672 void pushq(Immediate value);
673 // Push a 32 bit integer, and guarantee that it is actually pushed as a
674 // 32 bit value; the normal push will optimize the 8 bit case.
675 void pushq_imm32(int32_t imm32);
676 void pushq(Register src);
677 void pushq(const Operand& src);
678
679 void popq(Register dst);
680 void popq(const Operand& dst);
681
682 void enter(Immediate size);
683 void leave();
684
685 // Moves
686 void movb(Register dst, const Operand& src);
687 void movb(Register dst, Immediate imm);
688 void movb(const Operand& dst, Register src);
689 void movb(const Operand& dst, Immediate imm);
690
691 // Move the low 16 bits of a 64-bit register value to a 16-bit
692 // memory location.
693 void movw(Register dst, const Operand& src);
694 void movw(const Operand& dst, Register src);
695 void movw(const Operand& dst, Immediate imm);
696
697 // Move the offset of the label location relative to the current
698 // position (after the move) to the destination.
699 void movl(const Operand& dst, Label* src);
700
701 // Loads a pointer into a register with a relocation mode.
702 void movp(Register dst, void* ptr, RelocInfo::Mode rmode);
703
704 // Loads a 64-bit immediate into a register.
705 void movq(Register dst, int64_t value,
706 RelocInfo::Mode rmode = RelocInfo::NONE64);
707 void movq(Register dst, uint64_t value,
708 RelocInfo::Mode rmode = RelocInfo::NONE64);
709
710 void movsxbl(Register dst, Register src);
711 void movsxbl(Register dst, const Operand& src);
712 void movsxbq(Register dst, Register src);
713 void movsxbq(Register dst, const Operand& src);
714 void movsxwl(Register dst, Register src);
715 void movsxwl(Register dst, const Operand& src);
716 void movsxwq(Register dst, Register src);
717 void movsxwq(Register dst, const Operand& src);
718 void movsxlq(Register dst, Register src);
719 void movsxlq(Register dst, const Operand& src);
720
721 // Repeated moves.
722
723 void repmovsb();
724 void repmovsw();
725 void repmovsp() { emit_repmovs(kPointerSize); }
726 void repmovsl() { emit_repmovs(kInt32Size); }
727 void repmovsq() { emit_repmovs(kInt64Size); }
728
729 // Instruction to load from an immediate 64-bit pointer into RAX.
730 void load_rax(void* ptr, RelocInfo::Mode rmode);
731 void load_rax(ExternalReference ext);
732
733 // Conditional moves.
734 void cmovq(Condition cc, Register dst, Register src);
735 void cmovq(Condition cc, Register dst, const Operand& src);
736 void cmovl(Condition cc, Register dst, Register src);
737 void cmovl(Condition cc, Register dst, const Operand& src);
738
739 void cmpb(Register dst, Immediate src) {
740 immediate_arithmetic_op_8(0x7, dst, src);
741 }
742
743 void cmpb_al(Immediate src);
744
745 void cmpb(Register dst, Register src) {
746 arithmetic_op_8(0x3A, dst, src);
747 }
748
749 void cmpb(Register dst, const Operand& src) {
750 arithmetic_op_8(0x3A, dst, src);
751 }
752
753 void cmpb(const Operand& dst, Register src) {
754 arithmetic_op_8(0x38, src, dst);
755 }
756
757 void cmpb(const Operand& dst, Immediate src) {
758 immediate_arithmetic_op_8(0x7, dst, src);
759 }
760
761 void cmpw(const Operand& dst, Immediate src) {
762 immediate_arithmetic_op_16(0x7, dst, src);
763 }
764
765 void cmpw(Register dst, Immediate src) {
766 immediate_arithmetic_op_16(0x7, dst, src);
767 }
768
769 void cmpw(Register dst, const Operand& src) {
770 arithmetic_op_16(0x3B, dst, src);
771 }
772
773 void cmpw(Register dst, Register src) {
774 arithmetic_op_16(0x3B, dst, src);
775 }
776
777 void cmpw(const Operand& dst, Register src) {
778 arithmetic_op_16(0x39, src, dst);
779 }
780
781 void testb(Register reg, const Operand& op) { testb(op, reg); }
782
783 void testw(Register reg, const Operand& op) { testw(op, reg); }
784
785 void andb(Register dst, Immediate src) {
786 immediate_arithmetic_op_8(0x4, dst, src);
787 }
788
789 void decb(Register dst);
790 void decb(const Operand& dst);
791
792 // Lock prefix.
793 void lock();
794
795 void xchgb(Register reg, const Operand& op);
796 void xchgw(Register reg, const Operand& op);
797
798 void cmpxchgb(const Operand& dst, Register src);
799 void cmpxchgw(const Operand& dst, Register src);
800
801 // Sign-extends rax into rdx:rax.
802 void cqo();
803 // Sign-extends eax into edx:eax.
804 void cdq();
805
806 // Multiply eax by src, put the result in edx:eax.
807 void mull(Register src);
808 void mull(const Operand& src);
809 // Multiply rax by src, put the result in rdx:rax.
810 void mulq(Register src);
811
812 #define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \
813 void instruction##p(Register dst, Immediate imm8) { \
814 shift(dst, imm8, subcode, kPointerSize); \
815 } \
816 \
817 void instruction##l(Register dst, Immediate imm8) { \
818 shift(dst, imm8, subcode, kInt32Size); \
819 } \
820 \
821 void instruction##q(Register dst, Immediate imm8) { \
822 shift(dst, imm8, subcode, kInt64Size); \
823 } \
824 \
825 void instruction##p(Operand dst, Immediate imm8) { \
826 shift(dst, imm8, subcode, kPointerSize); \
827 } \
828 \
829 void instruction##l(Operand dst, Immediate imm8) { \
830 shift(dst, imm8, subcode, kInt32Size); \
831 } \
832 \
833 void instruction##q(Operand dst, Immediate imm8) { \
834 shift(dst, imm8, subcode, kInt64Size); \
835 } \
836 \
837 void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
838 \
839 void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
840 \
841 void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
842 \
843 void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \
844 \
845 void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \
846 \
847 void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
848 SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
849 #undef DECLARE_SHIFT_INSTRUCTION
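// The macros above expand to, e.g., shlq(rax, Immediate(3)) (multiply rax by
// 8), sarl(rdx, Immediate(31)) (arithmetic shift of the low 32 bits) and
// shrq_cl(rax) (shift right by the count in cl).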
850
851 // Shifts dst:src left by cl bits, affecting only dst.
852 void shld(Register dst, Register src);
853
854 // Shifts src:dst right by cl bits, affecting only dst.
855 void shrd(Register dst, Register src);
856
857 void store_rax(void* dst, RelocInfo::Mode mode);
858 void store_rax(ExternalReference ref);
859
860 void subb(Register dst, Immediate src) {
861 immediate_arithmetic_op_8(0x5, dst, src);
862 }
863
864 void testb(Register dst, Register src);
865 void testb(Register reg, Immediate mask);
866 void testb(const Operand& op, Immediate mask);
867 void testb(const Operand& op, Register reg);
868
869 void testw(Register dst, Register src);
870 void testw(Register reg, Immediate mask);
871 void testw(const Operand& op, Immediate mask);
872 void testw(const Operand& op, Register reg);
873
874 // Bit operations.
875 void bt(const Operand& dst, Register src);
876 void bts(const Operand& dst, Register src);
877 void bsrq(Register dst, Register src);
878 void bsrq(Register dst, const Operand& src);
879 void bsrl(Register dst, Register src);
880 void bsrl(Register dst, const Operand& src);
881 void bsfq(Register dst, Register src);
882 void bsfq(Register dst, const Operand& src);
883 void bsfl(Register dst, Register src);
884 void bsfl(Register dst, const Operand& src);
885
886 // Miscellaneous
887 void clc();
888 void cld();
889 void cpuid();
890 void hlt();
891 void int3();
892 void nop();
893 void ret(int imm16);
894 void ud2();
895 void setcc(Condition cc, Register reg);
896
897 // Label operations & relative jumps (PPUM Appendix D)
898 //
899 // Takes a branch opcode (cc) and a label (L) and generates
900 // either a backward branch or a forward branch and links it
901 // to the label fixup chain. Usage:
902 //
903 // Label L; // unbound label
904 // j(cc, &L); // forward branch to unbound label
905 // bind(&L); // bind label to the current pc
906 // j(cc, &L); // backward branch to bound label
907 // bind(&L); // illegal: a label may be bound only once
908 //
909 // Note: The same Label can be used for forward and backward branches
910 // but it may be bound only once.
911
912 void bind(Label* L); // binds an unbound label L to the current code position
913
914 // Calls
915 // Call near relative 32-bit displacement, relative to next instruction.
916 void call(Label* L);
917 void call(Address entry, RelocInfo::Mode rmode);
918 void call(Handle<Code> target,
919 RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
920 TypeFeedbackId ast_id = TypeFeedbackId::None());
921
922 // Calls directly to the given address using a relative offset.
923 // Should only ever be used in Code objects for calls within the
924 // same Code object. Should not be used when generating new code (use labels),
925 // but only when patching existing code.
926 void call(Address target);
927
928 // Call near absolute indirect, address in register
929 void call(Register adr);
930
931 // Jumps
932 // Jump short or near relative.
933 // Use a 32-bit signed displacement.
934 // Unconditional jump to L
935 void jmp(Label* L, Label::Distance distance = Label::kFar);
936 void jmp(Address entry, RelocInfo::Mode rmode);
937 void jmp(Handle<Code> target, RelocInfo::Mode rmode);
938
939 // Jump near absolute indirect (r64)
940 void jmp(Register adr);
941 void jmp(const Operand& src);
942
943 // Conditional jumps
944 void j(Condition cc,
945 Label* L,
946 Label::Distance distance = Label::kFar);
947 void j(Condition cc, Address entry, RelocInfo::Mode rmode);
948 void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
949
950 // Floating-point operations
951 void fld(int i);
952
953 void fld1();
954 void fldz();
955 void fldpi();
956 void fldln2();
957
958 void fld_s(const Operand& adr);
959 void fld_d(const Operand& adr);
960
961 void fstp_s(const Operand& adr);
962 void fstp_d(const Operand& adr);
963 void fstp(int index);
964
965 void fild_s(const Operand& adr);
966 void fild_d(const Operand& adr);
967
968 void fist_s(const Operand& adr);
969
970 void fistp_s(const Operand& adr);
971 void fistp_d(const Operand& adr);
972
973 void fisttp_s(const Operand& adr);
974 void fisttp_d(const Operand& adr);
975
976 void fabs();
977 void fchs();
978
979 void fadd(int i);
980 void fsub(int i);
981 void fmul(int i);
982 void fdiv(int i);
983
984 void fisub_s(const Operand& adr);
985
986 void faddp(int i = 1);
987 void fsubp(int i = 1);
988 void fsubrp(int i = 1);
989 void fmulp(int i = 1);
990 void fdivp(int i = 1);
991 void fprem();
992 void fprem1();
993
994 void fxch(int i = 1);
995 void fincstp();
996 void ffree(int i = 0);
997
998 void ftst();
999 void fucomp(int i);
1000 void fucompp();
1001 void fucomi(int i);
1002 void fucomip();
1003
1004 void fcompp();
1005 void fnstsw_ax();
1006 void fwait();
1007 void fnclex();
1008
1009 void fsin();
1010 void fcos();
1011 void fptan();
1012 void fyl2x();
1013 void f2xm1();
1014 void fscale();
1015 void fninit();
1016
1017 void frndint();
1018
1019 void sahf();
1020
1021 // SSE instructions
1022 void addss(XMMRegister dst, XMMRegister src);
1023 void addss(XMMRegister dst, const Operand& src);
1024 void subss(XMMRegister dst, XMMRegister src);
1025 void subss(XMMRegister dst, const Operand& src);
1026 void mulss(XMMRegister dst, XMMRegister src);
1027 void mulss(XMMRegister dst, const Operand& src);
1028 void divss(XMMRegister dst, XMMRegister src);
1029 void divss(XMMRegister dst, const Operand& src);
1030
1031 void maxss(XMMRegister dst, XMMRegister src);
1032 void maxss(XMMRegister dst, const Operand& src);
1033 void minss(XMMRegister dst, XMMRegister src);
1034 void minss(XMMRegister dst, const Operand& src);
1035
1036 void sqrtss(XMMRegister dst, XMMRegister src);
1037 void sqrtss(XMMRegister dst, const Operand& src);
1038
1039 void ucomiss(XMMRegister dst, XMMRegister src);
1040 void ucomiss(XMMRegister dst, const Operand& src);
1041 void movaps(XMMRegister dst, XMMRegister src);
1042
1043 // Don't use this unless it's important to keep the
1044 // top half of the destination register unchanged.
1045 // Use movaps when moving float values and movd for integer
1046 // values in xmm registers.
1047 void movss(XMMRegister dst, XMMRegister src);
1048
1049 void movss(XMMRegister dst, const Operand& src);
1050 void movss(const Operand& dst, XMMRegister src);
1051 void shufps(XMMRegister dst, XMMRegister src, byte imm8);
1052
1053 void cvttss2si(Register dst, const Operand& src);
1054 void cvttss2si(Register dst, XMMRegister src);
1055 void cvtlsi2ss(XMMRegister dst, const Operand& src);
1056 void cvtlsi2ss(XMMRegister dst, Register src);
1057
1058 void andps(XMMRegister dst, XMMRegister src);
1059 void andps(XMMRegister dst, const Operand& src);
1060 void orps(XMMRegister dst, XMMRegister src);
1061 void orps(XMMRegister dst, const Operand& src);
1062 void xorps(XMMRegister dst, XMMRegister src);
1063 void xorps(XMMRegister dst, const Operand& src);
1064
1065 void addps(XMMRegister dst, XMMRegister src);
1066 void addps(XMMRegister dst, const Operand& src);
1067 void subps(XMMRegister dst, XMMRegister src);
1068 void subps(XMMRegister dst, const Operand& src);
1069 void mulps(XMMRegister dst, XMMRegister src);
1070 void mulps(XMMRegister dst, const Operand& src);
1071 void divps(XMMRegister dst, XMMRegister src);
1072 void divps(XMMRegister dst, const Operand& src);
1073
1074 void movmskps(Register dst, XMMRegister src);
1075
1076 void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
1077 SIMDPrefix pp, LeadingOpcode m, VexW w);
1078 void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
1079 SIMDPrefix pp, LeadingOpcode m, VexW w);
1080
1081 // SSE2 instructions
1082 void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
1083 byte opcode);
1084 void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
1085 byte opcode);
1086 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1087 void instruction(XMMRegister dst, XMMRegister src) { \
1088 sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1089 } \
1090 void instruction(XMMRegister dst, const Operand& src) { \
1091 sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1092 }
1093
1094 SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
1095 #undef DECLARE_SSE2_INSTRUCTION
1096
1097 #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
1098 void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1099 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1100 } \
1101 void v##instruction(XMMRegister dst, XMMRegister src1, \
1102 const Operand& src2) { \
1103 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1104 }
1105
1106 SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
1107 #undef DECLARE_SSE2_AVX_INSTRUCTION
1108
1109 // SSE3
1110 void lddqu(XMMRegister dst, const Operand& src);
1111
1112 // SSSE3
1113 void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1114 byte escape2, byte opcode);
1115 void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
1116 byte escape1, byte escape2, byte opcode);
1117
1118 #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
1119 opcode) \
1120 void instruction(XMMRegister dst, XMMRegister src) { \
1121 ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1122 } \
1123 void instruction(XMMRegister dst, const Operand& src) { \
1124 ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1125 }
1126
1127 SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1128 #undef DECLARE_SSSE3_INSTRUCTION
1129
1130 // SSE4
1131 void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1132 byte escape2, byte opcode);
1133 void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
1134 byte escape1, byte escape2, byte opcode);
1135 #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
1136 opcode) \
1137 void instruction(XMMRegister dst, XMMRegister src) { \
1138 sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1139 } \
1140 void instruction(XMMRegister dst, const Operand& src) { \
1141 sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1142 }
1143
1144 SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1145 #undef DECLARE_SSE4_INSTRUCTION
1146
1147 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
1148 opcode) \
1149 void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1150 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1151 } \
1152 void v##instruction(XMMRegister dst, XMMRegister src1, \
1153 const Operand& src2) { \
1154 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1155 }
1156
1157 SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1158 SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1159 #undef DECLARE_SSE34_AVX_INSTRUCTION
1160
1161 void movd(XMMRegister dst, Register src);
1162 void movd(XMMRegister dst, const Operand& src);
1163 void movd(Register dst, XMMRegister src);
1164 void movq(XMMRegister dst, Register src);
1165 void movq(Register dst, XMMRegister src);
1166 void movq(XMMRegister dst, XMMRegister src);
1167
1168 // Don't use this unless it's important to keep the
1169 // top half of the destination register unchanged.
1170 // Use movapd when moving double values and movq for integer
1171 // values in xmm registers.
1172 void movsd(XMMRegister dst, XMMRegister src);
1173
1174 void movsd(const Operand& dst, XMMRegister src);
1175 void movsd(XMMRegister dst, const Operand& src);
1176
1177 void movdqa(const Operand& dst, XMMRegister src);
1178 void movdqa(XMMRegister dst, const Operand& src);
1179
1180 void movdqu(const Operand& dst, XMMRegister src);
1181 void movdqu(XMMRegister dst, const Operand& src);
1182
1183 void movapd(XMMRegister dst, XMMRegister src);
1184 void movupd(XMMRegister dst, const Operand& src);
1185 void movupd(const Operand& dst, XMMRegister src);
1186
1187 void psllq(XMMRegister reg, byte imm8);
1188 void psrlq(XMMRegister reg, byte imm8);
1189 void psllw(XMMRegister reg, byte imm8);
1190 void pslld(XMMRegister reg, byte imm8);
1191 void psrlw(XMMRegister reg, byte imm8);
1192 void psrld(XMMRegister reg, byte imm8);
1193 void psraw(XMMRegister reg, byte imm8);
1194 void psrad(XMMRegister reg, byte imm8);
1195
1196 void cvttsd2si(Register dst, const Operand& src);
1197 void cvttsd2si(Register dst, XMMRegister src);
1198 void cvttss2siq(Register dst, XMMRegister src);
1199 void cvttss2siq(Register dst, const Operand& src);
1200 void cvttsd2siq(Register dst, XMMRegister src);
1201 void cvttsd2siq(Register dst, const Operand& src);
1202
1203 void cvtlsi2sd(XMMRegister dst, const Operand& src);
1204 void cvtlsi2sd(XMMRegister dst, Register src);
1205
1206 void cvtqsi2ss(XMMRegister dst, const Operand& src);
1207 void cvtqsi2ss(XMMRegister dst, Register src);
1208
1209 void cvtqsi2sd(XMMRegister dst, const Operand& src);
1210 void cvtqsi2sd(XMMRegister dst, Register src);
1211
1212
1213 void cvtss2sd(XMMRegister dst, XMMRegister src);
1214 void cvtss2sd(XMMRegister dst, const Operand& src);
1215 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1216 void cvtsd2ss(XMMRegister dst, const Operand& src);
1217
1218 void cvtsd2si(Register dst, XMMRegister src);
1219 void cvtsd2siq(Register dst, XMMRegister src);
1220
1221 void addsd(XMMRegister dst, XMMRegister src);
1222 void addsd(XMMRegister dst, const Operand& src);
1223 void subsd(XMMRegister dst, XMMRegister src);
1224 void subsd(XMMRegister dst, const Operand& src);
1225 void mulsd(XMMRegister dst, XMMRegister src);
1226 void mulsd(XMMRegister dst, const Operand& src);
1227 void divsd(XMMRegister dst, XMMRegister src);
1228 void divsd(XMMRegister dst, const Operand& src);
1229
1230 void maxsd(XMMRegister dst, XMMRegister src);
1231 void maxsd(XMMRegister dst, const Operand& src);
1232 void minsd(XMMRegister dst, XMMRegister src);
1233 void minsd(XMMRegister dst, const Operand& src);
1234
1235 void andpd(XMMRegister dst, XMMRegister src);
1236 void andpd(XMMRegister dst, const Operand& src);
1237 void orpd(XMMRegister dst, XMMRegister src);
1238 void orpd(XMMRegister dst, const Operand& src);
1239 void xorpd(XMMRegister dst, XMMRegister src);
1240 void xorpd(XMMRegister dst, const Operand& src);
1241 void sqrtsd(XMMRegister dst, XMMRegister src);
1242 void sqrtsd(XMMRegister dst, const Operand& src);
1243
1244 void ucomisd(XMMRegister dst, XMMRegister src);
1245 void ucomisd(XMMRegister dst, const Operand& src);
1246 void cmpltsd(XMMRegister dst, XMMRegister src);
1247
1248 void movmskpd(Register dst, XMMRegister src);
1249
1250 void punpckldq(XMMRegister dst, XMMRegister src);
1251 void punpckldq(XMMRegister dst, const Operand& src);
1252 void punpckhdq(XMMRegister dst, XMMRegister src);
1253
1254 // SSE 4.1 instructions
1255 void insertps(XMMRegister dst, XMMRegister src, byte imm8);
1256 void extractps(Register dst, XMMRegister src, byte imm8);
1257 void pextrb(Register dst, XMMRegister src, int8_t imm8);
1258 void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
1259 void pextrw(Register dst, XMMRegister src, int8_t imm8);
1260 void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
1261 void pextrd(Register dst, XMMRegister src, int8_t imm8);
1262 void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
1263 void pinsrb(XMMRegister dst, Register src, int8_t imm8);
1264 void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
1265 void pinsrw(XMMRegister dst, Register src, int8_t imm8);
1266 void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
1267 void pinsrd(XMMRegister dst, Register src, int8_t imm8);
1268 void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
1269
1270 void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
1271 void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1272
1273 void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
1274 void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
1275 void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
1276 void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);
1277
1278 #define SSE_CMP_P(instr, imm8) \
1279 void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
1280 void instr##ps(XMMRegister dst, const Operand& src) { \
1281 cmpps(dst, src, imm8); \
1282 } \
1283 void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
1284 void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }
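// The immediates below are the standard cmpps/cmppd comparison predicates
// (0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = NLT, 6 = NLE); predicates 3 (UNORD)
// and 7 (ORD) are not wrapped here.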
1285
1286 SSE_CMP_P(cmpeq, 0x0);
1287 SSE_CMP_P(cmplt, 0x1);
1288 SSE_CMP_P(cmple, 0x2);
1289 SSE_CMP_P(cmpneq, 0x4);
1290 SSE_CMP_P(cmpnlt, 0x5);
1291 SSE_CMP_P(cmpnle, 0x6);
1292
1293 #undef SSE_CMP_P
1294
1295 void minps(XMMRegister dst, XMMRegister src);
1296 void minps(XMMRegister dst, const Operand& src);
1297 void maxps(XMMRegister dst, XMMRegister src);
1298 void maxps(XMMRegister dst, const Operand& src);
1299 void rcpps(XMMRegister dst, XMMRegister src);
1300 void rcpps(XMMRegister dst, const Operand& src);
1301 void rsqrtps(XMMRegister dst, XMMRegister src);
1302 void rsqrtps(XMMRegister dst, const Operand& src);
1303 void sqrtps(XMMRegister dst, XMMRegister src);
1304 void sqrtps(XMMRegister dst, const Operand& src);
1305 void movups(XMMRegister dst, XMMRegister src);
1306 void movups(XMMRegister dst, const Operand& src);
1307 void movups(const Operand& dst, XMMRegister src);
1308 void psrldq(XMMRegister dst, uint8_t shift);
1309 void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1310 void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
1311 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1312 void cvtdq2ps(XMMRegister dst, const Operand& src);
1313
1314 // AVX instructions
1315 void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1316 vfmasd(0x99, dst, src1, src2);
1317 }
1318 void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1319 vfmasd(0xa9, dst, src1, src2);
1320 }
1321 void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1322 vfmasd(0xb9, dst, src1, src2);
1323 }
1324 void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1325 vfmasd(0x99, dst, src1, src2);
1326 }
1327 void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1328 vfmasd(0xa9, dst, src1, src2);
1329 }
1330 void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1331 vfmasd(0xb9, dst, src1, src2);
1332 }
1333 void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1334 vfmasd(0x9b, dst, src1, src2);
1335 }
1336 void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1337 vfmasd(0xab, dst, src1, src2);
1338 }
1339 void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1340 vfmasd(0xbb, dst, src1, src2);
1341 }
1342 void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1343 vfmasd(0x9b, dst, src1, src2);
1344 }
1345 void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1346 vfmasd(0xab, dst, src1, src2);
1347 }
1348 void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1349 vfmasd(0xbb, dst, src1, src2);
1350 }
1351 void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1352 vfmasd(0x9d, dst, src1, src2);
1353 }
1354 void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1355 vfmasd(0xad, dst, src1, src2);
1356 }
1357 void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1358 vfmasd(0xbd, dst, src1, src2);
1359 }
1360 void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1361 vfmasd(0x9d, dst, src1, src2);
1362 }
1363 void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1364 vfmasd(0xad, dst, src1, src2);
1365 }
1366 void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1367 vfmasd(0xbd, dst, src1, src2);
1368 }
1369 void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1370 vfmasd(0x9f, dst, src1, src2);
1371 }
1372 void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1373 vfmasd(0xaf, dst, src1, src2);
1374 }
1375 void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1376 vfmasd(0xbf, dst, src1, src2);
1377 }
1378 void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1379 vfmasd(0x9f, dst, src1, src2);
1380 }
1381 void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1382 vfmasd(0xaf, dst, src1, src2);
1383 }
1384 void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1385 vfmasd(0xbf, dst, src1, src2);
1386 }
1387 void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1388 void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1389
1390 void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1391 vfmass(0x99, dst, src1, src2);
1392 }
1393 void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1394 vfmass(0xa9, dst, src1, src2);
1395 }
1396 void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1397 vfmass(0xb9, dst, src1, src2);
1398 }
1399 void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1400 vfmass(0x99, dst, src1, src2);
1401 }
1402 void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1403 vfmass(0xa9, dst, src1, src2);
1404 }
1405 void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1406 vfmass(0xb9, dst, src1, src2);
1407 }
1408 void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1409 vfmass(0x9b, dst, src1, src2);
1410 }
1411 void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1412 vfmass(0xab, dst, src1, src2);
1413 }
1414 void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1415 vfmass(0xbb, dst, src1, src2);
1416 }
1417 void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1418 vfmass(0x9b, dst, src1, src2);
1419 }
1420 void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1421 vfmass(0xab, dst, src1, src2);
1422 }
1423 void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1424 vfmass(0xbb, dst, src1, src2);
1425 }
1426 void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1427 vfmass(0x9d, dst, src1, src2);
1428 }
1429 void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1430 vfmass(0xad, dst, src1, src2);
1431 }
1432 void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1433 vfmass(0xbd, dst, src1, src2);
1434 }
1435 void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1436 vfmass(0x9d, dst, src1, src2);
1437 }
1438 void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1439 vfmass(0xad, dst, src1, src2);
1440 }
1441 void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1442 vfmass(0xbd, dst, src1, src2);
1443 }
1444 void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1445 vfmass(0x9f, dst, src1, src2);
1446 }
1447 void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1448 vfmass(0xaf, dst, src1, src2);
1449 }
1450 void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1451 vfmass(0xbf, dst, src1, src2);
1452 }
1453 void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1454 vfmass(0x9f, dst, src1, src2);
1455 }
1456 void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1457 vfmass(0xaf, dst, src1, src2);
1458 }
1459 void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1460 vfmass(0xbf, dst, src1, src2);
1461 }
1462 void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1463 void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1464
1465 void vmovd(XMMRegister dst, Register src);
1466 void vmovd(XMMRegister dst, const Operand& src);
1467 void vmovd(Register dst, XMMRegister src);
1468 void vmovq(XMMRegister dst, Register src);
1469 void vmovq(XMMRegister dst, const Operand& src);
1470 void vmovq(Register dst, XMMRegister src);
1471
vmovsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1472 void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1473 vsd(0x10, dst, src1, src2);
1474 }
vmovsd(XMMRegister dst,const Operand & src)1475 void vmovsd(XMMRegister dst, const Operand& src) {
1476 vsd(0x10, dst, xmm0, src);
1477 }
vmovsd(const Operand & dst,XMMRegister src)1478 void vmovsd(const Operand& dst, XMMRegister src) {
1479 vsd(0x11, src, xmm0, dst);
1480 }
1481
1482 #define AVX_SP_3(instr, opcode) \
1483 AVX_S_3(instr, opcode) \
1484 AVX_P_3(instr, opcode)
1485
1486 #define AVX_S_3(instr, opcode) \
1487 AVX_3(instr##ss, opcode, vss) \
1488 AVX_3(instr##sd, opcode, vsd)
1489
1490 #define AVX_P_3(instr, opcode) \
1491 AVX_3(instr##ps, opcode, vps) \
1492 AVX_3(instr##pd, opcode, vpd)
1493
1494 #define AVX_3(instr, opcode, impl) \
1495 void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1496 impl(opcode, dst, src1, src2); \
1497 } \
1498 void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
1499 impl(opcode, dst, src1, src2); \
1500 }
1501
1502 AVX_SP_3(vsqrt, 0x51);
1503 AVX_SP_3(vadd, 0x58);
1504 AVX_SP_3(vsub, 0x5c);
1505 AVX_SP_3(vmul, 0x59);
1506 AVX_SP_3(vdiv, 0x5e);
1507 AVX_SP_3(vmin, 0x5d);
1508 AVX_SP_3(vmax, 0x5f);
1509 AVX_P_3(vand, 0x54);
1510 AVX_P_3(vor, 0x56);
1511 AVX_P_3(vxor, 0x57);
1512 AVX_3(vcvtsd2ss, 0x5a, vsd);
1513
1514 #undef AVX_3
1515 #undef AVX_S_3
1516 #undef AVX_P_3
1517 #undef AVX_SP_3
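
  // For reference, a sketch of what one instantiation above expands to:
  // AVX_SP_3(vadd, 0x58) defines vaddss, vaddsd, vaddps and vaddpd, each with
  // a register overload and a memory-operand overload, e.g.
  //   void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  //     vsd(0x58, dst, src1, src2);
  //   }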

  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {2};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {6};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
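
  // In the two helpers above the "register" {2} or {6} is not a real operand:
  // opcode group 0F 73 encodes the operation in the ModR/M reg field, so
  // /2 selects PSRLQ and /6 selects PSLLQ; the XMMRegister initializer is
  // simply a way to pass that opcode extension through vpd().
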
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
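
  // Note on the immediate above: in the ROUNDSS/ROUNDSD control byte the low
  // two bits select the rounding mode and bit 3 (0x8) suppresses the
  // precision (inexact) exception, hence the OR with 0x8.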

  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(XMMRegister dst, const Operand& src) {
    vps(0x10, dst, xmm0, src);
  }
  void vmovups(const Operand& dst, XMMRegister src) {
    vps(0x11, src, xmm0, dst);
  }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, const Operand& src) {
    vpd(0x10, dst, xmm0, src);
  }
  void vmovupd(const Operand& dst, XMMRegister src) {
    vpd(0x11, src, xmm0, dst);
  }
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vpd(0x50, idst, xmm0, src);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }

#define AVX_CMP_P(instr, imm8)                                             \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  AVX_CMP_P(vcmpeq, 0x0);
  AVX_CMP_P(vcmplt, 0x1);
  AVX_CMP_P(vcmple, 0x2);
  AVX_CMP_P(vcmpneq, 0x4);
  AVX_CMP_P(vcmpnlt, 0x5);
  AVX_CMP_P(vcmpnle, 0x6);

#undef AVX_CMP_P
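
  // The immediates above are the standard CMPPS/CMPPD predicate encodings:
  // 0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = NLT, 6 = NLE. Predicates 3 (UNORD)
  // and 7 (ORD) have no named wrapper here; callers needing them can use
  // vcmpps/vcmppd with an explicit immediate, e.g.
  //   vcmpps(dst, lhs, rhs, 0x3);  // unordered compare (names illustrative)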

  void vlddqu(XMMRegister dst, const Operand& src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {6};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {2};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {4};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {6};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {2};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {4};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
    emit(imm8);
  }
  void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }

  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // BMI instructions.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsiq(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, Register src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, Register src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, Register src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, Register src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, Register src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, const Operand& src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, const Operand& src);

  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, const Operand& src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, const Operand& src);

  void popcntq(Register dst, Register src);
  void popcntq(Register dst, const Operand& src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, const Operand& src);

  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }
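
  // Typical use (a sketch, names illustrative): bind a label at the start of
  // a sequence and measure how many bytes have been emitted since then.
  //   Label start;
  //   bind(&start);
  //   ...
  //   int emitted = SizeOfCodeGeneratedSince(&start);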

  // Mark generator continuation.
  void RecordGeneratorContinuation();

  // Mark address of a debug break slot.
  void RecordDebugBreakSlot(RelocInfo::Mode mode);

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
                         int id);

  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }

  // Writes a single word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  void dp(uintptr_t data) { dq(data); }
  void dq(Label* label);
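  // A sketch (names illustrative, not from this file) of how dq(Label*) can
  // be used to lay down an inline jump table:
  //   Label table, case0, case1;
  //   ...
  //   bind(&table);
  //   dq(&case0);
  //   dq(&case1);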

  // Check if there are fewer than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  byte byte_at(int pos) { return buffer_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }

  Address pc() const { return pc_; }

 protected:
  // Call near indirect
  void call(const Operand& operand);

 private:
  byte* addr_at(int pos) { return buffer_ + pos; }
  uint32_t long_at(int pos) {
    return *reinterpret_cast<uint32_t*>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
  }

  // code emission
  void GrowBuffer();

  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitp(void* x, RelocInfo::Mode rmode);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_code_target(Handle<Code> target,
                               RelocInfo::Mode rmode,
                               TypeFeedbackId ast_id = TypeFeedbackId::None());
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  void emit(Immediate x) {
    if (!RelocInfo::IsNone(x.rmode_)) {
      RecordRelocInfo(x.rmode_);
    }
    emitl(x.value_);
  }

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is set.
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }
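  // For reference: a REX prefix byte has the form 0b0100WRXB, so the 0x48
  // emitted above sets only REX.W (64-bit operand size) and leaves the
  // R, X and B extension bits clear.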

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared. If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);

  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK(size == kInt32Size);
    }
  }

  template<class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template<class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }

  // Emit VEX prefix.
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
                              LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, const Operand& adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, const Operand& adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }
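  // For reference: a ModR/M byte is laid out as [mod:2][reg:3][rm:3]; the
  // 0xC0 above sets mod = 0b11 (register-direct), with the low three bits of
  // each register code going into the reg and rm fields.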

  // Emit the code-object-relative offset of the label's position.
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(Register reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);

  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP. The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode,
                     Register reg,
                     const Operand& rm_reg,
                     int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode,
                                 Register dst,
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
                                 const Operand& dst,
                                 Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode,
                               Register dst,
                               Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode,
                               const Operand& dst,
                               Immediate src,
                               int size);

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl % 64 bits.
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetics
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with dst. If equal, set ZF and write src into
  // dst. Otherwise clear ZF and write dst into {al,ax,eax,rax}. This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(const Operand& dst, Register src, int size);

  void emit_dec(Register dst, int size);
  void emit_dec(const Operand& dst, int size);

  // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(const Operand& src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, const Operand& src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, const Operand& src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(const Operand& dst, int size);

  void emit_lea(Register dst, const Operand& src, int size);

  void emit_mov(Register dst, const Operand& src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(const Operand& dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(const Operand& dst, Immediate value, int size);

  void emit_movzxb(Register dst, const Operand& src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, const Operand& src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(const Operand& dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(const Operand& dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, const Operand& src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(const Operand& dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  void emit_test(Register reg, const Operand& op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32-bit operations zero the top 32 bits of 64-bit registers. Therefore
      // there is no need to make this a 64-bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }
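  // For example, with this rule emit_xor(rax, rax, kInt64Size) drops REX.W and
  // emits the two-byte 32-bit form (33 C0), which clears rax just the same.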

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions are similar.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);

  friend class CodePatcher;
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  List< Handle<Code> > code_targets_;
};


// Helper class that ensures that there is enough space for generating
// instructions and relocation information. The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace BASE_EMBEDDED {
 public:
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
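
// Typical use (a sketch): an emitter constructs an EnsureSpace on entry so
// the buffer is grown, if needed, before any bytes are written, e.g.
//   void Assembler::nop() {
//     EnsureSpace ensure_space(this);
//     emit(0x90);
//   }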

}  // namespace internal
}  // namespace v8

#endif  // V8_X64_ASSEMBLER_X64_H_