/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 #include "assembler_x86_64.h"
18
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23
24 namespace art {
25 namespace x86_64 {
26
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28 return os << reg.AsRegister();
29 }
30
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32 return os << reg.AsFloatRegister();
33 }
34
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36 return os << "ST" << static_cast<int>(reg);
37 }
38
operator <<(std::ostream & os,const Address & addr)39 std::ostream& operator<<(std::ostream& os, const Address& addr) {
40 switch (addr.mod()) {
41 case 0:
42 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
43 return os << "(%" << addr.cpu_rm() << ")";
44 } else if (addr.base() == RBP) {
45 return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
46 << "," << (1 << addr.scale()) << ")";
47 }
48 return os << "(%" << addr.cpu_base() << ",%"
49 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
50 case 1:
51 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
52 return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
53 }
54 return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
55 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
56 case 2:
57 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
58 return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
59 }
60 return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
61 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
62 default:
63 return os << "<address?>";
64 }
65 }
66
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68 if (has_AVX_ || has_AVX2_) {
69 return true;
70 }
71 return false;
72 }
73
74
call(CpuRegister reg)75 void X86_64Assembler::call(CpuRegister reg) {
76 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
77 EmitOptionalRex32(reg);
78 EmitUint8(0xFF);
79 EmitRegisterOperand(2, reg.LowBits());
80 }
81
82
call(const Address & address)83 void X86_64Assembler::call(const Address& address) {
84 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
85 EmitOptionalRex32(address);
86 EmitUint8(0xFF);
87 EmitOperand(2, address);
88 }
89
90
call(Label * label)91 void X86_64Assembler::call(Label* label) {
92 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
93 EmitUint8(0xE8);
94 static const int kSize = 5;
95 // Offset by one because we already have emitted the opcode.
96 EmitLabel(label, kSize - 1);
97 }
98
pushq(CpuRegister reg)99 void X86_64Assembler::pushq(CpuRegister reg) {
100 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
101 EmitOptionalRex32(reg);
102 EmitUint8(0x50 + reg.LowBits());
103 }
104
105
pushq(const Address & address)106 void X86_64Assembler::pushq(const Address& address) {
107 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
108 EmitOptionalRex32(address);
109 EmitUint8(0xFF);
110 EmitOperand(6, address);
111 }
112
113
pushq(const Immediate & imm)114 void X86_64Assembler::pushq(const Immediate& imm) {
115 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
116 CHECK(imm.is_int32()); // pushq only supports 32b immediate.
117 if (imm.is_int8()) {
118 EmitUint8(0x6A);
119 EmitUint8(imm.value() & 0xFF);
120 } else {
121 EmitUint8(0x68);
122 EmitImmediate(imm);
123 }
124 }
125
126
popq(CpuRegister reg)127 void X86_64Assembler::popq(CpuRegister reg) {
128 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
129 EmitOptionalRex32(reg);
130 EmitUint8(0x58 + reg.LowBits());
131 }
132
133
popq(const Address & address)134 void X86_64Assembler::popq(const Address& address) {
135 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
136 EmitOptionalRex32(address);
137 EmitUint8(0x8F);
138 EmitOperand(0, address);
139 }
140
141
movq(CpuRegister dst,const Immediate & imm)142 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
143 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
144 if (imm.is_int32()) {
145 // 32 bit. Note: sign-extends.
146 EmitRex64(dst);
147 EmitUint8(0xC7);
148 EmitRegisterOperand(0, dst.LowBits());
149 EmitInt32(static_cast<int32_t>(imm.value()));
150 } else {
151 EmitRex64(dst);
152 EmitUint8(0xB8 + dst.LowBits());
153 EmitInt64(imm.value());
154 }
155 }
156
157
movl(CpuRegister dst,const Immediate & imm)158 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
159 CHECK(imm.is_int32());
160 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
161 EmitOptionalRex32(dst);
162 EmitUint8(0xB8 + dst.LowBits());
163 EmitImmediate(imm);
164 }
165
166
movq(const Address & dst,const Immediate & imm)167 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
168 CHECK(imm.is_int32());
169 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
170 EmitRex64(dst);
171 EmitUint8(0xC7);
172 EmitOperand(0, dst);
173 EmitImmediate(imm);
174 }
175
176
movq(CpuRegister dst,CpuRegister src)177 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
178 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
179 // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
180 EmitRex64(src, dst);
181 EmitUint8(0x89);
182 EmitRegisterOperand(src.LowBits(), dst.LowBits());
183 }
184
185
movl(CpuRegister dst,CpuRegister src)186 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
187 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
188 EmitOptionalRex32(dst, src);
189 EmitUint8(0x8B);
190 EmitRegisterOperand(dst.LowBits(), src.LowBits());
191 }
192
193
movq(CpuRegister dst,const Address & src)194 void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
195 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
196 EmitRex64(dst, src);
197 EmitUint8(0x8B);
198 EmitOperand(dst.LowBits(), src);
199 }
200
201
movl(CpuRegister dst,const Address & src)202 void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
203 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
204 EmitOptionalRex32(dst, src);
205 EmitUint8(0x8B);
206 EmitOperand(dst.LowBits(), src);
207 }
208
209
movq(const Address & dst,CpuRegister src)210 void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
211 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
212 EmitRex64(src, dst);
213 EmitUint8(0x89);
214 EmitOperand(src.LowBits(), dst);
215 }
216
217
movl(const Address & dst,CpuRegister src)218 void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
219 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
220 EmitOptionalRex32(src, dst);
221 EmitUint8(0x89);
222 EmitOperand(src.LowBits(), dst);
223 }
224
movl(const Address & dst,const Immediate & imm)225 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
226 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
227 EmitOptionalRex32(dst);
228 EmitUint8(0xC7);
229 EmitOperand(0, dst);
230 EmitImmediate(imm);
231 }
232
movntl(const Address & dst,CpuRegister src)233 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
234 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
235 EmitOptionalRex32(src, dst);
236 EmitUint8(0x0F);
237 EmitUint8(0xC3);
238 EmitOperand(src.LowBits(), dst);
239 }
240
movntq(const Address & dst,CpuRegister src)241 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
242 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
243 EmitRex64(src, dst);
244 EmitUint8(0x0F);
245 EmitUint8(0xC3);
246 EmitOperand(src.LowBits(), dst);
247 }
248
cmov(Condition c,CpuRegister dst,CpuRegister src)249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
250 cmov(c, dst, src, true);
251 }
252
cmov(Condition c,CpuRegister dst,CpuRegister src,bool is64bit)253 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
254 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
255 EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
256 EmitUint8(0x0F);
257 EmitUint8(0x40 + c);
258 EmitRegisterOperand(dst.LowBits(), src.LowBits());
259 }
260
261
cmov(Condition c,CpuRegister dst,const Address & src,bool is64bit)262 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
263 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
264 if (is64bit) {
265 EmitRex64(dst, src);
266 } else {
267 EmitOptionalRex32(dst, src);
268 }
269 EmitUint8(0x0F);
270 EmitUint8(0x40 + c);
271 EmitOperand(dst.LowBits(), src);
272 }
273
274
movzxb(CpuRegister dst,CpuRegister src)275 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
276 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
277 EmitOptionalByteRegNormalizingRex32(dst, src);
278 EmitUint8(0x0F);
279 EmitUint8(0xB6);
280 EmitRegisterOperand(dst.LowBits(), src.LowBits());
281 }
282
283
movzxb(CpuRegister dst,const Address & src)284 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
285 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
286 // Byte register is only in the source register form, so we don't use
287 // EmitOptionalByteRegNormalizingRex32(dst, src);
288 EmitOptionalRex32(dst, src);
289 EmitUint8(0x0F);
290 EmitUint8(0xB6);
291 EmitOperand(dst.LowBits(), src);
292 }
293
294
movsxb(CpuRegister dst,CpuRegister src)295 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
296 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
297 EmitOptionalByteRegNormalizingRex32(dst, src);
298 EmitUint8(0x0F);
299 EmitUint8(0xBE);
300 EmitRegisterOperand(dst.LowBits(), src.LowBits());
301 }
302
303
movsxb(CpuRegister dst,const Address & src)304 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
305 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
306 // Byte register is only in the source register form, so we don't use
307 // EmitOptionalByteRegNormalizingRex32(dst, src);
308 EmitOptionalRex32(dst, src);
309 EmitUint8(0x0F);
310 EmitUint8(0xBE);
311 EmitOperand(dst.LowBits(), src);
312 }
313
314
movb(CpuRegister,const Address &)315 void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
316 LOG(FATAL) << "Use movzxb or movsxb instead.";
317 }
318
319
movb(const Address & dst,CpuRegister src)320 void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
321 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
322 EmitOptionalByteRegNormalizingRex32(src, dst);
323 EmitUint8(0x88);
324 EmitOperand(src.LowBits(), dst);
325 }
326
327
movb(const Address & dst,const Immediate & imm)328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
329 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
330 EmitOptionalRex32(dst);
331 EmitUint8(0xC6);
332 EmitOperand(Register::RAX, dst);
333 CHECK(imm.is_int8());
334 EmitUint8(imm.value() & 0xFF);
335 }
336
337
movzxw(CpuRegister dst,CpuRegister src)338 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
339 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
340 EmitOptionalRex32(dst, src);
341 EmitUint8(0x0F);
342 EmitUint8(0xB7);
343 EmitRegisterOperand(dst.LowBits(), src.LowBits());
344 }
345
346
movzxw(CpuRegister dst,const Address & src)347 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
348 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
349 EmitOptionalRex32(dst, src);
350 EmitUint8(0x0F);
351 EmitUint8(0xB7);
352 EmitOperand(dst.LowBits(), src);
353 }
354
355
movsxw(CpuRegister dst,CpuRegister src)356 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
357 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
358 EmitOptionalRex32(dst, src);
359 EmitUint8(0x0F);
360 EmitUint8(0xBF);
361 EmitRegisterOperand(dst.LowBits(), src.LowBits());
362 }
363
364
movsxw(CpuRegister dst,const Address & src)365 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
366 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
367 EmitOptionalRex32(dst, src);
368 EmitUint8(0x0F);
369 EmitUint8(0xBF);
370 EmitOperand(dst.LowBits(), src);
371 }
372
373
movw(CpuRegister,const Address &)374 void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
375 LOG(FATAL) << "Use movzxw or movsxw instead.";
376 }
377
378
movw(const Address & dst,CpuRegister src)379 void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
380 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
381 EmitOperandSizeOverride();
382 EmitOptionalRex32(src, dst);
383 EmitUint8(0x89);
384 EmitOperand(src.LowBits(), dst);
385 }
386
387
movw(const Address & dst,const Immediate & imm)388 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
389 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
390 EmitOperandSizeOverride();
391 EmitOptionalRex32(dst);
392 EmitUint8(0xC7);
393 EmitOperand(Register::RAX, dst);
394 CHECK(imm.is_uint16() || imm.is_int16());
395 EmitUint8(imm.value() & 0xFF);
396 EmitUint8(imm.value() >> 8);
397 }
398
399
leaq(CpuRegister dst,const Address & src)400 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
401 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
402 EmitRex64(dst, src);
403 EmitUint8(0x8D);
404 EmitOperand(dst.LowBits(), src);
405 }
406
407
leal(CpuRegister dst,const Address & src)408 void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
409 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
410 EmitOptionalRex32(dst, src);
411 EmitUint8(0x8D);
412 EmitOperand(dst.LowBits(), src);
413 }
414
415
movaps(XmmRegister dst,XmmRegister src)416 void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
417 if (CpuHasAVXorAVX2FeatureFlag()) {
418 vmovaps(dst, src);
419 return;
420 }
421 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
422 EmitOptionalRex32(dst, src);
423 EmitUint8(0x0F);
424 EmitUint8(0x28);
425 EmitXmmRegisterOperand(dst.LowBits(), src);
426 }
427
428
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
vmovaps(XmmRegister dst,XmmRegister src)430 void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
431 DCHECK(CpuHasAVXorAVX2FeatureFlag());
432 uint8_t byte_zero, byte_one, byte_two;
433 bool is_twobyte_form = true;
434 bool load = dst.NeedsRex();
435 bool store = !load;
436
437 if (src.NeedsRex()&& dst.NeedsRex()) {
438 is_twobyte_form = false;
439 }
440 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
441 // Instruction VEX Prefix
442 byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
443 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
444 if (is_twobyte_form) {
445 bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
446 byte_one = EmitVexPrefixByteOne(rex_bit,
447 vvvv_reg,
448 SET_VEX_L_128,
449 SET_VEX_PP_NONE);
450 } else {
451 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
452 /*X=*/ false,
453 src.NeedsRex(),
454 SET_VEX_M_0F);
455 byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
456 SET_VEX_L_128,
457 SET_VEX_PP_NONE);
458 }
459 EmitUint8(byte_zero);
460 EmitUint8(byte_one);
461 if (!is_twobyte_form) {
462 EmitUint8(byte_two);
463 }
464 // Instruction Opcode
465 if (is_twobyte_form && store) {
466 EmitUint8(0x29);
467 } else {
468 EmitUint8(0x28);
469 }
470 // Instruction Operands
471 if (is_twobyte_form && store) {
472 EmitXmmRegisterOperand(src.LowBits(), dst);
473 } else {
474 EmitXmmRegisterOperand(dst.LowBits(), src);
475 }
476 }
477
movaps(XmmRegister dst,const Address & src)478 void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
479 if (CpuHasAVXorAVX2FeatureFlag()) {
480 vmovaps(dst, src);
481 return;
482 }
483 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
484 EmitOptionalRex32(dst, src);
485 EmitUint8(0x0F);
486 EmitUint8(0x28);
487 EmitOperand(dst.LowBits(), src);
488 }
489
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
vmovaps(XmmRegister dst,const Address & src)491 void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
492 DCHECK(CpuHasAVXorAVX2FeatureFlag());
493 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
494 uint8_t ByteZero, ByteOne, ByteTwo;
495 bool is_twobyte_form = false;
496 // Instruction VEX Prefix
497 uint8_t rex = src.rex();
498 bool Rex_x = rex & GET_REX_X;
499 bool Rex_b = rex & GET_REX_B;
500 if (!Rex_b && !Rex_x) {
501 is_twobyte_form = true;
502 }
503 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
504 if (is_twobyte_form) {
505 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
506 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
507 vvvv_reg,
508 SET_VEX_L_128,
509 SET_VEX_PP_NONE);
510 } else {
511 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
512 Rex_x,
513 Rex_b,
514 SET_VEX_M_0F);
515 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
516 SET_VEX_L_128,
517 SET_VEX_PP_NONE);
518 }
519 EmitUint8(ByteZero);
520 EmitUint8(ByteOne);
521 if (!is_twobyte_form) {
522 EmitUint8(ByteTwo);
523 }
524 // Instruction Opcode
525 EmitUint8(0x28);
526 // Instruction Operands
527 EmitOperand(dst.LowBits(), src);
528 }
529
movups(XmmRegister dst,const Address & src)530 void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
531 if (CpuHasAVXorAVX2FeatureFlag()) {
532 vmovups(dst, src);
533 return;
534 }
535 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
536 EmitOptionalRex32(dst, src);
537 EmitUint8(0x0F);
538 EmitUint8(0x10);
539 EmitOperand(dst.LowBits(), src);
540 }
541
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
vmovups(XmmRegister dst,const Address & src)543 void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
544 DCHECK(CpuHasAVXorAVX2FeatureFlag());
545 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
546 uint8_t ByteZero, ByteOne, ByteTwo;
547 bool is_twobyte_form = false;
548 // Instruction VEX Prefix
549 uint8_t rex = src.rex();
550 bool Rex_x = rex & GET_REX_X;
551 bool Rex_b = rex & GET_REX_B;
552 if (!Rex_x && !Rex_b) {
553 is_twobyte_form = true;
554 }
555 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
556 if (is_twobyte_form) {
557 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
558 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
559 vvvv_reg,
560 SET_VEX_L_128,
561 SET_VEX_PP_NONE);
562 } else {
563 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
564 Rex_x,
565 Rex_b,
566 SET_VEX_M_0F);
567 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
568 SET_VEX_L_128,
569 SET_VEX_PP_NONE);
570 }
571 EmitUint8(ByteZero);
572 EmitUint8(ByteOne);
573 if (!is_twobyte_form) {
574 EmitUint8(ByteTwo);
575 }
576 // Instruction Opcode
577 EmitUint8(0x10);
578 // Instruction Operands
579 EmitOperand(dst.LowBits(), src);
580 }
581
582
movaps(const Address & dst,XmmRegister src)583 void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
584 if (CpuHasAVXorAVX2FeatureFlag()) {
585 vmovaps(dst, src);
586 return;
587 }
588 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
589 EmitOptionalRex32(src, dst);
590 EmitUint8(0x0F);
591 EmitUint8(0x29);
592 EmitOperand(src.LowBits(), dst);
593 }
594
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
vmovaps(const Address & dst,XmmRegister src)596 void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
597 DCHECK(CpuHasAVXorAVX2FeatureFlag());
598 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
599 uint8_t ByteZero, ByteOne, ByteTwo;
600 bool is_twobyte_form = false;
601
602 // Instruction VEX Prefix
603 uint8_t rex = dst.rex();
604 bool Rex_x = rex & GET_REX_X;
605 bool Rex_b = rex & GET_REX_B;
606 if (!Rex_b && !Rex_x) {
607 is_twobyte_form = true;
608 }
609 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
610 if (is_twobyte_form) {
611 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
612 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
613 vvvv_reg,
614 SET_VEX_L_128,
615 SET_VEX_PP_NONE);
616 } else {
617 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
618 Rex_x,
619 Rex_b,
620 SET_VEX_M_0F);
621 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
622 SET_VEX_L_128,
623 SET_VEX_PP_NONE);
624 }
625 EmitUint8(ByteZero);
626 EmitUint8(ByteOne);
627 if (!is_twobyte_form) {
628 EmitUint8(ByteTwo);
629 }
630 // Instruction Opcode
631 EmitUint8(0x29);
632 // Instruction Operands
633 EmitOperand(src.LowBits(), dst);
634 }
635
movups(const Address & dst,XmmRegister src)636 void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
637 if (CpuHasAVXorAVX2FeatureFlag()) {
638 vmovups(dst, src);
639 return;
640 }
641 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
642 EmitOptionalRex32(src, dst);
643 EmitUint8(0x0F);
644 EmitUint8(0x11);
645 EmitOperand(src.LowBits(), dst);
646 }
647
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
vmovups(const Address & dst,XmmRegister src)649 void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
650 DCHECK(CpuHasAVXorAVX2FeatureFlag());
651 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
652 uint8_t ByteZero, ByteOne, ByteTwo;
653 bool is_twobyte_form = false;
654
655 // Instruction VEX Prefix
656 uint8_t rex = dst.rex();
657 bool Rex_x = rex & GET_REX_X;
658 bool Rex_b = rex & GET_REX_B;
659 if (!Rex_b && !Rex_x) {
660 is_twobyte_form = true;
661 }
662 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
663 if (is_twobyte_form) {
664 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
665 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
666 vvvv_reg,
667 SET_VEX_L_128,
668 SET_VEX_PP_NONE);
669 } else {
670 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
671 Rex_x,
672 Rex_b,
673 SET_VEX_M_0F);
674 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
675 SET_VEX_L_128,
676 SET_VEX_PP_NONE);
677 }
678 EmitUint8(ByteZero);
679 EmitUint8(ByteOne);
680 if (!is_twobyte_form) {
681 EmitUint8(ByteTwo);
682 }
683 // Instruction Opcode
684 EmitUint8(0x11);
685 // Instruction Operands
686 EmitOperand(src.LowBits(), dst);
687 }
688
689
movss(XmmRegister dst,const Address & src)690 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
691 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
692 EmitUint8(0xF3);
693 EmitOptionalRex32(dst, src);
694 EmitUint8(0x0F);
695 EmitUint8(0x10);
696 EmitOperand(dst.LowBits(), src);
697 }
698
699
movss(const Address & dst,XmmRegister src)700 void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
701 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
702 EmitUint8(0xF3);
703 EmitOptionalRex32(src, dst);
704 EmitUint8(0x0F);
705 EmitUint8(0x11);
706 EmitOperand(src.LowBits(), dst);
707 }
708
709
movss(XmmRegister dst,XmmRegister src)710 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
711 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
712 EmitUint8(0xF3);
713 EmitOptionalRex32(src, dst); // Movss is MR encoding instead of the usual RM.
714 EmitUint8(0x0F);
715 EmitUint8(0x11);
716 EmitXmmRegisterOperand(src.LowBits(), dst);
717 }
718
719
movsxd(CpuRegister dst,CpuRegister src)720 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
721 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
722 EmitRex64(dst, src);
723 EmitUint8(0x63);
724 EmitRegisterOperand(dst.LowBits(), src.LowBits());
725 }
726
727
movsxd(CpuRegister dst,const Address & src)728 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
729 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
730 EmitRex64(dst, src);
731 EmitUint8(0x63);
732 EmitOperand(dst.LowBits(), src);
733 }
734
735
movd(XmmRegister dst,CpuRegister src)736 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
737 movd(dst, src, true);
738 }
739
movd(CpuRegister dst,XmmRegister src)740 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
741 movd(dst, src, true);
742 }
743
movd(XmmRegister dst,CpuRegister src,bool is64bit)744 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
745 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
746 EmitUint8(0x66);
747 EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
748 EmitUint8(0x0F);
749 EmitUint8(0x6E);
750 EmitOperand(dst.LowBits(), Operand(src));
751 }
752
movd(CpuRegister dst,XmmRegister src,bool is64bit)753 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
754 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
755 EmitUint8(0x66);
756 EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
757 EmitUint8(0x0F);
758 EmitUint8(0x7E);
759 EmitOperand(src.LowBits(), Operand(dst));
760 }
761
addss(XmmRegister dst,XmmRegister src)762 void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
763 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
764 EmitUint8(0xF3);
765 EmitOptionalRex32(dst, src);
766 EmitUint8(0x0F);
767 EmitUint8(0x58);
768 EmitXmmRegisterOperand(dst.LowBits(), src);
769 }
770
addss(XmmRegister dst,const Address & src)771 void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
772 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
773 EmitUint8(0xF3);
774 EmitOptionalRex32(dst, src);
775 EmitUint8(0x0F);
776 EmitUint8(0x58);
777 EmitOperand(dst.LowBits(), src);
778 }
779
780
subss(XmmRegister dst,XmmRegister src)781 void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
782 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
783 EmitUint8(0xF3);
784 EmitOptionalRex32(dst, src);
785 EmitUint8(0x0F);
786 EmitUint8(0x5C);
787 EmitXmmRegisterOperand(dst.LowBits(), src);
788 }
789
790
subss(XmmRegister dst,const Address & src)791 void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
792 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
793 EmitUint8(0xF3);
794 EmitOptionalRex32(dst, src);
795 EmitUint8(0x0F);
796 EmitUint8(0x5C);
797 EmitOperand(dst.LowBits(), src);
798 }
799
800
mulss(XmmRegister dst,XmmRegister src)801 void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
802 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
803 EmitUint8(0xF3);
804 EmitOptionalRex32(dst, src);
805 EmitUint8(0x0F);
806 EmitUint8(0x59);
807 EmitXmmRegisterOperand(dst.LowBits(), src);
808 }
809
810
mulss(XmmRegister dst,const Address & src)811 void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
812 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
813 EmitUint8(0xF3);
814 EmitOptionalRex32(dst, src);
815 EmitUint8(0x0F);
816 EmitUint8(0x59);
817 EmitOperand(dst.LowBits(), src);
818 }
819
820
divss(XmmRegister dst,XmmRegister src)821 void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
822 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
823 EmitUint8(0xF3);
824 EmitOptionalRex32(dst, src);
825 EmitUint8(0x0F);
826 EmitUint8(0x5E);
827 EmitXmmRegisterOperand(dst.LowBits(), src);
828 }
829
830
divss(XmmRegister dst,const Address & src)831 void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
832 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
833 EmitUint8(0xF3);
834 EmitOptionalRex32(dst, src);
835 EmitUint8(0x0F);
836 EmitUint8(0x5E);
837 EmitOperand(dst.LowBits(), src);
838 }
839
840
addps(XmmRegister dst,XmmRegister src)841 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
842 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
843 EmitOptionalRex32(dst, src);
844 EmitUint8(0x0F);
845 EmitUint8(0x58);
846 EmitXmmRegisterOperand(dst.LowBits(), src);
847 }
848
849
subps(XmmRegister dst,XmmRegister src)850 void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
851 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
852 EmitOptionalRex32(dst, src);
853 EmitUint8(0x0F);
854 EmitUint8(0x5C);
855 EmitXmmRegisterOperand(dst.LowBits(), src);
856 }
857
vaddps(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)858 void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
859 DCHECK(CpuHasAVXorAVX2FeatureFlag());
860 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
861 bool is_twobyte_form = false;
862 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
863 if (!add_right.NeedsRex()) {
864 is_twobyte_form = true;
865 }
866 X86_64ManagedRegister vvvv_reg =
867 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
868 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
869 if (is_twobyte_form) {
870 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
871 } else {
872 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
873 /*X=*/ false,
874 add_right.NeedsRex(),
875 SET_VEX_M_0F);
876 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
877 }
878 EmitUint8(ByteZero);
879 EmitUint8(ByteOne);
880 if (!is_twobyte_form) {
881 EmitUint8(ByteTwo);
882 }
883 EmitUint8(0x58);
884 EmitXmmRegisterOperand(dst.LowBits(), add_right);
885 }
886
vsubps(XmmRegister dst,XmmRegister src1,XmmRegister src2)887 void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
888 DCHECK(CpuHasAVXorAVX2FeatureFlag());
889 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
890 bool is_twobyte_form = false;
891 uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
892 if (!src2.NeedsRex()) {
893 is_twobyte_form = true;
894 }
895 byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
896 X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
897 if (is_twobyte_form) {
898 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
899 } else {
900 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
901 byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
902 }
903 EmitUint8(byte_zero);
904 EmitUint8(byte_one);
905 if (!is_twobyte_form) {
906 EmitUint8(byte_two);
907 }
908 EmitUint8(0x5C);
909 EmitXmmRegisterOperand(dst.LowBits(), src2);
910 }
911
912
mulps(XmmRegister dst,XmmRegister src)913 void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
914 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
915 EmitOptionalRex32(dst, src);
916 EmitUint8(0x0F);
917 EmitUint8(0x59);
918 EmitXmmRegisterOperand(dst.LowBits(), src);
919 }
920
vmulps(XmmRegister dst,XmmRegister src1,XmmRegister src2)921 void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
922 DCHECK(CpuHasAVXorAVX2FeatureFlag());
923 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
924 bool is_twobyte_form = false;
925 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
926 if (!src2.NeedsRex()) {
927 is_twobyte_form = true;
928 }
929 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
930 X86_64ManagedRegister vvvv_reg =
931 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
932 if (is_twobyte_form) {
933 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
934 } else {
935 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
936 /*X=*/ false,
937 src2.NeedsRex(),
938 SET_VEX_M_0F);
939 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
940 }
941 EmitUint8(ByteZero);
942 EmitUint8(ByteOne);
943 if (!is_twobyte_form) {
944 EmitUint8(ByteTwo);
945 }
946 EmitUint8(0x59);
947 EmitXmmRegisterOperand(dst.LowBits(), src2);
948 }
949
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  // DIVPS xmm1, xmm2 (NP 0F 5E /r): packed single-precision divide.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
957
vdivps(XmmRegister dst,XmmRegister src1,XmmRegister src2)958 void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
959 DCHECK(CpuHasAVXorAVX2FeatureFlag());
960 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
961 bool is_twobyte_form = false;
962 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
963 if (!src2.NeedsRex()) {
964 is_twobyte_form = true;
965 }
966 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
967 X86_64ManagedRegister vvvv_reg =
968 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
969 if (is_twobyte_form) {
970 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
971 } else {
972 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
973 /*X=*/ false,
974 src2.NeedsRex(),
975 SET_VEX_M_0F);
976 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
977 }
978 EmitUint8(ByteZero);
979 EmitUint8(ByteOne);
980 if (!is_twobyte_form) {
981 EmitUint8(ByteTwo);
982 }
983 EmitUint8(0x5E);
984 EmitXmmRegisterOperand(dst.LowBits(), src2);
985 }
986
void X86_64Assembler::flds(const Address& src) {
  // FLD m32fp (D9 /0): push single-precision value onto the x87 stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0 opcode extension in the reg field.
}
992
993
void X86_64Assembler::fsts(const Address& dst) {
  // FST m32fp (D9 /2): store ST(0) as single precision (no pop).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension in the reg field.
}
999
1000
void X86_64Assembler::fstps(const Address& dst) {
  // FSTP m32fp (D9 /3): store ST(0) as single precision and pop.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension in the reg field.
}
1006
1007
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  // MOVAPD xmm1, xmm2 (66 0F 28 /r); prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1020
1021 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
vmovapd(XmmRegister dst,XmmRegister src)1022 void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
1023 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1024 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1025 uint8_t ByteZero, ByteOne, ByteTwo;
1026 bool is_twobyte_form = true;
1027
1028 if (src.NeedsRex() && dst.NeedsRex()) {
1029 is_twobyte_form = false;
1030 }
1031 // Instruction VEX Prefix
1032 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1033 bool load = dst.NeedsRex();
1034 if (is_twobyte_form) {
1035 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1036 bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1037 ByteOne = EmitVexPrefixByteOne(rex_bit,
1038 vvvv_reg,
1039 SET_VEX_L_128,
1040 SET_VEX_PP_66);
1041 } else {
1042 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1043 /*X=*/ false,
1044 src.NeedsRex(),
1045 SET_VEX_M_0F);
1046 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1047 SET_VEX_L_128,
1048 SET_VEX_PP_66);
1049 }
1050 EmitUint8(ByteZero);
1051 EmitUint8(ByteOne);
1052 if (!is_twobyte_form) {
1053 EmitUint8(ByteTwo);
1054 }
1055 // Instruction Opcode
1056 if (is_twobyte_form && !load) {
1057 EmitUint8(0x29);
1058 } else {
1059 EmitUint8(0x28);
1060 }
1061 // Instruction Operands
1062 if (is_twobyte_form && !load) {
1063 EmitXmmRegisterOperand(src.LowBits(), dst);
1064 } else {
1065 EmitXmmRegisterOperand(dst.LowBits(), src);
1066 }
1067 }
1068
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  // MOVAPD xmm, m128 (66 0F 28 /r): aligned load; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1081
1082 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
vmovapd(XmmRegister dst,const Address & src)1083 void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
1084 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1085 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1086 uint8_t ByteZero, ByteOne, ByteTwo;
1087 bool is_twobyte_form = false;
1088
1089 // Instruction VEX Prefix
1090 uint8_t rex = src.rex();
1091 bool Rex_x = rex & GET_REX_X;
1092 bool Rex_b = rex & GET_REX_B;
1093 if (!Rex_b && !Rex_x) {
1094 is_twobyte_form = true;
1095 }
1096 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1097 if (is_twobyte_form) {
1098 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1099 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1100 vvvv_reg,
1101 SET_VEX_L_128,
1102 SET_VEX_PP_66);
1103 } else {
1104 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1105 Rex_x,
1106 Rex_b,
1107 SET_VEX_M_0F);
1108 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1109 SET_VEX_L_128,
1110 SET_VEX_PP_66);
1111 }
1112 EmitUint8(ByteZero);
1113 EmitUint8(ByteOne);
1114 if (!is_twobyte_form) {
1115 EmitUint8(ByteTwo);
1116 }
1117 // Instruction Opcode
1118 EmitUint8(0x28);
1119 // Instruction Operands
1120 EmitOperand(dst.LowBits(), src);
1121 }
1122
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  // MOVUPD xmm, m128 (66 0F 10 /r): unaligned load; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1135
1136 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1137 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1138 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1139 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1140 bool is_twobyte_form = false;
1141 uint8_t ByteZero, ByteOne, ByteTwo;
1142
1143 // Instruction VEX Prefix
1144 uint8_t rex = src.rex();
1145 bool Rex_x = rex & GET_REX_X;
1146 bool Rex_b = rex & GET_REX_B;
1147 if (!Rex_b && !Rex_x) {
1148 is_twobyte_form = true;
1149 }
1150 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1151 if (is_twobyte_form) {
1152 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1153 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1154 vvvv_reg,
1155 SET_VEX_L_128,
1156 SET_VEX_PP_66);
1157 } else {
1158 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1159 Rex_x,
1160 Rex_b,
1161 SET_VEX_M_0F);
1162 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1163 SET_VEX_L_128,
1164 SET_VEX_PP_66);
1165 }
1166 EmitUint8(ByteZero);
1167 EmitUint8(ByteOne);
1168 if (!is_twobyte_form)
1169 EmitUint8(ByteTwo);
1170 // Instruction Opcode
1171 EmitUint8(0x10);
1172 // Instruction Operands
1173 EmitOperand(dst.LowBits(), src);
1174 }
1175
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  // MOVAPD m128, xmm (66 0F 29 /r): aligned store; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1188
1189 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
vmovapd(const Address & dst,XmmRegister src)1190 void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
1191 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1192 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1193 bool is_twobyte_form = false;
1194 uint8_t ByteZero, ByteOne, ByteTwo;
1195 // Instruction VEX Prefix
1196 uint8_t rex = dst.rex();
1197 bool Rex_x = rex & GET_REX_X;
1198 bool Rex_b = rex & GET_REX_B;
1199 if (!Rex_x && !Rex_b) {
1200 is_twobyte_form = true;
1201 }
1202 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1203 if (is_twobyte_form) {
1204 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1205 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1206 vvvv_reg,
1207 SET_VEX_L_128,
1208 SET_VEX_PP_66);
1209 } else {
1210 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1211 Rex_x,
1212 Rex_b,
1213 SET_VEX_M_0F);
1214 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1215 SET_VEX_L_128,
1216 SET_VEX_PP_66);
1217 }
1218 EmitUint8(ByteZero);
1219 EmitUint8(ByteOne);
1220 if (!is_twobyte_form) {
1221 EmitUint8(ByteTwo);
1222 }
1223 // Instruction Opcode
1224 EmitUint8(0x29);
1225 // Instruction Operands
1226 EmitOperand(src.LowBits(), dst);
1227 }
1228
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  // MOVUPD m128, xmm (66 0F 11 /r): unaligned store; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1241
1242 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
vmovupd(const Address & dst,XmmRegister src)1243 void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
1244 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1245 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1246 bool is_twobyte_form = false;
1247 uint8_t ByteZero, ByteOne, ByteTwo;
1248
1249 // Instruction VEX Prefix
1250 uint8_t rex = dst.rex();
1251 bool Rex_x = rex & GET_REX_X;
1252 bool Rex_b = rex & GET_REX_B;
1253 if (!Rex_x && !Rex_b) {
1254 is_twobyte_form = true;
1255 }
1256 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1257 if (is_twobyte_form) {
1258 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1259 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1260 vvvv_reg,
1261 SET_VEX_L_128,
1262 SET_VEX_PP_66);
1263 } else {
1264 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1265 Rex_x,
1266 Rex_b,
1267 SET_VEX_M_0F);
1268 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1269 SET_VEX_L_128,
1270 SET_VEX_PP_66);
1271 }
1272 EmitUint8(ByteZero);
1273 EmitUint8(ByteOne);
1274 if (!is_twobyte_form) {
1275 EmitUint8(ByteTwo);
1276 }
1277 // Instruction Opcode
1278 EmitUint8(0x11);
1279 // Instruction Operands
1280 EmitOperand(src.LowBits(), dst);
1281 }
1282
1283
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  // MOVSD xmm, m64 (F2 0F 10 /r): load scalar double from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1292
1293
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  // MOVSD m64, xmm (F2 0F 11 /r): store scalar double to memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1302
1303
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  // MOVSD xmm1, xmm2 via the store form (F2 0F 11 /r).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1312
1313
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  // ADDSD xmm1, xmm2 (F2 0F 58 /r): scalar double add.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1322
1323
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  // ADDSD xmm, m64 (F2 0F 58 /r): scalar double add from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1332
1333
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  // SUBSD xmm1, xmm2 (F2 0F 5C /r): scalar double subtract.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1342
1343
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  // SUBSD xmm, m64 (F2 0F 5C /r): scalar double subtract from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1352
1353
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  // MULSD xmm1, xmm2 (F2 0F 59 /r): scalar double multiply.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1362
1363
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  // MULSD xmm, m64 (F2 0F 59 /r): scalar double multiply from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1372
1373
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  // DIVSD xmm1, xmm2 (F2 0F 5E /r): scalar double divide.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1382
1383
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  // DIVSD xmm, m64 (F2 0F 5E /r): scalar double divide by memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1392
1393
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  // ADDPD xmm1, xmm2 (66 0F 58 /r): packed double add.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1402
1403
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1404 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1405 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1406 bool is_twobyte_form = false;
1407 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1408 if (!add_right.NeedsRex()) {
1409 is_twobyte_form = true;
1410 }
1411 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1412 X86_64ManagedRegister vvvv_reg =
1413 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1414 if (is_twobyte_form) {
1415 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1416 } else {
1417 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1418 /*X=*/ false,
1419 add_right.NeedsRex(),
1420 SET_VEX_M_0F);
1421 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1422 }
1423 EmitUint8(ByteZero);
1424 EmitUint8(ByteOne);
1425 if (!is_twobyte_form) {
1426 EmitUint8(ByteTwo);
1427 }
1428 EmitUint8(0x58);
1429 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1430 }
1431
1432
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  // SUBPD xmm1, xmm2 (66 0F 5C /r): packed double subtract.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1441
1442
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1443 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1444 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1445 bool is_twobyte_form = false;
1446 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1447 if (!src2.NeedsRex()) {
1448 is_twobyte_form = true;
1449 }
1450 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1451 X86_64ManagedRegister vvvv_reg =
1452 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1453 if (is_twobyte_form) {
1454 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1455 } else {
1456 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1457 /*X=*/ false,
1458 src2.NeedsRex(),
1459 SET_VEX_M_0F);
1460 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1461 }
1462 EmitUint8(ByteZero);
1463 EmitUint8(ByteOne);
1464 if (!is_twobyte_form) {
1465 EmitUint8(ByteTwo);
1466 }
1467 EmitUint8(0x5C);
1468 EmitXmmRegisterOperand(dst.LowBits(), src2);
1469 }
1470
1471
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  // MULPD xmm1, xmm2 (66 0F 59 /r): packed double multiply.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1480
vmulpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1481 void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1482 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1483 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1484 bool is_twobyte_form = false;
1485 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1486 if (!src2.NeedsRex()) {
1487 is_twobyte_form = true;
1488 }
1489 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1490 X86_64ManagedRegister vvvv_reg =
1491 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1492 if (is_twobyte_form) {
1493 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1494 } else {
1495 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1496 /*X=*/ false,
1497 src2.NeedsRex(),
1498 SET_VEX_M_0F);
1499 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1500 }
1501 EmitUint8(ByteZero);
1502 EmitUint8(ByteOne);
1503 if (!is_twobyte_form) {
1504 EmitUint8(ByteTwo);
1505 }
1506 EmitUint8(0x59);
1507 EmitXmmRegisterOperand(dst.LowBits(), src2);
1508 }
1509
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  // DIVPD xmm1, xmm2 (66 0F 5E /r): packed double divide.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1518
1519
vdivpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1520 void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1521 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1522 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1523 bool is_twobyte_form = false;
1524 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1525 if (!src2.NeedsRex()) {
1526 is_twobyte_form = true;
1527 }
1528 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1529 X86_64ManagedRegister vvvv_reg =
1530 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1531 if (is_twobyte_form) {
1532 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1533 } else {
1534 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1535 /*X=*/ false,
1536 src2.NeedsRex(),
1537 SET_VEX_M_0F);
1538 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1539 }
1540 EmitUint8(ByteZero);
1541 EmitUint8(ByteOne);
1542 if (!is_twobyte_form) {
1543 EmitUint8(ByteTwo);
1544 }
1545 EmitUint8(0x5E);
1546 EmitXmmRegisterOperand(dst.LowBits(), src2);
1547 }
1548
1549
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  // MOVDQA xmm1, xmm2 (66 0F 6F /r); prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1562
1563 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
vmovdqa(XmmRegister dst,XmmRegister src)1564 void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
1565 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1566 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1567 uint8_t ByteZero, ByteOne, ByteTwo;
1568 bool is_twobyte_form = true;
1569
1570 // Instruction VEX Prefix
1571 if (src.NeedsRex() && dst.NeedsRex()) {
1572 is_twobyte_form = false;
1573 }
1574 bool load = dst.NeedsRex();
1575 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1576 if (is_twobyte_form) {
1577 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1578 bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1579 ByteOne = EmitVexPrefixByteOne(rex_bit,
1580 vvvv_reg,
1581 SET_VEX_L_128,
1582 SET_VEX_PP_66);
1583 } else {
1584 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1585 /*X=*/ false,
1586 src.NeedsRex(),
1587 SET_VEX_M_0F);
1588 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1589 SET_VEX_L_128,
1590 SET_VEX_PP_66);
1591 }
1592 EmitUint8(ByteZero);
1593 EmitUint8(ByteOne);
1594 if (!is_twobyte_form) {
1595 EmitUint8(ByteTwo);
1596 }
1597 // Instruction Opcode
1598 if (is_twobyte_form && !load) {
1599 EmitUint8(0x7F);
1600 } else {
1601 EmitUint8(0x6F);
1602 }
1603 // Instruction Operands
1604 if (is_twobyte_form && !load) {
1605 EmitXmmRegisterOperand(src.LowBits(), dst);
1606 } else {
1607 EmitXmmRegisterOperand(dst.LowBits(), src);
1608 }
1609 }
1610
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  // MOVDQA xmm, m128 (66 0F 6F /r): aligned load; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1623
1624 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
vmovdqa(XmmRegister dst,const Address & src)1625 void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
1626 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1627 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1628 uint8_t ByteZero, ByteOne, ByteTwo;
1629 bool is_twobyte_form = false;
1630
1631 // Instruction VEX Prefix
1632 uint8_t rex = src.rex();
1633 bool Rex_x = rex & GET_REX_X;
1634 bool Rex_b = rex & GET_REX_B;
1635 if (!Rex_x && !Rex_b) {
1636 is_twobyte_form = true;
1637 }
1638 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1639 if (is_twobyte_form) {
1640 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1641 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1642 vvvv_reg,
1643 SET_VEX_L_128,
1644 SET_VEX_PP_66);
1645 } else {
1646 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1647 Rex_x,
1648 Rex_b,
1649 SET_VEX_M_0F);
1650 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1651 SET_VEX_L_128,
1652 SET_VEX_PP_66);
1653 }
1654 EmitUint8(ByteZero);
1655 EmitUint8(ByteOne);
1656 if (!is_twobyte_form) {
1657 EmitUint8(ByteTwo);
1658 }
1659 // Instruction Opcode
1660 EmitUint8(0x6F);
1661 // Instruction Operands
1662 EmitOperand(dst.LowBits(), src);
1663 }
1664
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  // MOVDQU xmm, m128 (F3 0F 6F /r): unaligned load; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1677
1678 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1679 Load Unaligned */
vmovdqu(XmmRegister dst,const Address & src)1680 void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
1681 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1682 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1683 uint8_t ByteZero, ByteOne, ByteTwo;
1684 bool is_twobyte_form = false;
1685
1686 // Instruction VEX Prefix
1687 uint8_t rex = src.rex();
1688 bool Rex_x = rex & GET_REX_X;
1689 bool Rex_b = rex & GET_REX_B;
1690 if (!Rex_x && !Rex_b) {
1691 is_twobyte_form = true;
1692 }
1693 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1694 if (is_twobyte_form) {
1695 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1696 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1697 vvvv_reg,
1698 SET_VEX_L_128,
1699 SET_VEX_PP_F3);
1700 } else {
1701 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1702 Rex_x,
1703 Rex_b,
1704 SET_VEX_M_0F);
1705 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1706 SET_VEX_L_128,
1707 SET_VEX_PP_F3);
1708 }
1709 EmitUint8(ByteZero);
1710 EmitUint8(ByteOne);
1711 if (!is_twobyte_form) {
1712 EmitUint8(ByteTwo);
1713 }
1714 // Instruction Opcode
1715 EmitUint8(0x6F);
1716 // Instruction Operands
1717 EmitOperand(dst.LowBits(), src);
1718 }
1719
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  // MOVDQA m128, xmm (66 0F 7F /r): aligned store; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1732
1733 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
vmovdqa(const Address & dst,XmmRegister src)1734 void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
1735 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1736 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1737 bool is_twobyte_form = false;
1738 uint8_t ByteZero, ByteOne, ByteTwo;
1739 // Instruction VEX Prefix
1740 uint8_t rex = dst.rex();
1741 bool Rex_x = rex & GET_REX_X;
1742 bool Rex_b = rex & GET_REX_B;
1743 if (!Rex_x && !Rex_b) {
1744 is_twobyte_form = true;
1745 }
1746 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1747 if (is_twobyte_form) {
1748 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1749 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1750 vvvv_reg,
1751 SET_VEX_L_128,
1752 SET_VEX_PP_66);
1753 } else {
1754 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1755 Rex_x,
1756 Rex_b,
1757 SET_VEX_M_0F);
1758 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1759 SET_VEX_L_128,
1760 SET_VEX_PP_66);
1761 }
1762 EmitUint8(ByteZero);
1763 EmitUint8(ByteOne);
1764 if (!is_twobyte_form) {
1765 EmitUint8(ByteTwo);
1766 }
1767 // Instruction Opcode
1768 EmitUint8(0x7F);
1769 // Instruction Operands
1770 EmitOperand(src.LowBits(), dst);
1771 }
1772
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  // MOVDQU m128, xmm (F3 0F 7F /r): unaligned store; prefers VEX when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1785
1786 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
vmovdqu(const Address & dst,XmmRegister src)1787 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
1788 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1789 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1790 uint8_t ByteZero, ByteOne, ByteTwo;
1791 bool is_twobyte_form = false;
1792
1793 // Instruction VEX Prefix
1794 uint8_t rex = dst.rex();
1795 bool Rex_x = rex & GET_REX_X;
1796 bool Rex_b = rex & GET_REX_B;
1797 if (!Rex_b && !Rex_x) {
1798 is_twobyte_form = true;
1799 }
1800 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1801 if (is_twobyte_form) {
1802 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1803 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1804 vvvv_reg,
1805 SET_VEX_L_128,
1806 SET_VEX_PP_F3);
1807 } else {
1808 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1809 Rex_x,
1810 Rex_b,
1811 SET_VEX_M_0F);
1812 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1813 SET_VEX_L_128,
1814 SET_VEX_PP_F3);
1815 }
1816 EmitUint8(ByteZero);
1817 EmitUint8(ByteOne);
1818 if (!is_twobyte_form) {
1819 EmitUint8(ByteTwo);
1820 }
1821 // Instruction Opcode
1822 EmitUint8(0x7F);
1823 // Instruction Operands
1824 EmitOperand(src.LowBits(), dst);
1825 }
1826
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  // PADDB xmm1, xmm2 (66 0F FC /r): packed byte add.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1835
1836
vpaddb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1837 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1838 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1839 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1840 uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1841 bool is_twobyte_form = true;
1842 if (add_right.NeedsRex()) {
1843 is_twobyte_form = false;
1844 }
1845 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1846 X86_64ManagedRegister vvvv_reg =
1847 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1848 if (is_twobyte_form) {
1849 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1850 } else {
1851 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1852 /*X=*/ false,
1853 add_right.NeedsRex(),
1854 SET_VEX_M_0F);
1855 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1856 }
1857 EmitUint8(ByteZero);
1858 EmitUint8(ByteOne);
1859 if (!is_twobyte_form) {
1860 EmitUint8(ByteTwo);
1861 }
1862 EmitUint8(0xFC);
1863 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1864 }
1865
1866
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  // PSUBB xmm1, xmm2 (66 0F F8 /r): packed byte subtract.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1875
1876
vpsubb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1877 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1878 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1879 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1880 bool is_twobyte_form = false;
1881 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1882 if (!add_right.NeedsRex()) {
1883 is_twobyte_form = true;
1884 }
1885 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1886 X86_64ManagedRegister vvvv_reg =
1887 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1888 if (is_twobyte_form) {
1889 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1890 } else {
1891 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1892 /*X=*/ false,
1893 add_right.NeedsRex(),
1894 SET_VEX_M_0F);
1895 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1896 }
1897 EmitUint8(ByteZero);
1898 EmitUint8(ByteOne);
1899 if (!is_twobyte_form) {
1900 EmitUint8(ByteTwo);
1901 }
1902 EmitUint8(0xF8);
1903 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1904 }
1905
1906
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  // PADDW xmm1, xmm2 (66 0F FD /r): packed word add.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1915
vpaddw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1916 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1917 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1918 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1919 bool is_twobyte_form = false;
1920 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1921 if (!add_right.NeedsRex()) {
1922 is_twobyte_form = true;
1923 }
1924 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1925 X86_64ManagedRegister vvvv_reg =
1926 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1927 if (is_twobyte_form) {
1928 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1929 } else {
1930 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1931 /*X=*/ false,
1932 add_right.NeedsRex(),
1933 SET_VEX_M_0F);
1934 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1935 }
1936 EmitUint8(ByteZero);
1937 EmitUint8(ByteOne);
1938 if (!is_twobyte_form) {
1939 EmitUint8(ByteTwo);
1940 }
1941 EmitUint8(0xFD);
1942 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1943 }
1944
1945
// psubw: packed subtract of 16-bit words, xmm to xmm (encoding: 66 0F F9 /r).
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1954
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1955 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1956 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1957 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1958 bool is_twobyte_form = false;
1959 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1960 if (!add_right.NeedsRex()) {
1961 is_twobyte_form = true;
1962 }
1963 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1964 X86_64ManagedRegister vvvv_reg =
1965 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1966 if (is_twobyte_form) {
1967 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1968 } else {
1969 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1970 /*X=*/ false,
1971 add_right.NeedsRex(),
1972 SET_VEX_M_0F);
1973 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1974 }
1975 EmitUint8(ByteZero);
1976 EmitUint8(ByteOne);
1977 if (!is_twobyte_form) {
1978 EmitUint8(ByteTwo);
1979 }
1980 EmitUint8(0xF9);
1981 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1982 }
1983
1984
// pmullw: packed multiply of 16-bit words, low halves kept (encoding: 66 0F D5 /r).
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1993
vpmullw(XmmRegister dst,XmmRegister src1,XmmRegister src2)1994 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1995 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1996 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1997 bool is_twobyte_form = false;
1998 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1999 if (!src2.NeedsRex()) {
2000 is_twobyte_form = true;
2001 }
2002 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2003 X86_64ManagedRegister vvvv_reg =
2004 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2005 if (is_twobyte_form) {
2006 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2007 } else {
2008 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2009 /*X=*/ false,
2010 src2.NeedsRex(),
2011 SET_VEX_M_0F);
2012 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2013 }
2014 EmitUint8(ByteZero);
2015 EmitUint8(ByteOne);
2016 if (!is_twobyte_form) {
2017 EmitUint8(ByteTwo);
2018 }
2019 EmitUint8(0xD5);
2020 EmitXmmRegisterOperand(dst.LowBits(), src2);
2021 }
2022
// paddd: packed add of 32-bit doublewords, xmm to xmm (encoding: 66 0F FE /r).
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2031
vpaddd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2032 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2033 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2034 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2035 bool is_twobyte_form = false;
2036 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2037 if (!add_right.NeedsRex()) {
2038 is_twobyte_form = true;
2039 }
2040 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2041 X86_64ManagedRegister vvvv_reg =
2042 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2043 if (is_twobyte_form) {
2044 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2045 } else {
2046 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2047 /*X=*/ false,
2048 add_right.NeedsRex(),
2049 SET_VEX_M_0F);
2050 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2051 }
2052 EmitUint8(ByteZero);
2053 EmitUint8(ByteOne);
2054 if (!is_twobyte_form) {
2055 EmitUint8(ByteTwo);
2056 }
2057 EmitUint8(0xFE);
2058 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2059 }
2060
// psubd: packed subtract of 32-bit doublewords, xmm to xmm (encoding: 66 0F FA /r).
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2069
2070
// pmulld: packed multiply of 32-bit doublewords, low result (SSE4.1, encoding: 66 0F 38 40 /r).
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Three-byte opcode map 0F 38.
  EmitUint8(0x40);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2080
vpmulld(XmmRegister dst,XmmRegister src1,XmmRegister src2)2081 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2082 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2083 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2084 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2085 ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2086 X86_64ManagedRegister vvvv_reg =
2087 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2088 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2089 /*X=*/ false,
2090 src2.NeedsRex(),
2091 SET_VEX_M_0F_38);
2092 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2093 EmitUint8(ByteZero);
2094 EmitUint8(ByteOne);
2095 EmitUint8(ByteTwo);
2096 EmitUint8(0x40);
2097 EmitXmmRegisterOperand(dst.LowBits(), src2);
2098 }
2099
// paddq: packed add of 64-bit quadwords, xmm to xmm (encoding: 66 0F D4 /r).
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2108
2109
vpaddq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2110 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2111 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2112 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2113 bool is_twobyte_form = false;
2114 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2115 if (!add_right.NeedsRex()) {
2116 is_twobyte_form = true;
2117 }
2118 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2119 X86_64ManagedRegister vvvv_reg =
2120 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2121 if (is_twobyte_form) {
2122 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2123 } else {
2124 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2125 /*X=*/ false,
2126 add_right.NeedsRex(),
2127 SET_VEX_M_0F);
2128 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2129 }
2130 EmitUint8(ByteZero);
2131 EmitUint8(ByteOne);
2132 if (!is_twobyte_form) {
2133 EmitUint8(ByteTwo);
2134 }
2135 EmitUint8(0xD4);
2136 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2137 }
2138
2139
// psubq: packed subtract of 64-bit quadwords, xmm to xmm (encoding: 66 0F FB /r).
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2148
vpsubq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2149 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2150 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2151 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2152 bool is_twobyte_form = false;
2153 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2154 if (!add_right.NeedsRex()) {
2155 is_twobyte_form = true;
2156 }
2157 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2158 X86_64ManagedRegister vvvv_reg =
2159 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2160 if (is_twobyte_form) {
2161 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2162 } else {
2163 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2164 /*X=*/ false,
2165 add_right.NeedsRex(),
2166 SET_VEX_M_0F);
2167 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2168 }
2169 EmitUint8(ByteZero);
2170 EmitUint8(ByteOne);
2171 if (!is_twobyte_form) {
2172 EmitUint8(ByteTwo);
2173 }
2174 EmitUint8(0xFB);
2175 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2176 }
2177
2178
// paddusb: packed add of bytes with unsigned saturation (encoding: 66 0F DC /r).
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2187
2188
// paddsb: packed add of bytes with signed saturation (encoding: 66 0F EC /r).
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2197
2198
// paddusw: packed add of words with unsigned saturation (encoding: 66 0F DD /r).
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2207
2208
// paddsw: packed add of words with signed saturation (encoding: 66 0F ED /r).
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2217
2218
// psubusb: packed subtract of bytes with unsigned saturation (encoding: 66 0F D8 /r).
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2227
2228
// psubsb: packed subtract of bytes with signed saturation (encoding: 66 0F E8 /r).
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2237
2238
vpsubd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2239 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2240 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2241 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2242 bool is_twobyte_form = false;
2243 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2244 if (!add_right.NeedsRex()) {
2245 is_twobyte_form = true;
2246 }
2247 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2248 X86_64ManagedRegister vvvv_reg =
2249 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2250 if (is_twobyte_form) {
2251 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2252 } else {
2253 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2254 /*X=*/ false,
2255 add_right.NeedsRex(),
2256 SET_VEX_M_0F);
2257 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2258 }
2259 EmitUint8(ByteZero);
2260 EmitUint8(ByteOne);
2261 if (!is_twobyte_form) {
2262 EmitUint8(ByteTwo);
2263 }
2264 EmitUint8(0xFA);
2265 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2266 }
2267
2268
// psubusw: packed subtract of words with unsigned saturation (encoding: 66 0F D9 /r).
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2277
2278
// psubsw: packed subtract of words with signed saturation (encoding: 66 0F E9 /r).
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2287
2288
// cvtsi2ss (32-bit form): convert a 32-bit integer register to scalar single.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);  // Delegate to the width-aware overload without REX.W.
}
2292
2293
// cvtsi2ss: signed integer register to scalar single (encoding: F3 [REX.W] 0F 2A /r).
// is64bit selects a 64-bit integer source via REX.W.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2307
2308
// cvtsi2ss: signed integer in memory to scalar single (encoding: F3 [REX.W] 0F 2A /r).
// is64bit selects a 64-bit integer source via REX.W.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2322
2323
// cvtsi2sd (32-bit form): convert a 32-bit integer register to scalar double.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);  // Delegate to the width-aware overload without REX.W.
}
2327
2328
// cvtsi2sd: signed integer register to scalar double (encoding: F2 [REX.W] 0F 2A /r).
// is64bit selects a 64-bit integer source via REX.W.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2342
2343
// cvtsi2sd: signed integer in memory to scalar double (encoding: F2 [REX.W] 0F 2A /r).
// is64bit selects a 64-bit integer source via REX.W.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2357
2358
// cvtss2si: scalar single to 32-bit signed integer, rounded (encoding: F3 0F 2D /r).
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2367
2368
// cvtss2sd: scalar single to scalar double, register source (encoding: F3 0F 5A /r).
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2377
2378
// cvtss2sd: scalar single to scalar double, memory source (encoding: F3 0F 5A /r).
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2387
2388
// cvtsd2si: scalar double to 32-bit signed integer, rounded (encoding: F2 0F 2D /r).
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2397
2398
// cvttss2si (32-bit form): truncating scalar-single to 32-bit integer conversion.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);  // Delegate to the width-aware overload without REX.W.
}
2402
2403
// cvttss2si: scalar single to signed integer with truncation (encoding: F3 [REX.W] 0F 2C /r).
// is64bit selects a 64-bit integer destination via REX.W.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2417
2418
// cvttsd2si (32-bit form): truncating scalar-double to 32-bit integer conversion.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);  // Delegate to the width-aware overload without REX.W.
}
2422
2423
// cvttsd2si: scalar double to signed integer with truncation (encoding: F2 [REX.W] 0F 2C /r).
// is64bit selects a 64-bit integer destination via REX.W.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2437
2438
// cvtsd2ss: scalar double to scalar single, register source (encoding: F2 0F 5A /r).
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2447
2448
// cvtsd2ss: scalar double to scalar single, memory source (encoding: F2 0F 5A /r).
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2457
2458
// cvtdq2ps: packed signed doublewords to packed singles (encoding: 0F 5B /r, no prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2466
2467
// cvtdq2pd: packed signed doublewords to packed doubles (encoding: F3 0F E6 /r).
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2476
2477
// comiss: ordered compare of scalar singles, sets EFLAGS (encoding: 0F 2F /r, no prefix).
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode.
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2485
2486
// comiss: ordered compare of scalar singles, memory operand (encoding: 0F 2F /r).
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode.
  EmitOperand(a.LowBits(), b);
}
2494
2495
// comisd: ordered compare of scalar doubles, sets EFLAGS (encoding: 66 0F 2F /r).
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode.
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2504
2505
// comisd: ordered compare of scalar doubles, memory operand (encoding: 66 0F 2F /r).
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode.
  EmitOperand(a.LowBits(), b);
}
2514
2515
// ucomiss: unordered compare of scalar singles, sets EFLAGS (encoding: 0F 2E /r, no prefix).
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode.
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2523
2524
// ucomiss: unordered compare of scalar singles, memory operand (encoding: 0F 2E /r).
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode.
  EmitOperand(a.LowBits(), b);
}
2532
2533
// ucomisd: unordered compare of scalar doubles, sets EFLAGS (encoding: 66 0F 2E /r).
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode.
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2542
2543
// ucomisd: unordered compare of scalar doubles, memory operand (encoding: 66 0F 2E /r).
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode.
  EmitOperand(a.LowBits(), b);
}
2552
2553
// roundsd: round scalar double with the mode given by imm (SSE4.1,
// encoding: 66 0F 3A 0B /r ib).
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);  // Three-byte opcode map 0F 3A.
  EmitUint8(0x0B);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate byte.
}
2564
2565
// roundss: round scalar single with the mode given by imm (SSE4.1,
// encoding: 66 0F 3A 0A /r ib).
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);  // Three-byte opcode map 0F 3A.
  EmitUint8(0x0A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate byte.
}
2576
2577
// sqrtsd: square root of scalar double (encoding: F2 0F 51 /r).
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2586
2587
// sqrtss: square root of scalar single (encoding: F3 0F 51 /r).
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting this encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2596
2597
// xorpd: bitwise XOR of packed doubles, memory operand (encoding: 66 0F 57 /r).
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2606
2607
// xorpd: bitwise XOR of packed doubles, register operand (encoding: 66 0F 57 /r).
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2616
2617
// xorps: bitwise XOR of packed singles, memory operand (encoding: 0F 57 /r, no prefix).
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2625
2626
// xorps: bitwise XOR of packed singles, register operand (encoding: 0F 57 /r, no prefix).
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2634
// pxor: bitwise XOR of the full 128-bit registers (encoding: 66 0F EF /r).
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2643
2644 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
vpxor(XmmRegister dst,XmmRegister src1,XmmRegister src2)2645 void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2646 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2647 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2648 bool is_twobyte_form = false;
2649 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2650 if (!src2.NeedsRex()) {
2651 is_twobyte_form = true;
2652 }
2653 X86_64ManagedRegister vvvv_reg =
2654 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2655 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2656 if (is_twobyte_form) {
2657 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2658 } else {
2659 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2660 /*X=*/ false,
2661 src2.NeedsRex(),
2662 SET_VEX_M_0F);
2663 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2664 }
2665 EmitUint8(ByteZero);
2666 EmitUint8(ByteOne);
2667 if (!is_twobyte_form) {
2668 EmitUint8(ByteTwo);
2669 }
2670 EmitUint8(0xEF);
2671 EmitXmmRegisterOperand(dst.LowBits(), src2);
2672 }
2673
2674 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
vxorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2675 void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2676 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2677 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2678 bool is_twobyte_form = false;
2679 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2680 if (!src2.NeedsRex()) {
2681 is_twobyte_form = true;
2682 }
2683 X86_64ManagedRegister vvvv_reg =
2684 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2685 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2686 if (is_twobyte_form) {
2687 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2688 } else {
2689 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2690 /*X=*/ false,
2691 src2.NeedsRex(),
2692 SET_VEX_M_0F);
2693 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2694 }
2695 EmitUint8(ByteZero);
2696 EmitUint8(ByteOne);
2697 if (!is_twobyte_form) {
2698 EmitUint8(ByteTwo);
2699 }
2700 EmitUint8(0x57);
2701 EmitXmmRegisterOperand(dst.LowBits(), src2);
2702 }
2703
2704 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
vxorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2705 void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2706 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2707 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2708 bool is_twobyte_form = false;
2709 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2710 if (!src2.NeedsRex()) {
2711 is_twobyte_form = true;
2712 }
2713 X86_64ManagedRegister vvvv_reg =
2714 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2715 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2716 if (is_twobyte_form) {
2717 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2718 } else {
2719 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2720 /*X=*/ false,
2721 src2.NeedsRex(),
2722 SET_VEX_M_0F);
2723 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2724 }
2725 EmitUint8(ByteZero);
2726 EmitUint8(ByteOne);
2727 if (!is_twobyte_form) {
2728 EmitUint8(ByteTwo);
2729 }
2730 EmitUint8(0x57);
2731 EmitXmmRegisterOperand(dst.LowBits(), src2);
2732 }
2733
// andpd: bitwise AND of packed doubles, memory operand (encoding: 66 0F 54 /r).
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2742
// andpd: bitwise AND of packed doubles, register operand (encoding: 66 0F 54 /r).
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2751
// andps: bitwise AND of packed singles (encoding: 0F 54 /r, no prefix).
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2759
// pand: bitwise AND of the full 128-bit registers (encoding: 66 0F DB /r).
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting this SSE2 encoding.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2768
2769 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
vpand(XmmRegister dst,XmmRegister src1,XmmRegister src2)2770 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2771 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2772 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2773 bool is_twobyte_form = false;
2774 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2775 if (!src2.NeedsRex()) {
2776 is_twobyte_form = true;
2777 }
2778 X86_64ManagedRegister vvvv_reg =
2779 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2780 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2781 if (is_twobyte_form) {
2782 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2783 } else {
2784 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2785 /*X=*/ false,
2786 src2.NeedsRex(),
2787 SET_VEX_M_0F);
2788 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2789 }
2790 EmitUint8(ByteZero);
2791 EmitUint8(ByteOne);
2792 if (!is_twobyte_form) {
2793 EmitUint8(ByteTwo);
2794 }
2795 EmitUint8(0xDB);
2796 EmitXmmRegisterOperand(dst.LowBits(), src2);
2797 }
2798
2799 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
vandps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2800 void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2801 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2802 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2803 bool is_twobyte_form = false;
2804 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2805 if (!src2.NeedsRex()) {
2806 is_twobyte_form = true;
2807 }
2808 X86_64ManagedRegister vvvv_reg =
2809 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2810 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2811 if (is_twobyte_form) {
2812 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2813 } else {
2814 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2815 /*X=*/ false,
2816 src2.NeedsRex(),
2817 SET_VEX_M_0F);
2818 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2819 }
2820 EmitUint8(ByteZero);
2821 EmitUint8(ByteOne);
2822 if (!is_twobyte_form) {
2823 EmitUint8(ByteTwo);
2824 }
2825 EmitUint8(0x54);
2826 EmitXmmRegisterOperand(dst.LowBits(), src2);
2827 }
2828
2829 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
vandpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2830 void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2831 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2832 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2833 bool is_twobyte_form = false;
2834 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2835 if (!src2.NeedsRex()) {
2836 is_twobyte_form = true;
2837 }
2838 X86_64ManagedRegister vvvv_reg =
2839 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2840 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2841 if (is_twobyte_form) {
2842 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2843 } else {
2844 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2845 /*X=*/ false,
2846 src2.NeedsRex(),
2847 SET_VEX_M_0F);
2848 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2849 }
2850 EmitUint8(ByteZero);
2851 EmitUint8(ByteOne);
2852 if (!is_twobyte_form) {
2853 EmitUint8(ByteTwo);
2854 }
2855 EmitUint8(0x54);
2856 EmitXmmRegisterOperand(dst.LowBits(), src2);
2857 }
2858
andn(CpuRegister dst,CpuRegister src1,CpuRegister src2)2859 void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
2860 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2861 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
2862 uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
2863 /*X=*/ false,
2864 src2.NeedsRex(),
2865 SET_VEX_M_0F_38);
2866 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
2867 X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
2868 SET_VEX_L_128,
2869 SET_VEX_PP_NONE);
2870 EmitUint8(byte_zero);
2871 EmitUint8(byte_one);
2872 EmitUint8(byte_two);
2873 // Opcode field
2874 EmitUint8(0xF2);
2875 EmitRegisterOperand(dst.LowBits(), src2.LowBits());
2876 }
2877
// ANDNPD xmm1, xmm2 (66 0F 55 /r): dst = ~dst & src, bitwise, on packed doubles.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2886
// ANDNPS xmm1, xmm2 (0F 55 /r): dst = ~dst & src, bitwise, on packed floats.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2894
// PANDN xmm1, xmm2 (66 0F DF /r): dst = ~dst & src, full 128-bit bitwise.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2903
2904 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
vpandn(XmmRegister dst,XmmRegister src1,XmmRegister src2)2905 void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2906 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2907 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2908 bool is_twobyte_form = false;
2909 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2910 if (!src2.NeedsRex()) {
2911 is_twobyte_form = true;
2912 }
2913 X86_64ManagedRegister vvvv_reg =
2914 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2915 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2916 if (is_twobyte_form) {
2917 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2918 } else {
2919 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2920 /*X=*/ false,
2921 src2.NeedsRex(),
2922 SET_VEX_M_0F);
2923 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2924 }
2925 EmitUint8(ByteZero);
2926 EmitUint8(ByteOne);
2927 if (!is_twobyte_form) {
2928 EmitUint8(ByteTwo);
2929 }
2930 EmitUint8(0xDF);
2931 EmitXmmRegisterOperand(dst.LowBits(), src2);
2932 }
2933
2934 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
vandnps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2935 void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2936 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2937 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2938 bool is_twobyte_form = false;
2939 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2940 if (!src2.NeedsRex()) {
2941 is_twobyte_form = true;
2942 }
2943 X86_64ManagedRegister vvvv_reg =
2944 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2945 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2946 if (is_twobyte_form) {
2947 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2948 } else {
2949 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2950 /*X=*/ false,
2951 src2.NeedsRex(),
2952 SET_VEX_M_0F);
2953 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2954 }
2955 EmitUint8(ByteZero);
2956 EmitUint8(ByteOne);
2957 if (!is_twobyte_form) {
2958 EmitUint8(ByteTwo);
2959 }
2960 EmitUint8(0x55);
2961 EmitXmmRegisterOperand(dst.LowBits(), src2);
2962 }
2963
2964 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
vandnpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2965 void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2966 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2967 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2968 bool is_twobyte_form = false;
2969 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2970 if (!src2.NeedsRex()) {
2971 is_twobyte_form = true;
2972 }
2973 X86_64ManagedRegister vvvv_reg =
2974 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2975 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2976 if (is_twobyte_form) {
2977 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2978 } else {
2979 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2980 /*X=*/ false,
2981 src2.NeedsRex(),
2982 SET_VEX_M_0F);
2983 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2984 }
2985 EmitUint8(ByteZero);
2986 EmitUint8(ByteOne);
2987 if (!is_twobyte_form) {
2988 EmitUint8(ByteTwo);
2989 }
2990 EmitUint8(0x55);
2991 EmitXmmRegisterOperand(dst.LowBits(), src2);
2992 }
2993
// ORPD xmm1, xmm2 (66 0F 56 /r): bitwise OR of packed doubles.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3002
// ORPS xmm1, xmm2 (0F 56 /r): bitwise OR of packed floats.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3010
// POR xmm1, xmm2 (66 0F EB /r): full 128-bit bitwise OR.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3019
3020 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
vpor(XmmRegister dst,XmmRegister src1,XmmRegister src2)3021 void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3022 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3023 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3024 bool is_twobyte_form = false;
3025 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3026 if (!src2.NeedsRex()) {
3027 is_twobyte_form = true;
3028 }
3029 X86_64ManagedRegister vvvv_reg =
3030 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3031 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3032 if (is_twobyte_form) {
3033 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3034 } else {
3035 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3036 /*X=*/ false,
3037 src2.NeedsRex(),
3038 SET_VEX_M_0F);
3039 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3040 }
3041 EmitUint8(ByteZero);
3042 EmitUint8(ByteOne);
3043 if (!is_twobyte_form) {
3044 EmitUint8(ByteTwo);
3045 }
3046 EmitUint8(0xEB);
3047 EmitXmmRegisterOperand(dst.LowBits(), src2);
3048 }
3049
3050 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
vorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)3051 void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3052 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3053 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3054 bool is_twobyte_form = false;
3055 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3056 if (!src2.NeedsRex()) {
3057 is_twobyte_form = true;
3058 }
3059 X86_64ManagedRegister vvvv_reg =
3060 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3061 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3062 if (is_twobyte_form) {
3063 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3064 } else {
3065 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3066 /*X=*/ false,
3067 src2.NeedsRex(),
3068 SET_VEX_M_0F);
3069 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3070 }
3071 EmitUint8(ByteZero);
3072 EmitUint8(ByteOne);
3073 if (!is_twobyte_form) {
3074 EmitUint8(ByteTwo);
3075 }
3076 EmitUint8(0x56);
3077 EmitXmmRegisterOperand(dst.LowBits(), src2);
3078 }
3079
3080 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
vorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3081 void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3082 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3083 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3084 bool is_twobyte_form = false;
3085 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3086 if (!src2.NeedsRex()) {
3087 is_twobyte_form = true;
3088 }
3089 X86_64ManagedRegister vvvv_reg =
3090 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3091 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3092 if (is_twobyte_form) {
3093 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3094 } else {
3095 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3096 /*X=*/ false,
3097 src2.NeedsRex(),
3098 SET_VEX_M_0F);
3099 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3100 }
3101 EmitUint8(ByteZero);
3102 EmitUint8(ByteOne);
3103 if (!is_twobyte_form) {
3104 EmitUint8(ByteTwo);
3105 }
3106 EmitUint8(0x56);
3107 EmitXmmRegisterOperand(dst.LowBits(), src2);
3108 }
3109
// PAVGB xmm1, xmm2 (66 0F E0 /r): rounded average of packed unsigned bytes.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3118
// PAVGW xmm1, xmm2 (66 0F E3 /r): rounded average of packed unsigned words.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3127
// PSADBW xmm1, xmm2 (66 0F F6 /r): sum of absolute byte differences.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3136
// PMADDWD xmm1, xmm2 (66 0F F5 /r): multiply packed signed words, add adjacent
// 32-bit products.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3145
vpmaddwd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3146 void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3147 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3148 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3149 bool is_twobyte_form = false;
3150 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3151 if (!src2.NeedsRex()) {
3152 is_twobyte_form = true;
3153 }
3154 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3155 X86_64ManagedRegister vvvv_reg =
3156 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3157 if (is_twobyte_form) {
3158 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3159 } else {
3160 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3161 /*X=*/ false,
3162 src2.NeedsRex(),
3163 SET_VEX_M_0F);
3164 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3165 }
3166 EmitUint8(ByteZero);
3167 EmitUint8(ByteOne);
3168 if (!is_twobyte_form) {
3169 EmitUint8(ByteTwo);
3170 }
3171 EmitUint8(0xF5);
3172 EmitXmmRegisterOperand(dst.LowBits(), src2);
3173 }
3174
// PHADDW xmm1, xmm2 (66 0F 38 01 /r): horizontal add of packed words (SSSE3).
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3184
// PHADDD xmm1, xmm2 (66 0F 38 02 /r): horizontal add of packed dwords (SSSE3).
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3194
// HADDPS xmm1, xmm2 (F2 0F 7C /r): horizontal add of packed floats (SSE3).
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the packed-float form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3203
// HADDPD xmm1, xmm2 (66 0F 7C /r): horizontal add of packed doubles (SSE3).
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3212
// PHSUBW xmm1, xmm2 (66 0F 38 05 /r): horizontal subtract of packed words (SSSE3).
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3222
// PHSUBD xmm1, xmm2 (66 0F 38 06 /r): horizontal subtract of packed dwords (SSSE3).
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3232
// HSUBPS xmm1, xmm2 (F2 0F 7D /r): horizontal subtract of packed floats (SSE3).
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the packed-float form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3241
// HSUBPD xmm1, xmm2 (66 0F 7D /r): horizontal subtract of packed doubles (SSE3).
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3250
// PMINSB xmm1, xmm2 (66 0F 38 38 /r): minimum of packed signed bytes (SSE4.1).
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3260
// PMAXSB xmm1, xmm2 (66 0F 38 3C /r): maximum of packed signed bytes (SSE4.1).
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3270
// PMINSW xmm1, xmm2 (66 0F EA /r): minimum of packed signed words.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3279
// PMAXSW xmm1, xmm2 (66 0F EE /r): maximum of packed signed words.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3288
// PMINSD xmm1, xmm2 (66 0F 38 39 /r): minimum of packed signed dwords (SSE4.1).
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3298
// PMAXSD xmm1, xmm2 (66 0F 38 3D /r): maximum of packed signed dwords (SSE4.1).
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3308
// PMINUB xmm1, xmm2 (66 0F DA /r): minimum of packed unsigned bytes.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3317
// PMAXUB xmm1, xmm2 (66 0F DE /r): maximum of packed unsigned bytes.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3326
// PMINUW xmm1, xmm2 (66 0F 38 3A /r): minimum of packed unsigned words (SSE4.1).
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3336
// PMAXUW xmm1, xmm2 (66 0F 38 3E /r): maximum of packed unsigned words (SSE4.1).
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3346
// PMINUD xmm1, xmm2 (66 0F 38 3B /r): minimum of packed unsigned dwords (SSE4.1).
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3356
// PMAXUD xmm1, xmm2 (66 0F 38 3F /r): maximum of packed unsigned dwords (SSE4.1).
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3366
// MINPS xmm1, xmm2 (0F 5D /r): minimum of packed floats.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3374
// MAXPS xmm1, xmm2 (0F 5F /r): maximum of packed floats.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3382
// MINPD xmm1, xmm2 (66 0F 5D /r): minimum of packed doubles.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3391
// MAXPD xmm1, xmm2 (66 0F 5F /r): maximum of packed doubles.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3400
// PCMPEQB xmm1, xmm2 (66 0F 74 /r): per-byte equality compare (all-ones mask on equal).
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3409
// PCMPEQW xmm1, xmm2 (66 0F 75 /r): per-word equality compare.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3418
// PCMPEQD xmm1, xmm2 (66 0F 76 /r): per-dword equality compare.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3427
// PCMPEQQ xmm1, xmm2 (66 0F 38 29 /r): per-qword equality compare (SSE4.1).
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3437
// PCMPGTB xmm1, xmm2 (66 0F 64 /r): per-byte signed greater-than compare.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3446
// PCMPGTW xmm1, xmm2 (66 0F 65 /r): per-word signed greater-than compare.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3455
// PCMPGTD xmm1, xmm2 (66 0F 66 /r): per-dword signed greater-than compare.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3464
// PCMPGTQ xmm1, xmm2 (66 0F 38 37 /r): per-qword signed greater-than compare (SSE4.2).
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the 0F 38 opcode map.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3474
// SHUFPD xmm1, xmm2, imm8 (66 0F C6 /r ib): shuffle packed doubles by the
// 2-bit selector in imm8.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle control immediate follows the ModRM byte.
}
3484
3485
// SHUFPS xmm1, xmm2, imm8 (0F C6 /r ib): shuffle packed floats by the
// per-lane selectors in imm8.
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle control immediate follows the ModRM byte.
}
3494
3495
// PSHUFD xmm1, xmm2, imm8 (66 0F 70 /r ib): shuffle dwords by the selectors in imm8.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle control immediate follows the ModRM byte.
}
3505
3506
// PUNPCKLBW xmm1, xmm2 (66 0F 60 /r): interleave low bytes of dst and src.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3515
3516
// PUNPCKLWD xmm1, xmm2 (66 0F 61 /r): interleave low words of dst and src.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3525
3526
// PUNPCKLDQ xmm1, xmm2 (66 0F 62 /r): interleave low dwords of dst and src.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3535
3536
// PUNPCKLQDQ xmm1, xmm2 (66 0F 6C /r): interleave low qwords of dst and src.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3545
3546
// PUNPCKHBW xmm1, xmm2 (66 0F 68 /r): interleave high bytes of dst and src.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3555
3556
// PUNPCKHWD xmm1, xmm2 (66 0F 69 /r): interleave high words of dst and src.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3565
3566
// PUNPCKHDQ xmm1, xmm2 (66 0F 6A /r): interleave high dwords of dst and src.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3575
3576
// PUNPCKHQDQ xmm1, xmm2 (66 0F 6D /r): interleave high qwords of dst and src.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the SSE2 (xmm) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3585
3586
// PSLLW xmm, imm8 (66 0F 71 /6 ib): logical left shift of each word lane.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed here: the ModRM reg field holds the opcode
  // extension, not a register.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3597
3598
// PSLLD xmm, imm8 (66 0F 72 /6 ib): logical left shift of each dword lane.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3609
3610
// PSLLQ xmm, imm8 (66 0F 73 /6 ib): logical left shift of each qword lane.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3621
3622
// PSRAW xmm, imm8 (66 0F 71 /4 ib): arithmetic right shift of each word lane.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension.
  EmitUint8(shift_count.value());
}
3633
3634
// PSRAD xmm, imm8 (66 0F 72 /4 ib): arithmetic right shift of each dword lane.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension.
  EmitUint8(shift_count.value());
}
3645
3646
// PSRLW xmm, imm8 (66 0F 71 /2 ib): logical right shift of each word lane.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3657
3658
// PSRLD xmm, imm8 (66 0F 72 /2 ib): logical right shift of each dword lane.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3669
3670
// PSRLQ xmm, imm8 (66 0F 73 /2 ib): logical right shift of each qword lane.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3681
3682
// PSRLDQ xmm, imm8 (66 0F 73 /3 ib): shift the whole register right by imm8 bytes.
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed; ModRM reg field carries the opcode extension.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);  // /3 opcode extension.
  EmitUint8(shift_count.value());
}
3693
3694
fldl(const Address & src)3695 void X86_64Assembler::fldl(const Address& src) {
3696 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3697 EmitUint8(0xDD);
3698 EmitOperand(0, src);
3699 }
3700
3701
fstl(const Address & dst)3702 void X86_64Assembler::fstl(const Address& dst) {
3703 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3704 EmitUint8(0xDD);
3705 EmitOperand(2, dst);
3706 }
3707
3708
fstpl(const Address & dst)3709 void X86_64Assembler::fstpl(const Address& dst) {
3710 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3711 EmitUint8(0xDD);
3712 EmitOperand(3, dst);
3713 }
3714
3715
fstsw()3716 void X86_64Assembler::fstsw() {
3717 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3718 EmitUint8(0x9B);
3719 EmitUint8(0xDF);
3720 EmitUint8(0xE0);
3721 }
3722
3723
fnstcw(const Address & dst)3724 void X86_64Assembler::fnstcw(const Address& dst) {
3725 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3726 EmitUint8(0xD9);
3727 EmitOperand(7, dst);
3728 }
3729
3730
fldcw(const Address & src)3731 void X86_64Assembler::fldcw(const Address& src) {
3732 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3733 EmitUint8(0xD9);
3734 EmitOperand(5, src);
3735 }
3736
3737
fistpl(const Address & dst)3738 void X86_64Assembler::fistpl(const Address& dst) {
3739 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3740 EmitUint8(0xDF);
3741 EmitOperand(7, dst);
3742 }
3743
3744
fistps(const Address & dst)3745 void X86_64Assembler::fistps(const Address& dst) {
3746 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3747 EmitUint8(0xDB);
3748 EmitOperand(3, dst);
3749 }
3750
3751
fildl(const Address & src)3752 void X86_64Assembler::fildl(const Address& src) {
3753 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3754 EmitUint8(0xDF);
3755 EmitOperand(5, src);
3756 }
3757
3758
filds(const Address & src)3759 void X86_64Assembler::filds(const Address& src) {
3760 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3761 EmitUint8(0xDB);
3762 EmitOperand(0, src);
3763 }
3764
3765
fincstp()3766 void X86_64Assembler::fincstp() {
3767 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3768 EmitUint8(0xD9);
3769 EmitUint8(0xF7);
3770 }
3771
3772
ffree(const Immediate & index)3773 void X86_64Assembler::ffree(const Immediate& index) {
3774 CHECK_LT(index.value(), 7);
3775 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3776 EmitUint8(0xDD);
3777 EmitUint8(0xC0 + index.value());
3778 }
3779
3780
fsin()3781 void X86_64Assembler::fsin() {
3782 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3783 EmitUint8(0xD9);
3784 EmitUint8(0xFE);
3785 }
3786
3787
fcos()3788 void X86_64Assembler::fcos() {
3789 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3790 EmitUint8(0xD9);
3791 EmitUint8(0xFF);
3792 }
3793
3794
fptan()3795 void X86_64Assembler::fptan() {
3796 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3797 EmitUint8(0xD9);
3798 EmitUint8(0xF2);
3799 }
3800
fucompp()3801 void X86_64Assembler::fucompp() {
3802 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3803 EmitUint8(0xDA);
3804 EmitUint8(0xE9);
3805 }
3806
3807
fprem()3808 void X86_64Assembler::fprem() {
3809 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3810 EmitUint8(0xD9);
3811 EmitUint8(0xF8);
3812 }
3813
3814
xchgl(CpuRegister dst,CpuRegister src)3815 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
3816 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3817 // There is a short version for rax.
3818 // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3819 // work.
3820 const bool src_rax = src.AsRegister() == RAX;
3821 const bool dst_rax = dst.AsRegister() == RAX;
3822 if (src_rax || dst_rax) {
3823 EmitOptionalRex32(src_rax ? dst : src);
3824 EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3825 return;
3826 }
3827
3828 // General case.
3829 EmitOptionalRex32(src, dst);
3830 EmitUint8(0x87);
3831 EmitRegisterOperand(src.LowBits(), dst.LowBits());
3832 }
3833
3834
xchgq(CpuRegister dst,CpuRegister src)3835 void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
3836 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3837 // There is a short version for rax.
3838 // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3839 // work.
3840 const bool src_rax = src.AsRegister() == RAX;
3841 const bool dst_rax = dst.AsRegister() == RAX;
3842 if (src_rax || dst_rax) {
3843 // If src == target, emit a nop instead.
3844 if (src_rax && dst_rax) {
3845 EmitUint8(0x90);
3846 } else {
3847 EmitRex64(src_rax ? dst : src);
3848 EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3849 }
3850 return;
3851 }
3852
3853 // General case.
3854 EmitRex64(src, dst);
3855 EmitUint8(0x87);
3856 EmitRegisterOperand(src.LowBits(), dst.LowBits());
3857 }
3858
3859
xchgl(CpuRegister reg,const Address & address)3860 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
3861 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3862 EmitOptionalRex32(reg, address);
3863 EmitUint8(0x87);
3864 EmitOperand(reg.LowBits(), address);
3865 }
3866
3867
cmpb(const Address & address,const Immediate & imm)3868 void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
3869 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3870 CHECK(imm.is_int32());
3871 EmitOptionalRex32(address);
3872 EmitUint8(0x80);
3873 EmitOperand(7, address);
3874 EmitUint8(imm.value() & 0xFF);
3875 }
3876
3877
cmpw(const Address & address,const Immediate & imm)3878 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
3879 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3880 CHECK(imm.is_int32());
3881 EmitOperandSizeOverride();
3882 EmitOptionalRex32(address);
3883 EmitComplex(7, address, imm, /* is_16_op= */ true);
3884 }
3885
3886
cmpl(CpuRegister reg,const Immediate & imm)3887 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
3888 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3889 CHECK(imm.is_int32());
3890 EmitOptionalRex32(reg);
3891 EmitComplex(7, Operand(reg), imm);
3892 }
3893
3894
cmpl(CpuRegister reg0,CpuRegister reg1)3895 void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
3896 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3897 EmitOptionalRex32(reg0, reg1);
3898 EmitUint8(0x3B);
3899 EmitOperand(reg0.LowBits(), Operand(reg1));
3900 }
3901
3902
cmpl(CpuRegister reg,const Address & address)3903 void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
3904 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3905 EmitOptionalRex32(reg, address);
3906 EmitUint8(0x3B);
3907 EmitOperand(reg.LowBits(), address);
3908 }
3909
3910
cmpl(const Address & address,CpuRegister reg)3911 void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
3912 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3913 EmitOptionalRex32(reg, address);
3914 EmitUint8(0x39);
3915 EmitOperand(reg.LowBits(), address);
3916 }
3917
3918
cmpl(const Address & address,const Immediate & imm)3919 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
3920 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3921 CHECK(imm.is_int32());
3922 EmitOptionalRex32(address);
3923 EmitComplex(7, address, imm);
3924 }
3925
3926
cmpq(CpuRegister reg0,CpuRegister reg1)3927 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
3928 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3929 EmitRex64(reg0, reg1);
3930 EmitUint8(0x3B);
3931 EmitOperand(reg0.LowBits(), Operand(reg1));
3932 }
3933
3934
cmpq(CpuRegister reg,const Immediate & imm)3935 void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
3936 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3937 CHECK(imm.is_int32()); // cmpq only supports 32b immediate.
3938 EmitRex64(reg);
3939 EmitComplex(7, Operand(reg), imm);
3940 }
3941
3942
cmpq(CpuRegister reg,const Address & address)3943 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
3944 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3945 EmitRex64(reg, address);
3946 EmitUint8(0x3B);
3947 EmitOperand(reg.LowBits(), address);
3948 }
3949
3950
cmpq(const Address & address,const Immediate & imm)3951 void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
3952 CHECK(imm.is_int32()); // cmpq only supports 32b immediate.
3953 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3954 EmitRex64(address);
3955 EmitComplex(7, address, imm);
3956 }
3957
3958
addl(CpuRegister dst,CpuRegister src)3959 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
3960 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3961 EmitOptionalRex32(dst, src);
3962 EmitUint8(0x03);
3963 EmitRegisterOperand(dst.LowBits(), src.LowBits());
3964 }
3965
3966
addl(CpuRegister reg,const Address & address)3967 void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
3968 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3969 EmitOptionalRex32(reg, address);
3970 EmitUint8(0x03);
3971 EmitOperand(reg.LowBits(), address);
3972 }
3973
3974
testl(CpuRegister reg1,CpuRegister reg2)3975 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
3976 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3977 EmitOptionalRex32(reg1, reg2);
3978 EmitUint8(0x85);
3979 EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
3980 }
3981
3982
testl(CpuRegister reg,const Address & address)3983 void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
3984 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3985 EmitOptionalRex32(reg, address);
3986 EmitUint8(0x85);
3987 EmitOperand(reg.LowBits(), address);
3988 }
3989
3990
testl(CpuRegister reg,const Immediate & immediate)3991 void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
3992 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3993 // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
3994 // we only test the byte CpuRegister to keep the encoding short.
3995 if (immediate.is_uint8() && reg.AsRegister() < 4) {
3996 // Use zero-extended 8-bit immediate.
3997 if (reg.AsRegister() == RAX) {
3998 EmitUint8(0xA8);
3999 } else {
4000 EmitUint8(0xF6);
4001 EmitUint8(0xC0 + reg.AsRegister());
4002 }
4003 EmitUint8(immediate.value() & 0xFF);
4004 } else if (reg.AsRegister() == RAX) {
4005 // Use short form if the destination is RAX.
4006 EmitUint8(0xA9);
4007 EmitImmediate(immediate);
4008 } else {
4009 EmitOptionalRex32(reg);
4010 EmitUint8(0xF7);
4011 EmitOperand(0, Operand(reg));
4012 EmitImmediate(immediate);
4013 }
4014 }
4015
4016
testq(CpuRegister reg1,CpuRegister reg2)4017 void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
4018 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4019 EmitRex64(reg1, reg2);
4020 EmitUint8(0x85);
4021 EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
4022 }
4023
4024
testq(CpuRegister reg,const Address & address)4025 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
4026 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4027 EmitRex64(reg, address);
4028 EmitUint8(0x85);
4029 EmitOperand(reg.LowBits(), address);
4030 }
4031
4032
testb(const Address & dst,const Immediate & imm)4033 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4034 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4035 EmitOptionalRex32(dst);
4036 EmitUint8(0xF6);
4037 EmitOperand(Register::RAX, dst);
4038 CHECK(imm.is_int8());
4039 EmitUint8(imm.value() & 0xFF);
4040 }
4041
4042
testl(const Address & dst,const Immediate & imm)4043 void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
4044 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4045 EmitOptionalRex32(dst);
4046 EmitUint8(0xF7);
4047 EmitOperand(0, dst);
4048 EmitImmediate(imm);
4049 }
4050
4051
andl(CpuRegister dst,CpuRegister src)4052 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
4053 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4054 EmitOptionalRex32(dst, src);
4055 EmitUint8(0x23);
4056 EmitOperand(dst.LowBits(), Operand(src));
4057 }
4058
4059
andl(CpuRegister reg,const Address & address)4060 void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
4061 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4062 EmitOptionalRex32(reg, address);
4063 EmitUint8(0x23);
4064 EmitOperand(reg.LowBits(), address);
4065 }
4066
4067
andl(CpuRegister dst,const Immediate & imm)4068 void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
4069 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4070 EmitOptionalRex32(dst);
4071 EmitComplex(4, Operand(dst), imm);
4072 }
4073
4074
andq(CpuRegister reg,const Immediate & imm)4075 void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
4076 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4077 CHECK(imm.is_int32()); // andq only supports 32b immediate.
4078 EmitRex64(reg);
4079 EmitComplex(4, Operand(reg), imm);
4080 }
4081
4082
andq(CpuRegister dst,CpuRegister src)4083 void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
4084 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4085 EmitRex64(dst, src);
4086 EmitUint8(0x23);
4087 EmitOperand(dst.LowBits(), Operand(src));
4088 }
4089
4090
andq(CpuRegister dst,const Address & src)4091 void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
4092 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4093 EmitRex64(dst, src);
4094 EmitUint8(0x23);
4095 EmitOperand(dst.LowBits(), src);
4096 }
4097
4098
andw(const Address & address,const Immediate & imm)4099 void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
4100 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4101 CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
4102 EmitUint8(0x66);
4103 EmitOptionalRex32(address);
4104 EmitComplex(4, address, imm, /* is_16_op= */ true);
4105 }
4106
4107
orl(CpuRegister dst,CpuRegister src)4108 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
4109 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4110 EmitOptionalRex32(dst, src);
4111 EmitUint8(0x0B);
4112 EmitOperand(dst.LowBits(), Operand(src));
4113 }
4114
4115
orl(CpuRegister reg,const Address & address)4116 void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
4117 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4118 EmitOptionalRex32(reg, address);
4119 EmitUint8(0x0B);
4120 EmitOperand(reg.LowBits(), address);
4121 }
4122
4123
orl(CpuRegister dst,const Immediate & imm)4124 void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
4125 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4126 EmitOptionalRex32(dst);
4127 EmitComplex(1, Operand(dst), imm);
4128 }
4129
4130
orq(CpuRegister dst,const Immediate & imm)4131 void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
4132 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4133 CHECK(imm.is_int32()); // orq only supports 32b immediate.
4134 EmitRex64(dst);
4135 EmitComplex(1, Operand(dst), imm);
4136 }
4137
4138
orq(CpuRegister dst,CpuRegister src)4139 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
4140 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4141 EmitRex64(dst, src);
4142 EmitUint8(0x0B);
4143 EmitOperand(dst.LowBits(), Operand(src));
4144 }
4145
4146
orq(CpuRegister dst,const Address & src)4147 void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
4148 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4149 EmitRex64(dst, src);
4150 EmitUint8(0x0B);
4151 EmitOperand(dst.LowBits(), src);
4152 }
4153
4154
xorl(CpuRegister dst,CpuRegister src)4155 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
4156 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4157 EmitOptionalRex32(dst, src);
4158 EmitUint8(0x33);
4159 EmitOperand(dst.LowBits(), Operand(src));
4160 }
4161
4162
xorl(CpuRegister reg,const Address & address)4163 void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
4164 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4165 EmitOptionalRex32(reg, address);
4166 EmitUint8(0x33);
4167 EmitOperand(reg.LowBits(), address);
4168 }
4169
4170
xorl(CpuRegister dst,const Immediate & imm)4171 void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
4172 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4173 EmitOptionalRex32(dst);
4174 EmitComplex(6, Operand(dst), imm);
4175 }
4176
4177
xorq(CpuRegister dst,CpuRegister src)4178 void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
4179 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4180 EmitRex64(dst, src);
4181 EmitUint8(0x33);
4182 EmitOperand(dst.LowBits(), Operand(src));
4183 }
4184
4185
xorq(CpuRegister dst,const Immediate & imm)4186 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
4187 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4188 CHECK(imm.is_int32()); // xorq only supports 32b immediate.
4189 EmitRex64(dst);
4190 EmitComplex(6, Operand(dst), imm);
4191 }
4192
xorq(CpuRegister dst,const Address & src)4193 void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
4194 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4195 EmitRex64(dst, src);
4196 EmitUint8(0x33);
4197 EmitOperand(dst.LowBits(), src);
4198 }
4199
4200
4201 #if 0
4202 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
4203 // REX.WRXB
4204 // W - 64-bit operand
4205 // R - MODRM.reg
4206 // X - SIB.index
4207 // B - MODRM.rm/SIB.base
4208 uint8_t rex = force ? 0x40 : 0;
4209 if (w) {
4210 rex |= 0x48; // REX.W000
4211 }
4212 if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
4213 rex |= 0x44; // REX.0R00
4214 *r = static_cast<Register>(*r - 8);
4215 }
4216 if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
4217 rex |= 0x42; // REX.00X0
4218 *x = static_cast<Register>(*x - 8);
4219 }
4220 if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
4221 rex |= 0x41; // REX.000B
4222 *b = static_cast<Register>(*b - 8);
4223 }
4224 if (rex != 0) {
4225 EmitUint8(rex);
4226 }
4227 }
4228
4229 void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
4230 // REX.WRXB
4231 // W - 64-bit operand
4232 // R - MODRM.reg
4233 // X - SIB.index
4234 // B - MODRM.rm/SIB.base
4235 uint8_t rex = mem->rex();
4236 if (force) {
4237 rex |= 0x40; // REX.0000
4238 }
4239 if (w) {
4240 rex |= 0x48; // REX.W000
4241 }
4242 if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
4243 rex |= 0x44; // REX.0R00
4244 *dst = static_cast<Register>(*dst - 8);
4245 }
4246 if (rex != 0) {
4247 EmitUint8(rex);
4248 }
4249 }
4250
4251 void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
4252 #endif
4253
addl(CpuRegister reg,const Immediate & imm)4254 void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
4255 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4256 EmitOptionalRex32(reg);
4257 EmitComplex(0, Operand(reg), imm);
4258 }
4259
4260
addq(CpuRegister reg,const Immediate & imm)4261 void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
4262 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4263 CHECK(imm.is_int32()); // addq only supports 32b immediate.
4264 EmitRex64(reg);
4265 EmitComplex(0, Operand(reg), imm);
4266 }
4267
4268
addq(CpuRegister dst,const Address & address)4269 void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
4270 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4271 EmitRex64(dst, address);
4272 EmitUint8(0x03);
4273 EmitOperand(dst.LowBits(), address);
4274 }
4275
4276
addq(CpuRegister dst,CpuRegister src)4277 void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
4278 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4279 // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
4280 EmitRex64(src, dst);
4281 EmitUint8(0x01);
4282 EmitRegisterOperand(src.LowBits(), dst.LowBits());
4283 }
4284
4285
addl(const Address & address,CpuRegister reg)4286 void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
4287 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4288 EmitOptionalRex32(reg, address);
4289 EmitUint8(0x01);
4290 EmitOperand(reg.LowBits(), address);
4291 }
4292
4293
addl(const Address & address,const Immediate & imm)4294 void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
4295 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4296 EmitOptionalRex32(address);
4297 EmitComplex(0, address, imm);
4298 }
4299
4300
addw(const Address & address,const Immediate & imm)4301 void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
4302 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4303 CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
4304 EmitUint8(0x66);
4305 EmitOptionalRex32(address);
4306 EmitComplex(0, address, imm, /* is_16_op= */ true);
4307 }
4308
4309
subl(CpuRegister dst,CpuRegister src)4310 void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
4311 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4312 EmitOptionalRex32(dst, src);
4313 EmitUint8(0x2B);
4314 EmitOperand(dst.LowBits(), Operand(src));
4315 }
4316
4317
subl(CpuRegister reg,const Immediate & imm)4318 void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
4319 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4320 EmitOptionalRex32(reg);
4321 EmitComplex(5, Operand(reg), imm);
4322 }
4323
4324
subq(CpuRegister reg,const Immediate & imm)4325 void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
4326 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4327 CHECK(imm.is_int32()); // subq only supports 32b immediate.
4328 EmitRex64(reg);
4329 EmitComplex(5, Operand(reg), imm);
4330 }
4331
4332
subq(CpuRegister dst,CpuRegister src)4333 void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
4334 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4335 EmitRex64(dst, src);
4336 EmitUint8(0x2B);
4337 EmitRegisterOperand(dst.LowBits(), src.LowBits());
4338 }
4339
4340
subq(CpuRegister reg,const Address & address)4341 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4342 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4343 EmitRex64(reg, address);
4344 EmitUint8(0x2B);
4345 EmitOperand(reg.LowBits() & 7, address);
4346 }
4347
4348
subl(CpuRegister reg,const Address & address)4349 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
4350 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4351 EmitOptionalRex32(reg, address);
4352 EmitUint8(0x2B);
4353 EmitOperand(reg.LowBits(), address);
4354 }
4355
4356
cdq()4357 void X86_64Assembler::cdq() {
4358 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4359 EmitUint8(0x99);
4360 }
4361
4362
cqo()4363 void X86_64Assembler::cqo() {
4364 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4365 EmitRex64();
4366 EmitUint8(0x99);
4367 }
4368
4369
idivl(CpuRegister reg)4370 void X86_64Assembler::idivl(CpuRegister reg) {
4371 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4372 EmitOptionalRex32(reg);
4373 EmitUint8(0xF7);
4374 EmitUint8(0xF8 | reg.LowBits());
4375 }
4376
4377
idivq(CpuRegister reg)4378 void X86_64Assembler::idivq(CpuRegister reg) {
4379 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4380 EmitRex64(reg);
4381 EmitUint8(0xF7);
4382 EmitUint8(0xF8 | reg.LowBits());
4383 }
4384
4385
divl(CpuRegister reg)4386 void X86_64Assembler::divl(CpuRegister reg) {
4387 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4388 EmitOptionalRex32(reg);
4389 EmitUint8(0xF7);
4390 EmitUint8(0xF0 | reg.LowBits());
4391 }
4392
4393
divq(CpuRegister reg)4394 void X86_64Assembler::divq(CpuRegister reg) {
4395 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4396 EmitRex64(reg);
4397 EmitUint8(0xF7);
4398 EmitUint8(0xF0 | reg.LowBits());
4399 }
4400
4401
imull(CpuRegister dst,CpuRegister src)4402 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
4403 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4404 EmitOptionalRex32(dst, src);
4405 EmitUint8(0x0F);
4406 EmitUint8(0xAF);
4407 EmitOperand(dst.LowBits(), Operand(src));
4408 }
4409
imull(CpuRegister dst,CpuRegister src,const Immediate & imm)4410 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
4411 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4412 CHECK(imm.is_int32()); // imull only supports 32b immediate.
4413
4414 EmitOptionalRex32(dst, src);
4415
4416 // See whether imm can be represented as a sign-extended 8bit value.
4417 int32_t v32 = static_cast<int32_t>(imm.value());
4418 if (IsInt<8>(v32)) {
4419 // Sign-extension works.
4420 EmitUint8(0x6B);
4421 EmitOperand(dst.LowBits(), Operand(src));
4422 EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
4423 } else {
4424 // Not representable, use full immediate.
4425 EmitUint8(0x69);
4426 EmitOperand(dst.LowBits(), Operand(src));
4427 EmitImmediate(imm);
4428 }
4429 }
4430
4431
imull(CpuRegister reg,const Immediate & imm)4432 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
4433 imull(reg, reg, imm);
4434 }
4435
4436
imull(CpuRegister reg,const Address & address)4437 void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
4438 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4439 EmitOptionalRex32(reg, address);
4440 EmitUint8(0x0F);
4441 EmitUint8(0xAF);
4442 EmitOperand(reg.LowBits(), address);
4443 }
4444
4445
imulq(CpuRegister dst,CpuRegister src)4446 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
4447 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4448 EmitRex64(dst, src);
4449 EmitUint8(0x0F);
4450 EmitUint8(0xAF);
4451 EmitRegisterOperand(dst.LowBits(), src.LowBits());
4452 }
4453
4454
imulq(CpuRegister reg,const Immediate & imm)4455 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
4456 imulq(reg, reg, imm);
4457 }
4458
imulq(CpuRegister dst,CpuRegister reg,const Immediate & imm)4459 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
4460 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4461 CHECK(imm.is_int32()); // imulq only supports 32b immediate.
4462
4463 EmitRex64(dst, reg);
4464
4465 // See whether imm can be represented as a sign-extended 8bit value.
4466 int64_t v64 = imm.value();
4467 if (IsInt<8>(v64)) {
4468 // Sign-extension works.
4469 EmitUint8(0x6B);
4470 EmitOperand(dst.LowBits(), Operand(reg));
4471 EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
4472 } else {
4473 // Not representable, use full immediate.
4474 EmitUint8(0x69);
4475 EmitOperand(dst.LowBits(), Operand(reg));
4476 EmitImmediate(imm);
4477 }
4478 }
4479
imulq(CpuRegister reg,const Address & address)4480 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
4481 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4482 EmitRex64(reg, address);
4483 EmitUint8(0x0F);
4484 EmitUint8(0xAF);
4485 EmitOperand(reg.LowBits(), address);
4486 }
4487
4488
imull(CpuRegister reg)4489 void X86_64Assembler::imull(CpuRegister reg) {
4490 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4491 EmitOptionalRex32(reg);
4492 EmitUint8(0xF7);
4493 EmitOperand(5, Operand(reg));
4494 }
4495
4496
imulq(CpuRegister reg)4497 void X86_64Assembler::imulq(CpuRegister reg) {
4498 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4499 EmitRex64(reg);
4500 EmitUint8(0xF7);
4501 EmitOperand(5, Operand(reg));
4502 }
4503
4504
imull(const Address & address)4505 void X86_64Assembler::imull(const Address& address) {
4506 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4507 EmitOptionalRex32(address);
4508 EmitUint8(0xF7);
4509 EmitOperand(5, address);
4510 }
4511
4512
mull(CpuRegister reg)4513 void X86_64Assembler::mull(CpuRegister reg) {
4514 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4515 EmitOptionalRex32(reg);
4516 EmitUint8(0xF7);
4517 EmitOperand(4, Operand(reg));
4518 }
4519
4520
mull(const Address & address)4521 void X86_64Assembler::mull(const Address& address) {
4522 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4523 EmitOptionalRex32(address);
4524 EmitUint8(0xF7);
4525 EmitOperand(4, address);
4526 }
4527
4528
shll(CpuRegister reg,const Immediate & imm)4529 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
4530 EmitGenericShift(false, 4, reg, imm);
4531 }
4532
4533
shlq(CpuRegister reg,const Immediate & imm)4534 void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
4535 EmitGenericShift(true, 4, reg, imm);
4536 }
4537
4538
shll(CpuRegister operand,CpuRegister shifter)4539 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
4540 EmitGenericShift(false, 4, operand, shifter);
4541 }
4542
4543
shlq(CpuRegister operand,CpuRegister shifter)4544 void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
4545 EmitGenericShift(true, 4, operand, shifter);
4546 }
4547
4548
shrl(CpuRegister reg,const Immediate & imm)4549 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
4550 EmitGenericShift(false, 5, reg, imm);
4551 }
4552
4553
shrq(CpuRegister reg,const Immediate & imm)4554 void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
4555 EmitGenericShift(true, 5, reg, imm);
4556 }
4557
4558
shrl(CpuRegister operand,CpuRegister shifter)4559 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
4560 EmitGenericShift(false, 5, operand, shifter);
4561 }
4562
4563
shrq(CpuRegister operand,CpuRegister shifter)4564 void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
4565 EmitGenericShift(true, 5, operand, shifter);
4566 }
4567
4568
sarl(CpuRegister reg,const Immediate & imm)4569 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
4570 EmitGenericShift(false, 7, reg, imm);
4571 }
4572
4573
// Arithmetic shift right (32-bit) by CL; SAR is opcode extension /7.
void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


// Arithmetic shift right (64-bit) by an immediate; SAR is /7, wide=true adds REX.W.
void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


// Arithmetic shift right (64-bit) by CL.
void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


// Rotate left (32-bit) by an immediate; ROL is opcode extension /0.
void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


// Rotate left (32-bit) by CL.
void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


// Rotate right (32-bit) by an immediate; ROR is opcode extension /1.
void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


// Rotate right (32-bit) by CL.
void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


// Rotate left (64-bit) by an immediate.
void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


// Rotate left (64-bit) by CL.
void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


// Rotate right (64-bit) by an immediate.
void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


// Rotate right (64-bit) by CL.
void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4627
4628
// Two's-complement negate (32-bit): NEG r/m32 = F7 /3.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Two's-complement negate (64-bit): REX.W + F7 /3.
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Bitwise NOT (32-bit): F7 /2. The ModRM byte is emitted directly:
// 0xD0 = mod 11, reg field 2, rm = reg.LowBits().
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


// Bitwise NOT (64-bit): REX.W + F7 /2 (same encoding as notl, via EmitOperand).
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
4659
4660
// ENTER imm16, 0: C8 iw ib. `imm` is the frame size (little-endian imm16);
// the trailing 0x00 byte is the (always-zero) nesting level.
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);
}


// LEAVE: C9.
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// RET (near return): C3.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// RET imm16 (near return, pop `imm` extra bytes off the stack): C2 iw.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// NOP: 90.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// INT3 (breakpoint): CC.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// HLT: F4.
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4709
4710
// Conditional jump to `label`. For a bound (backward) label the shortest
// encoding is chosen: 2-byte `Jcc rel8` (70+cc) when the displacement fits
// in int8, else 6-byte `Jcc rel32` (0F 80+cc). The displacement is relative
// to the end of the instruction, hence the kShortSize/kLongSize adjustment.
// Unbound (forward) labels always use the rel32 form with a link placeholder.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}
4732
4733
// Conditional jump to a NearLabel: always the 2-byte `Jcc rel8` form
// (70+cc). For a bound label the 8-bit reach is CHECKed; for an unbound
// label a 1-byte link placeholder is emitted.
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}
4748
4749
// JRCXZ (jump if RCX == 0): E3 rel8. Only the 8-bit form exists, so the
// target must be a NearLabel and a bound target must be within int8 reach.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4764
4765
// Indirect jump through a register: FF /4.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// Indirect jump through a memory operand: FF /4.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}
4779
// Unconditional jump to `label`. Bound (backward) targets pick the shortest
// encoding: 2-byte `JMP rel8` (EB) when in range, else 5-byte `JMP rel32`
// (E9). Unbound (forward) targets always use rel32 with a link placeholder.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}
4799
4800
// Unconditional jump to a NearLabel: always the 2-byte `JMP rel8` (EB) form;
// a bound target must be within int8 reach.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
4815
4816
// REP MOVSW: 66 (operand-size) F3 (REP) A5 — copy RCX 16-bit words [RSI]->[RDI].
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}


// LOCK prefix (F0). Returns `this` so the prefixed instruction can be
// chained, e.g. lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}


// CMPXCHG r/m32, r32: 0F B1 /r.
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG r/m64, r64: REX.W + 0F B1 /r.
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// MFENCE (full memory fence): 0F AE F0.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}


// GS segment-override prefix (0x65). Returns `this` for chaining, like lock().
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
4864
4865
AddImmediate(CpuRegister reg,const Immediate & imm)4866 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
4867 int value = imm.value();
4868 if (value != 0) {
4869 if (value > 0) {
4870 addl(reg, imm);
4871 } else {
4872 subl(reg, Immediate(value));
4873 }
4874 }
4875 }
4876
4877
// SETcc r/m8: 0F 90+cc, ModRM mod=11/reg=0/rm=dst. Without a REX prefix,
// byte-register encodings 4-7 mean AH/CH/DH/BH; with any REX present they
// mean SPL/BPL/SIL/DIL, so a bare 0x40 REX is forced for those registers.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());
}
4888
// BLSI (extract lowest set bit): 3-byte VEX, map 0F 38, opcode F3 /3.
// `dst` is encoded in VEX.vvvv (inverted); `src` is the ModRM.rm operand.
// W=1 selects 64-bit operand size.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 selects BLSI.
}

// BLSMSK (mask up to lowest set bit): same VEX encoding as blsi, but /2.
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 selects BLSMSK.
}

// BLSR (reset lowest set bit): same VEX encoding as blsi, but /1.
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 selects BLSR.
}
4942
// BSWAP r32: 0F C8+rd (register encoded in the opcode byte).
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// BSWAP r64: REX.W + 0F C8+rd.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
4956
// BSF r32, r32 (bit scan forward): 0F BC /r.
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF r32, m32: 0F BC /r.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// BSF r64, r64: REX.W + 0F BC /r.
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF r64, m64: REX.W + 0F BC /r.
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// BSR r32, r32 (bit scan reverse): 0F BD /r.
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR r32, m32: 0F BD /r.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// BSR r64, r64: REX.W + 0F BD /r.
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR r64, m64: REX.W + 0F BD /r.
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5020
// POPCNT r32, r32: F3 0F B8 /r. The F3 prefix must precede any REX byte.
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT r32, m32: F3 0F B8 /r.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

// POPCNT r64, r64: F3 REX.W 0F B8 /r.
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT r64, m64: F3 REX.W 0F B8 /r.
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5056
// REPNE SCASB: F2 AE — scan bytes at [RDI] for AL, RCX times.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

// REPNE SCASW: 66 F2 AF — the 66 prefix narrows SCAS to 16-bit words.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

// REPE CMPSW: 66 F3 A7 — compare 16-bit words [RSI] vs [RDI] while equal.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSD (32-bit elements): F3 A7.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSQ (64-bit elements): F3 REX.W A7.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
5091
5092
LoadDoubleConstant(XmmRegister dst,double value)5093 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
5094 // TODO: Need to have a code constants table.
5095 int64_t constant = bit_cast<int64_t, double>(value);
5096 pushq(Immediate(High32Bits(constant)));
5097 pushq(Immediate(Low32Bits(constant)));
5098 movsd(dst, Address(CpuRegister(RSP), 0));
5099 addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
5100 }
5101
5102
// Pads the buffer with single-byte NOPs (0x90) until the buffer position,
// biased by `offset`, is a multiple of `alignment` (must be a power of two).
void X86_64Assembler::Align(int alignment, int offset) {
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instruction until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
    nop();
  }
}
5110
5111
// Binds `label` to the current buffer position and patches every linked
// 32-bit displacement placeholder. Each placeholder holds the position of
// the next link in the chain; it is overwritten with the pc-relative
// displacement (relative to the end of the 4-byte field).
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);  // Next link in the chain.
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}
5123
5124
// Binds a NearLabel and patches every linked 8-bit displacement. Each link
// byte holds the delta back to the previous link (0 marks the end of the
// chain — see EmitLabelLink(NearLabel*)); it is overwritten with the actual
// rel8 displacement, which is CHECKed to fit.
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Distance to previous link.
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));
    buffer_.Store<int8_t>(position, offset);
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5138
5139
// Emits an already-encoded operand (ModRM byte plus any SIB/displacement
// bytes), folding `reg_or_opcode` — a register number or /digit opcode
// extension — into the reg field (bits 5:3) of the ModRM byte.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // reg field must still be empty.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
5157
5158
// Emits `imm` little-endian: 2 bytes for 16-bit operations, otherwise
// 4 bytes when the value fits in int32, else the full 8 bytes.
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
5169
5170
// Emits an ALU instruction of the "group 1" family (ADD/OR/ADC/SBB/AND/
// SUB/XOR/CMP, selected by the /digit in `reg_or_opcode`) with an immediate,
// choosing the shortest encoding:
//   83 /digit ib  — sign-extended imm8, when the immediate fits;
//   05+digit<<3   — accumulator short form, when the destination is RAX;
//   81 /digit iz  — general form otherwise.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5192
5193
// Emits a 32-bit pc-relative reference to `label`: the resolved displacement
// for a bound label (adjusted by `instruction_size`, since displacements are
// relative to the end of the instruction), or a link placeholder otherwise.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}


// Links an unbound label: emits the previous chain head as a 32-bit
// placeholder and makes this position the new head of the chain.
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}
5211
5212
// Links an unbound NearLabel. Only one byte is available, so instead of an
// absolute position the placeholder stores the delta back to the previous
// link (0 when this is the first link); Bind(NearLabel*) walks the chain
// using these deltas.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5226
5227
// Emits a shift/rotate of `reg` by an immediate. `reg_or_opcode` is the
// /digit selecting the operation (e.g. ROL=0, ROR=1, SAR=7); `wide` adds
// REX.W for 64-bit operands. Shift-by-1 uses the shorter D1 /digit form,
// otherwise C1 /digit ib.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}
5248
5249
// Emits a shift/rotate of `operand` by CL (D3 /digit). The hardware only
// takes the count in CL, so `shifter` must be RCX.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5264
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5265 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5266 // REX.WRXB
5267 // W - 64-bit operand
5268 // R - MODRM.reg
5269 // X - SIB.index
5270 // B - MODRM.rm/SIB.base
5271 uint8_t rex = force ? 0x40 : 0;
5272 if (w) {
5273 rex |= 0x48; // REX.W000
5274 }
5275 if (r) {
5276 rex |= 0x44; // REX.0R00
5277 }
5278 if (x) {
5279 rex |= 0x42; // REX.00X0
5280 }
5281 if (b) {
5282 rex |= 0x41; // REX.000B
5283 }
5284 if (rex != 0) {
5285 EmitUint8(rex);
5286 }
5287 }
5288
// Emits a REX prefix only if `reg` is one of R8-R15 (needs REX.B).
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

// REX for a reg-reg form: `dst` extends ModRM.reg (R), `src` ModRM.rm (B).
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Same as above for XMM8-XMM15 operands.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Mixed GPR/XMM reg-reg form.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Mixed XMM/GPR reg-reg form.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5308
// Emits the REX byte the memory operand pre-computed for its base/index
// registers (X and B bits), if any.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Combines the operand's X/B bits with REX.R for `dst`.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Same as above for an XMM destination.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5335
// Emits a bare REX.W prefix (0x48) — unconditional, unlike the Rex32 helpers.
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

// REX.W with B set for R8-R15 in ModRM.rm.
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

// REX.W merged with the memory operand's precomputed X/B bits.
void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

// REX.W for a reg-reg form: `dst` sets R, `src` sets B.
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// REX.W, XMM destination / GPR source.
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// REX.W, GPR destination / XMM source.
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5361
// REX.W plus the memory operand's X/B bits plus REX.R for `dst`.
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

// Same as above for an XMM destination.
void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5377
// REX for byte-register operands: without a REX prefix, encodings 4-7 mean
// AH/CH/DH/BH, so a bare 0x40 REX is forced when the byte register is
// SPL/BPL/SIL/DIL (register number > 3).
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
  // For src, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = src.AsRegister() > 3;
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Memory-operand variant: forces the bare REX for SPL/BPL/SIL/DIL in `dst`
// and merges the operand's precomputed X/B bits.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5398
// Appends the accumulated constant area to the end of the instruction
// stream, one 32-bit word at a time.
void X86_64Assembler::AddConstantArea() {
  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
  for (size_t i = 0, e = area.size(); i < e; i++) {
    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    EmitInt32(area[i]);
  }
}
5406
// Unconditionally appends `v` and returns its byte offset in the area.
size_t ConstantArea::AppendInt32(int32_t v) {
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v);
  return result;
}
5412
AddInt32(int32_t v)5413 size_t ConstantArea::AddInt32(int32_t v) {
5414 // Look for an existing match.
5415 for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5416 if (v == buffer_[i]) {
5417 return i * elem_size_;
5418 }
5419 }
5420
5421 // Didn't match anything.
5422 return AppendInt32(v);
5423 }
5424
// Returns the byte offset of the 64-bit value `v`, stored as two adjacent
// 32-bit words (low first). Reuses any existing adjacent pair with the same
// contents — including pairs straddling previously-added values — before
// appending.
size_t ConstantArea::AddInt64(int64_t v) {
  int32_t v_low = v;        // Truncation keeps the low 32 bits.
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}
5443
// Adds a double by its 64-bit bit pattern (deduplicated by AddInt64).
size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}

// Adds a float by its 32-bit bit pattern (deduplicated by AddInt32).
size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}
5453
EmitVexPrefixByteZero(bool is_twobyte_form)5454 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5455 // Vex Byte 0,
5456 // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5457 // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5458 uint8_t vex_prefix = 0xC0;
5459 if (is_twobyte_form) {
5460 vex_prefix |= TWO_BYTE_VEX; // 2-Byte Vex
5461 } else {
5462 vex_prefix |= THREE_BYTE_VEX; // 3-Byte Vex
5463 }
5464 return vex_prefix;
5465 }
5466
// Builds VEX byte 1 of the 3-byte form: inverted R/X/B extension bits in
// bits 7:5 and the opcode-map selector (mmmmm) in bits 4:0. R/X/B are stored
// inverted, so a bit is SET here when the corresponding extension is NOT
// requested.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] - inverted REX.R; must read as '1' when R is not requested
      (a clear bit 7 would make the byte decode as LES/LDS) */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bit[6] - inverted REX.X */
  if (!X) {
    // X .
    vex_prefix |= SET_VEX_X;
  }
  /** Bit[5] - inverted REX.B */
  if (!B) {
    // B .
    vex_prefix |= SET_VEX_B;
  }
  /** Bits[4:0] - opcode map selector (e.g. 0F, 0F 38, 0F 3A) */
  vex_prefix |= SET_VEX_M;
  return vex_prefix;
}
5491
// Builds VEX byte 1 of the 2-byte (0xC5) form: inverted R bit, the inverted
// vvvv register specifier in bits 6:3, vector length L in bit 2, and the
// implied-prefix selector pp in bits 1:0.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] - inverted REX.R; must read as '1' when R is not requested
      (a clear bit 7 would make the byte decode as LES/LDS) */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier, stored
      inverted (ones-complement). No register encodes as all-ones (0x78). */
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp" implied prefix (none/66/F3/F2).
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5525
// Builds VEX byte 2 of the 3-byte (0xC4) form: W in bit 7, the inverted
// vvvv register specifier in bits 6:3, vector length L in bit 2, and the
// implied-prefix selector pp in bits 1:0.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W": like REX.W, selects 64-bit operand size / acts as an
      extra opcode bit depending on the instruction. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier, stored
  // inverted (ones-complement).
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp" implied prefix (none/66/F3/F2).
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5557
// Builds VEX byte 2 of the 3-byte form when there is no vvvv operand:
// the specifier field is set to all-ones (no register, inverted encoding).
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W": like REX.W, selects 64-bit operand size / acts as an
      extra opcode bit depending on the instruction. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' unused: all-ones means "no register". */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] - "pp" implied prefix; guarded, though OR-ing
  // SET_VEX_PP_NONE would be a no-op anyway.
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}
5581
5582 } // namespace x86_64
5583 } // namespace art
5584