1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "assembler_x86_64.h"
18
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23
24 namespace art {
25 namespace x86_64 {
26
// Streams the numeric enum value of a general-purpose register (for debug output).
std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
  return os << reg.AsRegister();
}
30
// Streams the numeric enum value of an XMM register (for debug output).
std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
  return os << reg.AsFloatRegister();
}
34
// Streams an x87 stack register in its conventional "ST<n>" form.
std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
  return os << "ST" << static_cast<int>(reg);
}
38
// Pretty-prints an encoded memory operand in AT&T-like syntax by decoding its
// ModRM mod/rm fields and (when present) the SIB byte.
// NOTE(review): the condition order below mirrors the encoder's special cases
// (rm == RSP forces a SIB byte; index == RSP in the SIB means "no index";
// base == RBP with mod 0 means disp32 with no base) — keep it exactly as-is.
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // No displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // SIB with no base: disp32(,index,scale).
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // 8-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // 32-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod == 3 is a register operand, not an address.
      return os << "<address?>";
  }
}
66
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68 if (has_AVX_ || has_AVX2_) {
69 return true;
70 }
71 return false;
72 }
73
74
// call r/m64: opcode FF with ModRM reg digit /2; REX.B emitted only if needed.
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());
}
81
82
// call r/m64 through a memory operand: FF /2.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);
}
89
90
// call rel32: opcode E8 followed by a 32-bit pc-relative offset bound via the label.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;  // Total instruction size: 1 opcode byte + 4 offset bytes.
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98
// push r64: single-byte opcode 50+rd (low 3 register bits folded into the opcode).
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
104
105
// push r/m64 from memory: FF /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);
}
112
113
// push imm: 6A imm8 when the value fits in a byte, otherwise 68 imm32.
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
125
126
// pop r64: single-byte opcode 58+rd.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
132
133
// pop r/m64 into memory: 8F /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);
}
140
141
// mov r64, imm: uses the shorter REX.W C7 /0 imm32 (sign-extended) form when the
// value fits in 32 bits, otherwise the full REX.W B8+rd imm64 form.
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
156
157
// mov r32, imm32: B8+rd imm32 (upper 32 bits of the destination are zeroed by the CPU).
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
165
166
// mov m64, imm32 (sign-extended): REX.W C7 /0.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());  // Only a 32-bit immediate can be encoded for a 64-bit store.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
175
176
// mov r64, r64 using the MR form (89 /r), so src goes in the ModRM reg field.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
184
185
// mov r32, r32 using the RM form (8B /r): dst in the ModRM reg field.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
192
193
// mov r64, m64: REX.W 8B /r (64-bit load).
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
200
201
// mov r32, m32: 8B /r (32-bit load).
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
208
209
// mov m64, r64: REX.W 89 /r (64-bit store).
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
216
217
// mov m32, r32: 89 /r (32-bit store).
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
224
// mov m32, imm32: C7 /0.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
232
// movnti m32, r32 (non-temporal store): 0F C3 /r.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
240
// movnti m64, r64 (non-temporal store): REX.W 0F C3 /r.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248
// Convenience overload: 64-bit conditional move.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, true);
}
252
// cmovcc r, r: 0F 40+cc /r; REX.W selects the 64-bit form.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the second opcode byte.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
260
261
// cmovcc r, m: 0F 40+cc /r with a memory source.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitOperand(dst.LowBits(), src);
}
273
274
// movzx r32, r8: 0F B6 /r. The byte-register source may require a REX prefix
// to address SPL/BPL/SIL/DIL, hence the normalizing REX helper.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
282
283
// movzx r32, m8: 0F B6 /r.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
293
294
// movsx r32, r8: 0F BE /r (sign-extending byte move).
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
302
303
// movsx r32, m8: 0F BE /r.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
313
314
// Deliberately unsupported: byte loads must specify an extension mode.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318
319
// mov m8, r8: 88 /r (byte store).
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
326
327
// mov m8, imm8: C6 /0 imm8 (RAX == 0 supplies the /0 ModRM digit).
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
336
337
// movzx r32, r16: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
345
346
// movzx r32, m16: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
354
355
// movsx r32, r16: 0F BF /r (sign-extending word move).
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
363
364
// movsx r32, m16: 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
372
373
// Deliberately unsupported: word loads must specify an extension mode.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377
378
// mov m16, r16: 66-prefixed 89 /r (operand-size override selects 16 bits).
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
386
387
// mov m16, imm16: 66-prefixed C7 /0 imm16, emitted low byte first (little-endian).
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);  // RAX == 0 supplies the /0 ModRM digit.
  CHECK(imm.is_uint16() || imm.is_int16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}
398
399
// lea r64, m: REX.W 8D /r.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
406
407
// lea r32, m: 8D /r.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414
415
// movaps xmm, xmm: 0F 28 /r; dispatches to the VEX encoding when AVX is available.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427
428
/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  bool is_twobyte_form = true;
  // When dst needs no REX bit, the "store" form (opcode 0x29 with swapped
  // operands) is used below so that the two-byte VEX prefix still suffices.
  bool load = dst.NeedsRex();
  bool store = !load;

  // Only the three-byte VEX prefix can encode REX bits for both operands.
  if (src.NeedsRex()&& dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
  if (is_twobyte_form) {
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode
  if (is_twobyte_form && store) {
    EmitUint8(0x29);  // MR form: VMOVAPS xmm2/m128, xmm1.
  } else {
    EmitUint8(0x28);  // RM form: VMOVAPS xmm1, xmm2/m128.
  }
  // Instruction Operands
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477
// movaps xmm, m128 (aligned load): 0F 28 /r; VEX form when AVX is available.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489
/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // The two-byte VEX prefix cannot carry X/B bits, so it is only usable when
  // the memory operand needs neither.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529
// movups xmm, m128 (unaligned load): 0F 10 /r; VEX form when AVX is available.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
541
/** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX prefix is only possible when the address needs no X/B bits.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581
582
// movaps m128, xmm (aligned store): 0F 29 /r; VEX form when AVX is available.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
594
/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX prefix is only possible when the address needs no X/B bits.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635
// movups m128, xmm (unaligned store): 0F 11 /r; VEX form when AVX is available.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
647
/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX prefix is only possible when the address needs no X/B bits.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688
689
// movss xmm, m32 (scalar single load): F3 0F 10 /r.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
698
699
// movss m32, xmm (scalar single store): F3 0F 11 /r.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
708
709
// movss xmm, xmm: F3 0F 11 /r, deliberately using the store (MR) form.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
718
719
// movsxd r64, r32 (sign-extend 32 to 64 bits): REX.W 63 /r.
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
726
727
// movsxd r64, m32: REX.W 63 /r.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
734
735
// Convenience overload: 64-bit GPR -> XMM move.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, true);
}
739
// Convenience overload: 64-bit XMM -> GPR move.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, true);
}
743
// movd/movq xmm, r32/r64: 66 [REX.W] 0F 6E /r; REX.W selects the 64-bit form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
752
// movd/movq r32/r64, xmm: 66 [REX.W] 0F 7E /r; REX.W selects the 64-bit form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
761
// addss xmm, xmm (scalar single add): F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
770
// addss xmm, m32: F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
779
780
// subss xmm, xmm (scalar single subtract): F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
789
790
// subss xmm, m32: F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
799
800
// mulss xmm, xmm (scalar single multiply): F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
809
810
// mulss xmm, m32: F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
819
820
// divss xmm, xmm (scalar single divide): F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
829
830
// divss xmm, m32: F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
839
840
// addps xmm, xmm (packed single add): 0F 58 /r.
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
848
849
// subps xmm, xmm (packed single subtract): 0F 5C /r.
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
857
// VEX.128.0F.WIG 58 /r VADDPS: dst = add_left + add_right; add_left goes in
// the VEX vvvv field, add_right in ModRM r/m.
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix cannot carry the B bit for the r/m operand.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
886
// VEX.128.0F.WIG 5C /r VSUBPS: dst = src1 - src2; src1 goes in the VEX vvvv
// field, src2 in ModRM r/m.
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  // Two-byte VEX prefix cannot carry the B bit for the r/m operand.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
911
912
// mulps xmm, xmm (packed single multiply): 0F 59 /r.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
920
// VEX.128.0F.WIG 59 /r VMULPS: dst = src1 * src2; src1 goes in the VEX vvvv
// field, src2 in ModRM r/m.
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix cannot carry the B bit for the r/m operand.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
949
// divps xmm, xmm (packed single divide): 0F 5E /r.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
957
// vdivps: AVX packed single-precision divide (VEX.128.0F.WIG 5E /r):
// dst = src1 / src2, lane-wise.
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The two-byte VEX prefix (0xC5) has no B bit, so it is only usable when
  // src2 (the r/m operand) does not need the REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
986
// flds: x87 load single-precision float from memory onto the FPU stack (D9 /0).
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0: opcode extension in the ModRM reg field.
}
992
993
// fsts: x87 store ST(0) to memory as single-precision, without popping (D9 /2).
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2: FST m32fp.
}
999
1000
// fstps: x87 store ST(0) to memory as single-precision and pop (D9 /3).
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3: FSTP m32fp.
}
1006
1007
// movapd: aligned packed-double register move (66 0F 28 /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1020
1021 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // The two-byte VEX prefix (0xC5) carries only the R bit; if both registers
  // need REX extension, the three-byte (0xC4) form is required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // "load" direction (opcode 0x28) keeps dst in the ModRM reg field; when dst
  // does not need REX, the store form (0x29) can be used with operands swapped
  // so that src's extension bit fits in R.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store form: same semantics, swapped operand encoding.
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1068
// movapd: aligned packed-double load from memory (66 0F 28 /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1081
1082 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1122
// movupd: unaligned packed-double load from memory (66 0F 10 /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1135
1136 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1137 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1138 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1139 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1140 bool is_twobyte_form = false;
1141 uint8_t ByteZero, ByteOne, ByteTwo;
1142
1143 // Instruction VEX Prefix
1144 uint8_t rex = src.rex();
1145 bool Rex_x = rex & GET_REX_X;
1146 bool Rex_b = rex & GET_REX_B;
1147 if (!Rex_b && !Rex_x) {
1148 is_twobyte_form = true;
1149 }
1150 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1151 if (is_twobyte_form) {
1152 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1153 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1154 vvvv_reg,
1155 SET_VEX_L_128,
1156 SET_VEX_PP_66);
1157 } else {
1158 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1159 Rex_x,
1160 Rex_b,
1161 SET_VEX_M_0F);
1162 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1163 SET_VEX_L_128,
1164 SET_VEX_PP_66);
1165 }
1166 EmitUint8(ByteZero);
1167 EmitUint8(ByteOne);
1168 if (!is_twobyte_form)
1169 EmitUint8(ByteTwo);
1170 // Instruction Opcode
1171 EmitUint8(0x10);
1172 // Instruction Operands
1173 EmitOperand(dst.LowBits(), src);
1174 }
1175
// movapd: aligned packed-double store to memory (66 0F 29 /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);  // Store form: register goes in the reg field.
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1188
1189 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1228
// movupd: unaligned packed-double store to memory (66 0F 11 /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);  // Store form: register goes in the reg field.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1241
1242 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1282
1283
// movsd: scalar double load from memory (F2 0F 10 /r).
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1292
1293
// movsd: scalar double store to memory (F2 0F 11 /r).
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Store form: register goes in the reg field.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1302
1303
// movsd: scalar double register-to-register move (F2 0F 11 /r).
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1312
1313
// addsd: scalar double add (F2 0F 58 /r): dst += src.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1322
1323
// addsd: scalar double add with memory operand (F2 0F 58 /r): dst += [src].
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1332
1333
// subsd: scalar double subtract (F2 0F 5C /r): dst -= src.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1342
1343
// subsd: scalar double subtract with memory operand (F2 0F 5C /r): dst -= [src].
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1352
1353
// mulsd: scalar double multiply (F2 0F 59 /r): dst *= src.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1362
1363
// mulsd: scalar double multiply with memory operand (F2 0F 59 /r): dst *= [src].
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1372
1373
// divsd: scalar double divide (F2 0F 5E /r): dst /= src.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1382
1383
// divsd: scalar double divide with memory operand (F2 0F 5E /r): dst /= [src].
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1392
1393
// addpd: SSE packed double add (66 0F 58 /r): dst += src, lane-wise.
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1402
1403
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1404 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1405 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1406 bool is_twobyte_form = false;
1407 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1408 if (!add_right.NeedsRex()) {
1409 is_twobyte_form = true;
1410 }
1411 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1412 X86_64ManagedRegister vvvv_reg =
1413 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1414 if (is_twobyte_form) {
1415 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1416 } else {
1417 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1418 /*X=*/ false,
1419 add_right.NeedsRex(),
1420 SET_VEX_M_0F);
1421 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1422 }
1423 EmitUint8(ByteZero);
1424 EmitUint8(ByteOne);
1425 if (!is_twobyte_form) {
1426 EmitUint8(ByteTwo);
1427 }
1428 EmitUint8(0x58);
1429 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1430 }
1431
1432
// subpd: SSE packed double subtract (66 0F 5C /r): dst -= src, lane-wise.
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1441
1442
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1443 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1444 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1445 bool is_twobyte_form = false;
1446 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1447 if (!src2.NeedsRex()) {
1448 is_twobyte_form = true;
1449 }
1450 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1451 X86_64ManagedRegister vvvv_reg =
1452 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1453 if (is_twobyte_form) {
1454 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1455 } else {
1456 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1457 /*X=*/ false,
1458 src2.NeedsRex(),
1459 SET_VEX_M_0F);
1460 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1461 }
1462 EmitUint8(ByteZero);
1463 EmitUint8(ByteOne);
1464 if (!is_twobyte_form) {
1465 EmitUint8(ByteTwo);
1466 }
1467 EmitUint8(0x5C);
1468 EmitXmmRegisterOperand(dst.LowBits(), src2);
1469 }
1470
1471
// mulpd: SSE packed double multiply (66 0F 59 /r): dst *= src, lane-wise.
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1480
// vmulpd: AVX packed double multiply (VEX.128.66.0F.WIG 59 /r):
// dst = src1 * src2, lane-wise.
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The two-byte VEX prefix (0xC5) has no B bit, so it is only usable when
  // src2 (the r/m operand) does not need the REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1509
// divpd: SSE packed double divide (66 0F 5E /r): dst /= src, lane-wise.
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1518
1519
// vdivpd: AVX packed double divide (VEX.128.66.0F.WIG 5E /r):
// dst = src1 / src2, lane-wise.
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The two-byte VEX prefix (0xC5) has no B bit, so it is only usable when
  // src2 (the r/m operand) does not need the REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1548
1549
// movdqa: aligned 128-bit integer register move (66 0F 6F /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1562
1563 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  // The two-byte VEX prefix (0xC5) carries only the R bit; if both registers
  // need REX extension, the three-byte (0xC4) form is required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // "load" direction (opcode 0x6F) keeps dst in the ModRM reg field; when dst
  // does not need REX, the store form (0x7F) can be used with operands swapped
  // so that src's extension bit fits in R.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: same semantics, swapped operand encoding.
  } else {
    EmitUint8(0x6F);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1610
// movdqa: aligned 128-bit integer load from memory (66 0F 6F /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1623
1624 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1664
// movdqu: unaligned 128-bit integer load from memory (F3 0F 6F /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1677
1678 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1679 Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1719
// movdqa: aligned 128-bit integer store to memory (66 0F 7F /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);  // Store form: register goes in the reg field.
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1732
1733 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1772
// movdqu: unaligned 128-bit integer store to memory (F3 0F 7F /r).
// Uses the VEX encoding instead when AVX is available.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Store form: register goes in the reg field.
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1785
1786 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index-register extension bit of the address.
  bool Rex_b = rex & GET_REX_B;  // Base-register extension bit of the address.
  // The two-byte VEX prefix (0xC5) has no X/B bits, so it is only usable when
  // the address needs neither.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1826
// paddb: SSE packed byte add (66 0F FC /r): dst += src, byte-wise (wraps).
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1835
1836
// vpaddb: AVX packed byte add (VEX.128.66.0F.WIG FC /r):
// dst = add_left + add_right, byte-wise (wraps).
void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
  bool is_twobyte_form = true;
  // The two-byte VEX prefix (0xC5) has no B bit, so the three-byte form is
  // needed when add_right (the r/m operand) requires the REX extension.
  if (add_right.NeedsRex()) {
    is_twobyte_form = false;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1865
1866
// psubb: SSE packed byte subtract (66 0F F8 /r): dst -= src, byte-wise (wraps).
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1875
1876
// vpsubb: AVX packed byte subtract (VEX.128.66.0F.WIG F8 /r):
// dst = add_left - add_right, byte-wise (wraps).
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The two-byte VEX prefix (0xC5) has no B bit, so it is only usable when
  // add_right (the r/m operand) does not need the REX extension.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1905
1906
// paddw: SSE packed word add (66 0F FD /r): dst += src, 16-bit-wise (wraps).
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1915
// vpaddw: AVX packed word add (VEX.128.66.0F.WIG FD /r):
// dst = add_left + add_right, 16-bit-wise (wraps).
void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The two-byte VEX prefix (0xC5) has no B bit, so it is only usable when
  // add_right (the r/m operand) does not need the REX extension.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1944
1945
// psubw: packed subtract of 16-bit words, dst -= src.
// Encoding: 66 [REX] 0F F9 /r (SSE2).
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1954
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1955 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1956 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1957 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1958 bool is_twobyte_form = false;
1959 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1960 if (!add_right.NeedsRex()) {
1961 is_twobyte_form = true;
1962 }
1963 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1964 X86_64ManagedRegister vvvv_reg =
1965 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1966 if (is_twobyte_form) {
1967 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1968 } else {
1969 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1970 /*X=*/ false,
1971 add_right.NeedsRex(),
1972 SET_VEX_M_0F);
1973 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1974 }
1975 EmitUint8(ByteZero);
1976 EmitUint8(ByteOne);
1977 if (!is_twobyte_form) {
1978 EmitUint8(ByteTwo);
1979 }
1980 EmitUint8(0xF9);
1981 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1982 }
1983
1984
// pmullw: packed multiply of 16-bit words, keeping the low 16 bits of each product.
// Encoding: 66 [REX] 0F D5 /r (SSE2).
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1993
vpmullw(XmmRegister dst,XmmRegister src1,XmmRegister src2)1994 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1995 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1996 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1997 bool is_twobyte_form = false;
1998 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1999 if (!src2.NeedsRex()) {
2000 is_twobyte_form = true;
2001 }
2002 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2003 X86_64ManagedRegister vvvv_reg =
2004 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2005 if (is_twobyte_form) {
2006 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2007 } else {
2008 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2009 /*X=*/ false,
2010 src2.NeedsRex(),
2011 SET_VEX_M_0F);
2012 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2013 }
2014 EmitUint8(ByteZero);
2015 EmitUint8(ByteOne);
2016 if (!is_twobyte_form) {
2017 EmitUint8(ByteTwo);
2018 }
2019 EmitUint8(0xD5);
2020 EmitXmmRegisterOperand(dst.LowBits(), src2);
2021 }
2022
// paddd: packed add of 32-bit doublewords, dst += src.
// Encoding: 66 [REX] 0F FE /r (SSE2).
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2031
vpaddd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2032 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2033 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2034 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2035 bool is_twobyte_form = false;
2036 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2037 if (!add_right.NeedsRex()) {
2038 is_twobyte_form = true;
2039 }
2040 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2041 X86_64ManagedRegister vvvv_reg =
2042 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2043 if (is_twobyte_form) {
2044 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2045 } else {
2046 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2047 /*X=*/ false,
2048 add_right.NeedsRex(),
2049 SET_VEX_M_0F);
2050 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2051 }
2052 EmitUint8(ByteZero);
2053 EmitUint8(ByteOne);
2054 if (!is_twobyte_form) {
2055 EmitUint8(ByteTwo);
2056 }
2057 EmitUint8(0xFE);
2058 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2059 }
2060
// psubd: packed subtract of 32-bit doublewords, dst -= src.
// Encoding: 66 [REX] 0F FA /r (SSE2).
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2069
2070
// pmulld: packed multiply of 32-bit doublewords, keeping the low 32 bits.
// Encoding: 66 [REX] 0F 38 40 /r (SSE4.1, three-byte opcode map).
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2080
vpmulld(XmmRegister dst,XmmRegister src1,XmmRegister src2)2081 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2082 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2083 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2084 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2085 ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2086 X86_64ManagedRegister vvvv_reg =
2087 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2088 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2089 /*X=*/ false,
2090 src2.NeedsRex(),
2091 SET_VEX_M_0F_38);
2092 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2093 EmitUint8(ByteZero);
2094 EmitUint8(ByteOne);
2095 EmitUint8(ByteTwo);
2096 EmitUint8(0x40);
2097 EmitXmmRegisterOperand(dst.LowBits(), src2);
2098 }
2099
// paddq: packed add of 64-bit quadwords, dst += src.
// Encoding: 66 [REX] 0F D4 /r (SSE2).
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2108
2109
vpaddq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2110 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2111 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2112 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2113 bool is_twobyte_form = false;
2114 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2115 if (!add_right.NeedsRex()) {
2116 is_twobyte_form = true;
2117 }
2118 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2119 X86_64ManagedRegister vvvv_reg =
2120 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2121 if (is_twobyte_form) {
2122 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2123 } else {
2124 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2125 /*X=*/ false,
2126 add_right.NeedsRex(),
2127 SET_VEX_M_0F);
2128 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2129 }
2130 EmitUint8(ByteZero);
2131 EmitUint8(ByteOne);
2132 if (!is_twobyte_form) {
2133 EmitUint8(ByteTwo);
2134 }
2135 EmitUint8(0xD4);
2136 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2137 }
2138
2139
// psubq: packed subtract of 64-bit quadwords, dst -= src.
// Encoding: 66 [REX] 0F FB /r (SSE2).
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2148
vpsubq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2149 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2150 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2151 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2152 bool is_twobyte_form = false;
2153 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2154 if (!add_right.NeedsRex()) {
2155 is_twobyte_form = true;
2156 }
2157 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2158 X86_64ManagedRegister vvvv_reg =
2159 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2160 if (is_twobyte_form) {
2161 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2162 } else {
2163 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2164 /*X=*/ false,
2165 add_right.NeedsRex(),
2166 SET_VEX_M_0F);
2167 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2168 }
2169 EmitUint8(ByteZero);
2170 EmitUint8(ByteOne);
2171 if (!is_twobyte_form) {
2172 EmitUint8(ByteTwo);
2173 }
2174 EmitUint8(0xFB);
2175 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2176 }
2177
2178
// paddusb: packed add of unsigned bytes with unsigned saturation.
// Encoding: 66 [REX] 0F DC /r (SSE2).
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// paddsb: packed add of signed bytes with signed saturation.
// Encoding: 66 [REX] 0F EC /r (SSE2).
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// paddusw: packed add of unsigned words with unsigned saturation.
// Encoding: 66 [REX] 0F DD /r (SSE2).
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// paddsw: packed add of signed words with signed saturation.
// Encoding: 66 [REX] 0F ED /r (SSE2).
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// psubusb: packed subtract of unsigned bytes with unsigned saturation.
// Encoding: 66 [REX] 0F D8 /r (SSE2).
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// psubsb: packed subtract of signed bytes with signed saturation.
// Encoding: 66 [REX] 0F E8 /r (SSE2).
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2237
2238
vpsubd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2239 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2240 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2241 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2242 bool is_twobyte_form = false;
2243 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2244 if (!add_right.NeedsRex()) {
2245 is_twobyte_form = true;
2246 }
2247 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2248 X86_64ManagedRegister vvvv_reg =
2249 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2250 if (is_twobyte_form) {
2251 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2252 } else {
2253 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2254 /*X=*/ false,
2255 add_right.NeedsRex(),
2256 SET_VEX_M_0F);
2257 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2258 }
2259 EmitUint8(ByteZero);
2260 EmitUint8(ByteOne);
2261 if (!is_twobyte_form) {
2262 EmitUint8(ByteTwo);
2263 }
2264 EmitUint8(0xFA);
2265 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2266 }
2267
2268
// psubusw: packed subtract of unsigned words with unsigned saturation.
// Encoding: 66 [REX] 0F D9 /r (SSE2).
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// psubsw: packed subtract of signed words with signed saturation.
// Encoding: 66 [REX] 0F E9 /r (SSE2).
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2287
2288
// cvtsi2ss: convert a signed integer in a GP register to scalar single-precision.
// Convenience overload for the 32-bit source form.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);
}


// cvtsi2ss, register source. Encoding: F3 [REX.W] 0F 2A /r.
// is64bit selects the 64-bit integer source form via a REX.W prefix.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}


// cvtsi2ss, memory source: the integer operand is loaded from `src`.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2322
2323
// cvtsi2sd: convert a signed integer in a GP register to scalar double-precision.
// Convenience overload for the 32-bit source form.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);
}


// cvtsi2sd, register source. Encoding: F2 [REX.W] 0F 2A /r.
// is64bit selects the 64-bit integer source form via a REX.W prefix.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}


// cvtsi2sd, memory source: the integer operand is loaded from `src`.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2357
2358
// cvtss2si: convert scalar single-precision to a 32-bit signed integer,
// rounding per MXCSR. Encoding: F3 [REX] 0F 2D /r.
// Note: only the 32-bit destination form is provided (no is64bit overload).
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2367
2368
// cvtss2sd: widen scalar single-precision to double-precision.
// Encoding: F3 [REX] 0F 5A /r.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// cvtss2sd, memory source form.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2387
2388
// cvtsd2si: convert scalar double-precision to a 32-bit signed integer,
// rounding per MXCSR. Encoding: F2 [REX] 0F 2D /r.
// Note: only the 32-bit destination form is provided (no is64bit overload).
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2397
2398
// cvttss2si: convert scalar single-precision to signed integer with truncation.
// Convenience overload for the 32-bit destination form.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);
}


// cvttss2si. Encoding: F3 [REX.W] 0F 2C /r.
// is64bit selects the 64-bit integer destination via a REX.W prefix.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// cvttsd2si: convert scalar double-precision to signed integer with truncation.
// Convenience overload for the 32-bit destination form.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);
}


// cvttsd2si. Encoding: F2 [REX.W] 0F 2C /r.
// is64bit selects the 64-bit integer destination via a REX.W prefix.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2437
2438
// cvtsd2ss: narrow scalar double-precision to single-precision.
// Encoding: F2 [REX] 0F 5A /r.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// cvtsd2ss, memory source form.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2457
2458
// cvtdq2ps: packed convert of four 32-bit signed integers to single-precision.
// Encoding: [REX] 0F 5B /r (no mandatory prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// cvtdq2pd: packed convert of two 32-bit signed integers to double-precision.
// Encoding: F3 [REX] 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2476
2477
// comiss: ordered compare of scalar singles, setting EFLAGS.
// Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// comiss, memory second-operand form.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}


// comisd: ordered compare of scalar doubles, setting EFLAGS.
// Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// comisd, memory second-operand form.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2514
2515
// ucomiss: unordered compare of scalar singles, setting EFLAGS.
// Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// ucomiss, memory second-operand form.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}


// ucomisd: unordered compare of scalar doubles, setting EFLAGS.
// Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// ucomisd, memory second-operand form.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2552
2553
// roundsd: round scalar double per the immediate rounding-control byte.
// Encoding: 66 [REX] 0F 3A 0B /r ib (SSE4.1). Only the low 8 bits of imm are used.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


// roundss: round scalar single per the immediate rounding-control byte.
// Encoding: 66 [REX] 0F 3A 0A /r ib (SSE4.1).
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
2576
2577
// sqrtsd: scalar double-precision square root.
// Encoding: F2 [REX] 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// sqrtss: scalar single-precision square root.
// Encoding: F3 [REX] 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2596
2597
// xorpd: bitwise XOR of packed doubles, memory source form.
// Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// xorpd, register form.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// xorps: bitwise XOR of packed singles, memory source form.
// Encoding: [REX] 0F 57 /r (no mandatory prefix).
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// xorps, register form.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2634
// pxor: bitwise XOR of the full 128-bit registers.
// Encoding: 66 [REX] 0F EF /r (SSE2).
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2643
2644 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
vpxor(XmmRegister dst,XmmRegister src1,XmmRegister src2)2645 void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2646 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2647 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2648 bool is_twobyte_form = false;
2649 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2650 if (!src2.NeedsRex()) {
2651 is_twobyte_form = true;
2652 }
2653 X86_64ManagedRegister vvvv_reg =
2654 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2655 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2656 if (is_twobyte_form) {
2657 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2658 } else {
2659 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2660 /*X=*/ false,
2661 src2.NeedsRex(),
2662 SET_VEX_M_0F);
2663 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2664 }
2665 EmitUint8(ByteZero);
2666 EmitUint8(ByteOne);
2667 if (!is_twobyte_form) {
2668 EmitUint8(ByteTwo);
2669 }
2670 EmitUint8(0xEF);
2671 EmitXmmRegisterOperand(dst.LowBits(), src2);
2672 }
2673
2674 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
vxorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2675 void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2676 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2677 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2678 bool is_twobyte_form = false;
2679 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2680 if (!src2.NeedsRex()) {
2681 is_twobyte_form = true;
2682 }
2683 X86_64ManagedRegister vvvv_reg =
2684 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2685 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2686 if (is_twobyte_form) {
2687 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2688 } else {
2689 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2690 /*X=*/ false,
2691 src2.NeedsRex(),
2692 SET_VEX_M_0F);
2693 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2694 }
2695 EmitUint8(ByteZero);
2696 EmitUint8(ByteOne);
2697 if (!is_twobyte_form) {
2698 EmitUint8(ByteTwo);
2699 }
2700 EmitUint8(0x57);
2701 EmitXmmRegisterOperand(dst.LowBits(), src2);
2702 }
2703
2704 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
vxorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2705 void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2706 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2707 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2708 bool is_twobyte_form = false;
2709 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2710 if (!src2.NeedsRex()) {
2711 is_twobyte_form = true;
2712 }
2713 X86_64ManagedRegister vvvv_reg =
2714 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2715 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2716 if (is_twobyte_form) {
2717 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2718 } else {
2719 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2720 /*X=*/ false,
2721 src2.NeedsRex(),
2722 SET_VEX_M_0F);
2723 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2724 }
2725 EmitUint8(ByteZero);
2726 EmitUint8(ByteOne);
2727 if (!is_twobyte_form) {
2728 EmitUint8(ByteTwo);
2729 }
2730 EmitUint8(0x57);
2731 EmitXmmRegisterOperand(dst.LowBits(), src2);
2732 }
2733
// andpd: bitwise AND of packed doubles, memory source form.
// Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}

// andpd, register form.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// andps: bitwise AND of packed singles.
// Encoding: [REX] 0F 54 /r (no mandatory prefix).
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2759
// pand: bitwise AND of the full 128-bit registers.
// Encoding: 66 [REX] 0F DB /r (SSE2).
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2768
2769 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
vpand(XmmRegister dst,XmmRegister src1,XmmRegister src2)2770 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2771 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2772 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2773 bool is_twobyte_form = false;
2774 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2775 if (!src2.NeedsRex()) {
2776 is_twobyte_form = true;
2777 }
2778 X86_64ManagedRegister vvvv_reg =
2779 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2780 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2781 if (is_twobyte_form) {
2782 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2783 } else {
2784 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2785 /*X=*/ false,
2786 src2.NeedsRex(),
2787 SET_VEX_M_0F);
2788 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2789 }
2790 EmitUint8(ByteZero);
2791 EmitUint8(ByteOne);
2792 if (!is_twobyte_form) {
2793 EmitUint8(ByteTwo);
2794 }
2795 EmitUint8(0xDB);
2796 EmitXmmRegisterOperand(dst.LowBits(), src2);
2797 }
2798
2799 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; fall back to the
  // three-byte form (C4) when src2 needs it.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // ANDPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2828
2829 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // ANDPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2858
// BMI1 ANDN (VEX.LZ.0F38.W1 F2 /r): dst = ~src1 & src2 on 64-bit GPRs.
// NOTE(review): unlike the AVX helpers above there is no CPU-feature DCHECK
// here — presumably callers guarantee BMI1 support; verify at call sites.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // ANDN always needs the three-byte VEX form to select the 0F38 opcode map.
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  // W=1 selects 64-bit operand size; src1 goes in VEX.vvvv.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
2877
// ANDNPD xmm1, xmm2/m128 (66 0F 55 /r): dst = ~dst & src (packed doubles).
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2886
// ANDNPS xmm1, xmm2/m128 (0F 55 /r): dst = ~dst & src (packed singles).
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2894
// PANDN xmm1, xmm2/m128 (66 0F DF /r): dst = ~dst & src (bitwise).
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2903
2904 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDF);  // PANDN opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2933
2934 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // ANDNPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2963
2964 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // ANDNPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2993
// ORPD xmm1, xmm2/m128 (66 0F 56 /r): bitwise OR of packed doubles.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3002
// ORPS xmm1, xmm2/m128 (0F 56 /r): bitwise OR of packed singles.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3010
// POR xmm1, xmm2/m128 (66 0F EB /r): bitwise OR.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3019
3020 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEB);  // POR opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3049
3050 /* VEX.128.0F 56 /r VORPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // ORPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3079
3080 /* VEX.128.66.0F 56 /r VORPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode REX.B for src2; use it only when possible.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // ORPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3109
// PAVGB xmm1, xmm2/m128 (66 0F E0 /r): average of packed unsigned bytes.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3118
// PAVGW xmm1, xmm2/m128 (66 0F E3 /r): average of packed unsigned words.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3127
// PSADBW xmm1, xmm2/m128 (66 0F F6 /r): sums of absolute byte differences.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3136
// PMADDWD xmm1, xmm2/m128 (66 0F F5 /r): multiply signed words, add adjacent
// dword products.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3145
vpmaddwd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3146 void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3147 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3148 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3149 bool is_twobyte_form = false;
3150 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3151 if (!src2.NeedsRex()) {
3152 is_twobyte_form = true;
3153 }
3154 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3155 X86_64ManagedRegister vvvv_reg =
3156 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3157 if (is_twobyte_form) {
3158 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3159 } else {
3160 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3161 /*X=*/ false,
3162 src2.NeedsRex(),
3163 SET_VEX_M_0F);
3164 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3165 }
3166 EmitUint8(ByteZero);
3167 EmitUint8(ByteOne);
3168 if (!is_twobyte_form) {
3169 EmitUint8(ByteTwo);
3170 }
3171 EmitUint8(0xF5);
3172 EmitXmmRegisterOperand(dst.LowBits(), src2);
3173 }
3174
// PHADDW xmm1, xmm2/m128 (66 0F 38 01 /r): horizontal add of packed words.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3184
// PHADDD xmm1, xmm2/m128 (66 0F 38 02 /r): horizontal add of packed dwords.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3194
// HADDPS xmm1, xmm2/m128 (F2 0F 7C /r): horizontal add of packed singles.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3203
// HADDPD xmm1, xmm2/m128 (66 0F 7C /r): horizontal add of packed doubles.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3212
// PHSUBW xmm1, xmm2/m128 (66 0F 38 05 /r): horizontal subtract of words.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3222
// PHSUBD xmm1, xmm2/m128 (66 0F 38 06 /r): horizontal subtract of dwords.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3232
// HSUBPS xmm1, xmm2/m128 (F2 0F 7D /r): horizontal subtract of packed singles.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3241
// HSUBPD xmm1, xmm2/m128 (66 0F 7D /r): horizontal subtract of packed doubles.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3250
// PMINSB xmm1, xmm2/m128 (66 0F 38 38 /r): min of packed signed bytes.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3260
// PMAXSB xmm1, xmm2/m128 (66 0F 38 3C /r): max of packed signed bytes.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3270
// PMINSW xmm1, xmm2/m128 (66 0F EA /r): min of packed signed words.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3279
// PMAXSW xmm1, xmm2/m128 (66 0F EE /r): max of packed signed words.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3288
// PMINSD xmm1, xmm2/m128 (66 0F 38 39 /r): min of packed signed dwords.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3298
// PMAXSD xmm1, xmm2/m128 (66 0F 38 3D /r): max of packed signed dwords.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3308
// PMINUB xmm1, xmm2/m128 (66 0F DA /r): min of packed unsigned bytes.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3317
// PMAXUB xmm1, xmm2/m128 (66 0F DE /r): max of packed unsigned bytes.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3326
// PMINUW xmm1, xmm2/m128 (66 0F 38 3A /r): min of packed unsigned words.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3336
// PMAXUW xmm1, xmm2/m128 (66 0F 38 3E /r): max of packed unsigned words.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3346
// PMINUD xmm1, xmm2/m128 (66 0F 38 3B /r): min of packed unsigned dwords.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3356
// PMAXUD xmm1, xmm2/m128 (66 0F 38 3F /r): max of packed unsigned dwords.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3366
// MINPS xmm1, xmm2/m128 (0F 5D /r): min of packed singles.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3374
// MAXPS xmm1, xmm2/m128 (0F 5F /r): max of packed singles.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3382
// MINPD xmm1, xmm2/m128 (66 0F 5D /r): min of packed doubles.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3391
// MAXPD xmm1, xmm2/m128 (66 0F 5F /r): max of packed doubles.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3400
// PCMPEQB xmm1, xmm2/m128 (66 0F 74 /r): compare packed bytes for equality.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3409
// PCMPEQW xmm1, xmm2/m128 (66 0F 75 /r): compare packed words for equality.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3418
// PCMPEQD xmm1, xmm2/m128 (66 0F 76 /r): compare packed dwords for equality.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3427
// PCMPEQQ xmm1, xmm2/m128 (66 0F 38 29 /r): compare packed qwords for equality.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3437
// PCMPGTB xmm1, xmm2/m128 (66 0F 64 /r): signed greater-than on packed bytes.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3446
// PCMPGTW xmm1, xmm2/m128 (66 0F 65 /r): signed greater-than on packed words.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3455
// PCMPGTD xmm1, xmm2/m128 (66 0F 66 /r): signed greater-than on packed dwords.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3464
// PCMPGTQ xmm1, xmm2/m128 (66 0F 38 37 /r): signed greater-than on packed qwords.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3474
// SHUFPD xmm1, xmm2/m128, imm8 (66 0F C6 /r ib): shuffle packed doubles,
// imm8 selects the source element for each destination lane.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3484
3485
// SHUFPS xmm1, xmm2/m128, imm8 (0F C6 /r ib): shuffle packed singles,
// imm8 selects the source element for each destination lane.
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3494
3495
// PSHUFD xmm1, xmm2/m128, imm8 (66 0F 70 /r ib): shuffle packed dwords by imm8.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3505
3506
// PUNPCKLBW xmm1, xmm2/m128 (66 0F 60 /r): interleave low bytes.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3515
3516
// PUNPCKLWD xmm1, xmm2/m128 (66 0F 61 /r): interleave low words.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3525
3526
// PUNPCKLDQ xmm1, xmm2/m128 (66 0F 62 /r): interleave low dwords.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3535
3536
// PUNPCKLQDQ xmm1, xmm2/m128 (66 0F 6C /r): interleave low qwords.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3545
3546
// PUNPCKHBW xmm1, xmm2/m128 (66 0F 68 /r): interleave high bytes.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3555
3556
// PUNPCKHWD xmm1, xmm2/m128 (66 0F 69 /r): interleave high words.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3565
3566
// PUNPCKHDQ xmm1, xmm2/m128 (66 0F 6A /r): interleave high dwords.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3575
3576
// PUNPCKHQDQ xmm1, xmm2/m128 (66 0F 6D /r): interleave high qwords.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3585
3586
// PSLLW xmm, imm8 (66 0F 71 /6 ib): shift packed words left by an immediate.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed: the opcode extension occupies the reg field.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3597
3598
// PSLLD xmm, imm8 (66 0F 72 /6 ib): shift packed dwords left by an immediate.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3609
3610
// PSLLQ xmm, imm8 (66 0F 73 /6 ib): shift packed qwords left by an immediate.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}
3621
3622
// PSRAW xmm, imm8 (66 0F 71 /4 ib): arithmetic right shift of packed words.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension.
  EmitUint8(shift_count.value());
}
3633
3634
// PSRAD xmm, imm8 (66 0F 72 /4 ib): arithmetic right shift of packed dwords.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension.
  EmitUint8(shift_count.value());
}
3645
3646
// PSRLW xmm, imm8 (66 0F 71 /2 ib): logical right shift of packed words.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3657
3658
// PSRLD xmm, imm8 (66 0F 72 /2 ib): logical right shift of packed dwords.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3669
3670
// PSRLQ xmm, imm8 (66 0F 73 /2 ib): logical right shift of packed qwords.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}
3681
3682
// PSRLDQ xmm, imm8 (66 0F 73 /3 ib): shift the whole register right by bytes.
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);  // /3 opcode extension.
  EmitUint8(shift_count.value());
}
3693
3694
// x87 load/store through memory. Each instruction is one opcode byte plus a
// ModRM /digit selecting the operation.

// FLD m64fp (DD /0): push a 64-bit float from memory onto the x87 stack.
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// FST m64fp (DD /2): store ST(0) to memory without popping.
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// FSTP m64fp (DD /3): store ST(0) to memory and pop the x87 stack.
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// FSTSW AX: FWAIT (9B) followed by FNSTSW AX (DF E0) — stores the x87 status
// word into AX after pending FP exceptions are handled.
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// FNSTCW m16 (D9 /7): store the x87 control word (no-wait form).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// FLDCW m16 (D9 /5): load the x87 control word.
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// FISTP m64int (DF /7): store ST(0) as a 64-bit integer and pop.
// NOTE(review): the "l" suffix here means 64-bit, unlike AT&T syntax where
// the 64-bit form is spelled fistpll — confirm against call sites.
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// FISTP m32int (DB /3): store ST(0) as a 32-bit integer and pop.
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// FILD m64int (DF /5): push a 64-bit integer from memory onto the x87 stack.
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// FILD m32int (DB /0): push a 32-bit integer from memory onto the x87 stack.
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}
3764
3765
// FINCSTP (D9 F7): increment the x87 top-of-stack pointer.
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// FFREE ST(i) (DD C0+i): mark stack register i as empty.
// NOTE(review): the CHECK rejects index 7 even though the encoding supports
// ST(0)..ST(7) — presumably deliberate for how ART uses the stack; confirm.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


// FSIN (D9 FE): replace ST(0) with its sine.
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// FCOS (D9 FF): replace ST(0) with its cosine.
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// FPTAN (D9 F2): partial tangent of ST(0).
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// FUCOMPP (DA E9): unordered compare ST(0) with ST(1), then pop twice.
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// FPREM (D9 F8): partial remainder of ST(0) / ST(1).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3813
3814
xchgl(CpuRegister dst,CpuRegister src)3815 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
3816 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3817 // There is a short version for rax.
3818 // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3819 // work.
3820 const bool src_rax = src.AsRegister() == RAX;
3821 const bool dst_rax = dst.AsRegister() == RAX;
3822 if (src_rax || dst_rax) {
3823 EmitOptionalRex32(src_rax ? dst : src);
3824 EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3825 return;
3826 }
3827
3828 // General case.
3829 EmitOptionalRex32(src, dst);
3830 EmitUint8(0x87);
3831 EmitRegisterOperand(src.LowBits(), dst.LowBits());
3832 }
3833
3834
// Exchange two 64-bit registers (XCHG r64, r64).
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is a short version for rax.
  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
  // work.
  const bool src_rax = src.AsRegister() == RAX;
  const bool dst_rax = dst.AsRegister() == RAX;
  if (src_rax || dst_rax) {
    // If src == target, emit a nop instead.
    // (At 64-bit width this is safe: XCHG RAX, RAX is an architectural no-op,
    // so a plain NOP is equivalent.)
    if (src_rax && dst_rax) {
      EmitUint8(0x90);
    } else {
      // Short form: REX.W + 0x90+r exchanges RAX with the other register.
      EmitRex64(src_rax ? dst : src);
      EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
    }
    return;
  }

  // General case: XCHG r/m64, r64 (REX.W 0x87 /r).
  EmitRex64(src, dst);
  EmitUint8(0x87);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
3858
3859
// XCHG r32, m32 (0x87 /r). Note: XCHG with a memory operand carries an
// implicit LOCK on x86.
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3866
3867
// CMP m8, imm8 (0x80 /7 ib). The immediate is accepted as int32 but only its
// low byte is emitted — callers are responsible for passing a byte-sized value.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}


// CMP m16, imm (operand-size prefix + /7). EmitComplex picks the short
// sign-extended imm8 form when possible.
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}
3885
3886
// CMP r32, imm — opcode-extension /7; EmitComplex chooses imm8 vs imm32 form.
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// CMP r32, r32 (0x3B /r: reg0 compared against reg1).
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// CMP r32, m32 (0x3B /r).
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// CMP m32, r32 (0x39 /r: memory operand on the left).
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// CMP m32, imm (/7).
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}
3925
3926
// CMP r64, r64 (REX.W 0x3B /r).
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// CMP r64, imm32 (/7); the immediate is sign-extended to 64 bits by the CPU,
// so only 32-bit immediates are representable.
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// CMP r64, m64 (REX.W 0x3B /r).
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// CMP m64, imm32 (/7).
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
3957
3958
// ADD r32, r32 (0x03 /r: dst += src).
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// ADD r32, m32 (0x03 /r).
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
3973
3974
// TEST r32, r32 (0x85 /r): AND the operands, set flags, discard the result.
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// TEST r32, m32 (0x85 /r).
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// TEST r32, imm — picks the shortest encoding available.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    // Registers 0..3 have legacy byte forms (AL/CL/DL/BL) needing no REX.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);  // TEST AL, imm8.
    } else {
      EmitUint8(0xF6);  // TEST r/m8, imm8 (/0); 0xC0+reg is register-direct ModRM.
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);  // TEST EAX, imm32 — no ModRM byte.
    EmitImmediate(immediate);
  } else {
    // General form: TEST r/m32, imm32 (0xF7 /0).
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}
4015
4016
// TEST r64, r64 (REX.W 0x85 /r).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// TEST r64, m64 (REX.W 0x85 /r).
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
4031
4032
testb(const Address & dst,const Immediate & imm)4033 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4034 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4035 EmitOptionalRex32(dst);
4036 EmitUint8(0xF6);
4037 EmitOperand(Register::RAX, dst);
4038 CHECK(imm.is_int8());
4039 EmitUint8(imm.value() & 0xFF);
4040 }
4041
4042
// TEST m32, imm32 (0xF7 /0 id).
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
4050
4051
// AND r32, r32 (0x23 /r: dst &= src).
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// AND r32, m32 (0x23 /r).
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


// AND r32, imm — opcode-extension /4.
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


// AND r64, imm32 (/4); immediate is sign-extended to 64 bits by the CPU.
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


// AND r64, r64 (REX.W 0x23 /r).
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// AND r64, m64 (REX.W 0x23 /r).
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}
4097
4098
// OR r32, r32 (0x0B /r: dst |= src).
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// OR r32, m32 (0x0B /r).
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


// OR r32, imm — opcode-extension /1.
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


// OR r64, imm32 (/1); immediate is sign-extended to 64 bits by the CPU.
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


// OR r64, r64 (REX.W 0x0B /r).
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// OR r64, m64 (REX.W 0x0B /r).
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
4144
4145
// XOR r32, r32 (0x33 /r: dst ^= src).
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// XOR r32, m32 (0x33 /r).
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


// XOR r32, imm — opcode-extension /6.
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


// XOR r64, r64 (REX.W 0x33 /r).
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// XOR r64, imm32 (/6); immediate is sign-extended to 64 bits by the CPU.
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

// XOR r64, m64 (REX.W 0x33 /r).
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
4190
4191
4192 #if 0
4193 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
4194 // REX.WRXB
4195 // W - 64-bit operand
4196 // R - MODRM.reg
4197 // X - SIB.index
4198 // B - MODRM.rm/SIB.base
4199 uint8_t rex = force ? 0x40 : 0;
4200 if (w) {
4201 rex |= 0x48; // REX.W000
4202 }
4203 if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
4204 rex |= 0x44; // REX.0R00
4205 *r = static_cast<Register>(*r - 8);
4206 }
4207 if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
4208 rex |= 0x42; // REX.00X0
4209 *x = static_cast<Register>(*x - 8);
4210 }
4211 if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
4212 rex |= 0x41; // REX.000B
4213 *b = static_cast<Register>(*b - 8);
4214 }
4215 if (rex != 0) {
4216 EmitUint8(rex);
4217 }
4218 }
4219
4220 void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
4221 // REX.WRXB
4222 // W - 64-bit operand
4223 // R - MODRM.reg
4224 // X - SIB.index
4225 // B - MODRM.rm/SIB.base
4226 uint8_t rex = mem->rex();
4227 if (force) {
4228 rex |= 0x40; // REX.0000
4229 }
4230 if (w) {
4231 rex |= 0x48; // REX.W000
4232 }
4233 if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
4234 rex |= 0x44; // REX.0R00
4235 *dst = static_cast<Register>(*dst - 8);
4236 }
4237 if (rex != 0) {
4238 EmitUint8(rex);
4239 }
4240 }
4241
4242 void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
4243 #endif
4244
// ADD r32, imm — opcode-extension /0; EmitComplex picks imm8 vs imm32 form.
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// ADD r64, imm32 (/0); immediate is sign-extended to 64 bits by the CPU.
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


// ADD r64, m64 (REX.W 0x03 /r).
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


// ADD r64, r64.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// ADD m32, r32 (0x01 /r: memory destination form).
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


// ADD m32, imm (/0).
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


// ADD m16, imm (0x66 prefix + /0). Accepts either signed or unsigned 16-bit
// immediates since both describe the same 16 emitted bits.
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}
4299
4300
// SUB r32, r32 (0x2B /r: dst -= src).
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// SUB r32, imm — opcode-extension /5.
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);
}


// SUB r64, imm32 (/5); immediate is sign-extended to 64 bits by the CPU.
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}


// SUB r64, r64 (REX.W 0x2B /r).
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4330
4331
subq(CpuRegister reg,const Address & address)4332 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4333 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4334 EmitRex64(reg, address);
4335 EmitUint8(0x2B);
4336 EmitOperand(reg.LowBits() & 7, address);
4337 }
4338
4339
// SUB r32, m32 (0x2B /r).
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
4346
4347
// CDQ (0x99): sign-extend EAX into EDX:EAX.
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// CQO (REX.W 0x99): sign-extend RAX into RDX:RAX.
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


// IDIV r32 (0xF7 /7): signed divide EDX:EAX by reg; 0xF8 | low bits forms the
// register-direct ModRM byte (mod=11, reg=/7).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// IDIV r64 (REX.W 0xF7 /7): signed divide RDX:RAX by reg.
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}
4375
4376
// IMUL r32, r32 (0F AF /r): two-operand form, dst *= src.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

// IMUL r32, r32, imm: dst = src * imm. Chooses the short imm8 form (0x6B)
// when the immediate fits, otherwise the full imm32 form (0x69).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// IMUL r32, imm — convenience wrapper: reg = reg * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


// IMUL r32, m32 (0F AF /r).
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// IMUL r64, r64 (REX.W 0F AF /r).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// IMUL r64, imm — convenience wrapper: reg = reg * imm.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

// IMUL r64, r64, imm: dst = reg * imm, short imm8 form when possible.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

// IMUL r64, m64 (REX.W 0F AF /r).
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// One-operand IMUL r32 (0xF7 /5): EDX:EAX = EAX * reg.
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// One-operand IMUL r64 (REX.W 0xF7 /5): RDX:RAX = RAX * reg.
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// One-operand IMUL m32 (0xF7 /5).
void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


// Unsigned one-operand MUL r32 (0xF7 /4): EDX:EAX = EAX * reg.
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


// Unsigned one-operand MUL m32 (0xF7 /4).
void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}
4502
4503
// Shift/rotate wrappers. All delegate to EmitGenericShift(wide, digit, ...)
// where `wide` selects 64-bit (REX.W) operation and `digit` is the ModRM
// opcode extension: /0 = ROL, /1 = ROR, /4 = SHL, /5 = SHR, /7 = SAR.
// The shifter-register overloads shift by CL.
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4602
4603
// NEG r32 (0xF7 /3): two's-complement negate.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// NEG r64 (REX.W 0xF7 /3).
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// NOT r32 (0xF7 /2): one's-complement negate. The hand-built ModRM byte
// 0xD0 | low bits is the register-direct encoding of /2 (mod=11, reg=010).
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


// NOT r64 (REX.W 0xF7 /2) — same instruction as notl, expressed via
// EmitOperand rather than a hand-built ModRM byte.
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
4634
4635
void X86_64Assembler::enter(const Immediate& imm) {
  // ENTER imm16, 0 (C8 iw 00): create a stack frame of `imm` bytes, nesting level 0.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  // Frame size is a little-endian 16-bit immediate.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  // Nesting level byte is always zero.
  EmitUint8(0x00);
}
4644
4645
void X86_64Assembler::leave() {
  // LEAVE (C9): tear down the current stack frame (mov rsp, rbp; pop rbp).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}
4650
4651
void X86_64Assembler::ret() {
  // RET (C3): near return.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}
4656
4657
void X86_64Assembler::ret(const Immediate& imm) {
  // RET imm16 (C2 iw): near return, popping `imm` extra bytes off the stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  // Little-endian 16-bit immediate.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}
4665
4666
4667
void X86_64Assembler::nop() {
  // NOP (90): one-byte no-op.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}
4672
4673
void X86_64Assembler::int3() {
  // INT3 (CC): breakpoint trap.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}
4678
4679
void X86_64Assembler::hlt() {
  // HLT (F4): halt the processor (privileged; faults in user mode).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4684
4685
void X86_64Assembler::j(Condition condition, Label* label) {
  // Conditional jump to `label`. For a bound (backward) label, pick the
  // short rel8 form (70+cc) when it fits, else the long rel32 form
  // (0F 80+cc). Unbound labels always use the long form so the 32-bit
  // displacement slot can thread the fixup chain.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // Opcode + rel8.
    static const int kLongSize = 6;   // 0F, opcode + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      // Displacement is relative to the end of the instruction.
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}
4707
4708
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  // Conditional jump to a NearLabel: always the 2-byte rel8 form (70+cc).
  // The caller guarantees the target is within rel8 range.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // Opcode + rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}
4723
4724
void X86_64Assembler::jrcxz(NearLabel* label) {
  // JRCXZ (E3 rel8): jump to `label` if RCX is zero. Only a rel8 form
  // exists, hence the NearLabel requirement.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // Opcode + rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4739
4740
void X86_64Assembler::jmp(CpuRegister reg) {
  // JMP r/m64 (FF /4): indirect jump through a register.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}
4747
void X86_64Assembler::jmp(const Address& address) {
  // JMP r/m64 (FF /4): indirect jump through a memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}
4754
void X86_64Assembler::jmp(Label* label) {
  // Unconditional jump to `label`. Bound (backward) targets use the short
  // rel8 form (EB) when in range, else the rel32 form (E9). Unbound labels
  // always use the rel32 form so the displacement slot can hold the fixup
  // chain.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // EB + rel8.
    static const int kLongSize = 5;   // E9 + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      // Displacement is relative to the end of the instruction.
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}
4774
4775
void X86_64Assembler::jmp(NearLabel* label) {
  // Unconditional short jump (EB rel8) to a NearLabel; the caller
  // guarantees the target is within rel8 range.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // EB + rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
4790
4791
void X86_64Assembler::rep_movsw() {
  // REP MOVSW (66 F3 A5): copy RCX 16-bit words from [RSI] to [RDI].
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word elements.
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA5);
}
4798
4799
X86_64Assembler* X86_64Assembler::lock() {
  // Emit the LOCK prefix (F0); returns `this` so the locked instruction can
  // be chained, e.g. lock()->cmpxchgl(...).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
4805
4806
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  // CMPXCHG r/m32, r32 (0F B1): compare EAX with [address]; if equal store
  // `reg`, else load [address] into EAX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}
4814
4815
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  // CMPXCHG r/m64, r64 (REX.W 0F B1): 64-bit variant of cmpxchgl.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}
4823
4824
void X86_64Assembler::mfence() {
  // MFENCE (0F AE F0): full memory barrier.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}
4831
4832
X86_64Assembler* X86_64Assembler::gs() {
  // Emit the GS segment-override prefix (65); returns `this` for chaining
  // onto the following instruction.
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
4839
4840
AddImmediate(CpuRegister reg,const Immediate & imm)4841 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
4842 int value = imm.value();
4843 if (value != 0) {
4844 if (value > 0) {
4845 addl(reg, imm);
4846 } else {
4847 subl(reg, Immediate(value));
4848 }
4849 }
4850 }
4851
4852
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  // SETcc r/m8 (0F 90+cc): set the low byte of `dst` to 0 or 1 per
  // `condition`.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  // ModRM with mod=11, reg field unused by SETcc.
  EmitUint8(0xC0 + dst.LowBits());
}
4863
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  // BLSI r64, r/m64 (VEX.NDD.LZ.0F38.W1 F3 /3): extract lowest set bit of
  // `src` into `dst`. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /3 selects BLSI.
  EmitRegisterOperand(3, src.LowBits());
}
4881
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  // BLSMSK r64, r/m64 (VEX.NDD.LZ.0F38.W1 F3 /2): mask up to the lowest set
  // bit of `src`. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /2 selects BLSMSK.
  EmitRegisterOperand(2, src.LowBits());
}
4899
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  // BLSR r64, r/m64 (VEX.NDD.LZ.0F38.W1 F3 /1): reset the lowest set bit of
  // `src` into `dst`. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /1 selects BLSR.
  EmitRegisterOperand(1, src.LowBits());
}
4917
void X86_64Assembler::bswapl(CpuRegister dst) {
  // BSWAP r32 (0F C8+rd): byte-swap the 32-bit register.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
4924
void X86_64Assembler::bswapq(CpuRegister dst) {
  // BSWAP r64 (REX.W 0F C8+rd): byte-swap the 64-bit register.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
4931
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  // BSF r32, r/m32 (0F BC): bit-scan forward (index of lowest set bit).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4939
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  // BSF r32, m32 (0F BC): bit-scan forward from a memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}
4947
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  // BSF r64, r/m64 (REX.W 0F BC): 64-bit bit-scan forward.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4955
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  // BSF r64, m64 (REX.W 0F BC): 64-bit bit-scan forward from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}
4963
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  // BSR r32, r/m32 (0F BD): bit-scan reverse (index of highest set bit).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4971
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  // BSR r32, m32 (0F BD): bit-scan reverse from a memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
4979
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  // BSR r64, r/m64 (REX.W 0F BD): 64-bit bit-scan reverse.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4987
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  // BSR r64, m64 (REX.W 0F BD): 64-bit bit-scan reverse from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
4995
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  // POPCNT r32, r/m32 (F3 0F B8): population count. The F3 prefix must
  // precede any REX prefix.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5004
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  // POPCNT r32, m32 (F3 0F B8): population count of a memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, before REX.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5013
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  // POPCNT r64, r/m64 (F3 REX.W 0F B8): 64-bit population count.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, before REX.
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5022
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  // POPCNT r64, m64 (F3 REX.W 0F B8): 64-bit population count from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, before REX.
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5031
void X86_64Assembler::repne_scasb() {
  // REPNE SCASB (F2 AE): scan bytes at [RDI] for AL, RCX times.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}
5037
void X86_64Assembler::repne_scasw() {
  // REPNE SCASW (66 F2 AF): scan 16-bit words at [RDI] for AX, RCX times.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word elements.
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}
5044
void X86_64Assembler::repe_cmpsw() {
  // REPE CMPSW (66 F3 A7): compare 16-bit words at [RSI]/[RDI], RCX times.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word elements.
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}
5051
5052
void X86_64Assembler::repe_cmpsl() {
  // REPE CMPSD (F3 A7): compare 32-bit dwords at [RSI]/[RDI], RCX times.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}
5058
5059
void X86_64Assembler::repe_cmpsq() {
  // REPE CMPSQ (F3 REX.W A7): compare 64-bit qwords at [RSI]/[RDI], RCX
  // times. The REP prefix precedes REX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
5066
5067
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // Materialize a double constant in `dst` by pushing its 64-bit pattern
  // (as two 32-bit halves) onto the stack, loading it with movsd, and
  // popping it back off.
  // TODO: Need to have a code constants table.
  int64_t constant = bit_cast<int64_t, double>(value);
  pushq(Immediate(High32Bits(constant)));
  pushq(Immediate(Low32Bits(constant)));
  movsd(dst, Address(CpuRegister(RSP), 0));
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
}
5076
5077
void X86_64Assembler::Align(int alignment, int offset) {
  // Pad with NOPs until (offset + current position) is a multiple of
  // `alignment` (which must be a power of two).
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instruction until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
    nop();
  }
}
5085
5086
void X86_64Assembler::Bind(Label* label) {
  // Bind `label` to the current buffer position and patch every forward
  // reference. Unbound labels thread a linked list through the 32-bit
  // displacement slots: each slot holds the position of the previous link.
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);  // Next link in the chain.
    // Rewrite the slot with the real rel32 displacement (relative to the
    // end of the 4-byte slot).
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}
5098
5099
void X86_64Assembler::Bind(NearLabel* label) {
  // Bind a NearLabel and patch its forward references. The chain is
  // threaded through the 8-bit displacement slots: each slot stores the
  // delta back to the previous link (0 terminates the chain).
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Distance to previous link.
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));  // NearLabel targets must fit in rel8.
    buffer_.Store<int8_t>(position, offset);
    // Follow the chain; a zero delta marks the last link.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5113
5114
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  // Emit a pre-encoded ModRM/SIB/displacement sequence, merging
  // `reg_or_opcode` (a register number or /digit opcode extension) into the
  // reg field of the ModRM byte.
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // Reg field must be vacant.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  // Some operands (e.g. thread-offset addresses) carry a fixup to be
  // resolved later.
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
5132
5133
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  // Emit an immediate: 16-bit little-endian when `is_16_op`, else 32-bit if
  // it fits, else the full 64-bit value.
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
5144
5145
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  // Emit an ALU group-1 instruction (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP,
  // selected by the /digit in `reg_or_opcode`) with an immediate, choosing
  // the shortest encoding available.
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    // General form: opcode 81 /digit with a full immediate.
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5167
5168
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  // Emit a rel32 displacement to `label`. Bound labels get the final offset
  // (relative to the end of the instruction); unbound labels get a chain
  // link to be patched in Bind().
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}
5178
5179
void X86_64Assembler::EmitLabelLink(Label* label) {
  // Append this site to the label's fixup chain: the 32-bit slot holds the
  // previous link position, and the label now points at this slot.
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}
5186
5187
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  // Append this site to a NearLabel's fixup chain. Only one byte is
  // available, so the slot stores the delta back to the previous link
  // (0 means this is the first/last link).
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // Links must be within 255 bytes of each other.
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5201
5202
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  // Shared encoder for shift/rotate-by-immediate: opcode D1 /digit for a
  // count of 1, else C1 /digit with an imm8 count. `wide` selects the
  // 64-bit (REX.W) form; `reg_or_opcode` is the /digit selecting
  // ROL/ROR/RCL/RCR/SHL/SHR/SAR.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    // Shift-by-one has a dedicated, shorter opcode.
    EmitUint8(0xD1);
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}
5223
5224
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  // Shared encoder for shift/rotate-by-CL (opcode D3 /digit). The count
  // register is architecturally fixed to CL, hence the RCX check.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5239
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5240 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5241 // REX.WRXB
5242 // W - 64-bit operand
5243 // R - MODRM.reg
5244 // X - SIB.index
5245 // B - MODRM.rm/SIB.base
5246 uint8_t rex = force ? 0x40 : 0;
5247 if (w) {
5248 rex |= 0x48; // REX.W000
5249 }
5250 if (r) {
5251 rex |= 0x44; // REX.0R00
5252 }
5253 if (x) {
5254 rex |= 0x42; // REX.00X0
5255 }
5256 if (b) {
5257 rex |= 0x41; // REX.000B
5258 }
5259 if (rex != 0) {
5260 EmitUint8(rex);
5261 }
5262 }
5263
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  // REX.B if `reg` is R8-R15; otherwise no prefix.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}
5267
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  // REX.R for `dst` (ModRM.reg), REX.B for `src` (ModRM.rm), if extended.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5271
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  // REX.R for `dst`, REX.B for `src`, if either is XMM8-XMM15.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5275
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  // Mixed GPR/XMM form: REX.R for `dst`, REX.B for `src`, if extended.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5279
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  // Mixed XMM/GPR form: REX.R for `dst`, REX.B for `src`, if extended.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5283
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  // Emit the REX bits (X/B) the pre-encoded operand computed for its
  // index/base registers, if any.
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5290
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  // Combine the operand's REX.X/B bits with REX.R for `dst`.
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5300
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  // Combine the operand's REX.X/B bits with REX.R for the XMM `dst`.
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5310
void X86_64Assembler::EmitRex64() {
  // Bare REX.W (0x48): 64-bit operand size with no extension bits.
  EmitOptionalRex(false, true, false, false, false);
}
5314
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  // REX.W plus REX.B if `reg` is R8-R15.
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}
5318
void X86_64Assembler::EmitRex64(const Operand& operand) {
  // REX.W combined with the operand's X/B bits; always emitted since W is set.
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}
5324
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  // REX.W plus REX.R for `dst` and REX.B for `src` as needed.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5328
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  // REX.W for a 64-bit XMM<-GPR op, with R/B extension bits as needed.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5332
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  // REX.W for a 64-bit GPR<-XMM op, with R/B extension bits as needed.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5336
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  // REX.W plus the operand's X/B bits and REX.R for `dst`; always emitted.
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5344
void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  // REX.W plus the operand's X/B bits and REX.R for the XMM `dst`.
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5352
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
  // REX for byte-register operands: a bare REX (0x40) must be forced for
  // SPL/BPL/SIL/DIL, which without REX would encode AH/CH/DH/BH.
  // For src, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = src.AsRegister() > 3;
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}
5358
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  // Memory-operand variant: combine the operand's X/B bits with a forced
  // bare REX when `dst` is SPL/BPL/SIL/DIL (else they encode AH/CH/DH/BH)
  // and REX.R when `dst` is an extended register.
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5373
void X86_64Assembler::AddConstantArea() {
  // Append the accumulated constant pool (32-bit slots) to the end of the
  // instruction stream.
  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
  for (size_t i = 0, e = area.size(); i < e; i++) {
    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    EmitInt32(area[i]);
  }
}
5381
size_t ConstantArea::AppendInt32(int32_t v) {
  // Unconditionally append `v` and return its byte offset within the area.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v);
  return result;
}
5387
AddInt32(int32_t v)5388 size_t ConstantArea::AddInt32(int32_t v) {
5389 // Look for an existing match.
5390 for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5391 if (v == buffer_[i]) {
5392 return i * elem_size_;
5393 }
5394 }
5395
5396 // Didn't match anything.
5397 return AppendInt32(v);
5398 }
5399
size_t ConstantArea::AddInt64(int64_t v) {
  // Store a 64-bit constant as two consecutive 32-bit slots (low word
  // first), deduplicating against any existing adjacent pair.
  int32_t v_low = v;
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}
5418
size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}
5423
size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}
5428
EmitVexPrefixByteZero(bool is_twobyte_form)5429 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5430 // Vex Byte 0,
5431 // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5432 // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5433 uint8_t vex_prefix = 0xC0;
5434 if (is_twobyte_form) {
5435 vex_prefix |= TWO_BYTE_VEX; // 2-Byte Vex
5436 } else {
5437 vex_prefix |= THREE_BYTE_VEX; // 3-Byte Vex
5438 }
5439 return vex_prefix;
5440 }
5441
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
  // Second byte of a 3-byte VEX prefix: inverted R/X/B extension bits in
  // bits [7:5] and the opcode-map selector (mmmmm) in bits [4:0].
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bit[6] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!X) {
    // X .
    vex_prefix |= SET_VEX_X;
  }
  /** Bit[5] This bit needs to be set to '1' */
  if (!B) {
    // B .
    vex_prefix |= SET_VEX_B;
  }
  /** Bits[4:0], Based on the instruction documentaion */
  vex_prefix |= SET_VEX_M;
  return vex_prefix;
}
5466
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Second byte of a 2-byte VEX prefix: inverted R bit, the inverted
  // register specifier vvvv in bits [6:3], vector length L, and the
  // implied-prefix field pp.
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
  if (operand.IsNoRegister()) {
    // No register: vvvv = 1111b (inverted encoding of "unused").
    vex_prefix |= 0x78;
  } else if (operand.IsXmmRegister()) {
    // vvvv holds the one's complement of the register number.
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5500
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Third byte of a 3-byte VEX prefix: W bit, inverted register specifier
  // vvvv in bits [6:3], vector length L, and the implied-prefix field pp.
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  if (operand.IsXmmRegister()) {
    // vvvv holds the one's complement of the register number.
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5532
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Third byte of a 3-byte VEX prefix when no vvvv register is used:
  // vvvv is fixed at 1111b (inverted "no register").
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] - "pp"
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}
5556
5557 } // namespace x86_64
5558 } // namespace art
5559