1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art {
25 namespace x86_64 {
26 
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
// Pretty-prints an Address operand in AT&T syntax, decoding the encoded
// ModRM mod/rm fields (and the SIB byte when rm == RSP).
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // mod == 0: no displacement (except the special RBP-base SIB case).
      // rm != RSP means there is no SIB byte; index == RSP inside a SIB byte
      // encodes "no index".
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // SIB with base == RBP under mod == 0 means disp32 with no base register.
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // mod == 1: 8-bit signed displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // mod == 2: 32-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod == 3 is register-direct and should not appear in an Address.
      return os << "<address?>";
  }
}
66 
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
// call *reg — indirect near call: opcode FF /2, optional REX for extended regs.
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());  // /2 opcode extension in ModRM.reg.
}
81 
82 
// call *mem — indirect near call through memory: opcode FF /2.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);  // /2 opcode extension in ModRM.reg.
}
89 
90 
// call rel32 — direct relative call: opcode E8, 5 bytes total.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98 
// push reg — opcode 50+rd; REX.B (via optional REX) selects r8–r15.
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
104 
105 
// push mem — opcode FF /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);  // /6 opcode extension in ModRM.reg.
}
112 
113 
// push imm — 6A ib for 8-bit immediates, otherwise 68 id (sign-extended
// to 64 bits by the CPU). Larger-than-32-bit immediates are not encodable.
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
125 
126 
// pop reg — opcode 58+rd; REX.B (via optional REX) selects r8–r15.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
132 
133 
// pop mem — opcode 8F /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);  // /0 opcode extension in ModRM.reg.
}
140 
141 
// mov reg64, imm — uses the compact REX.W + C7 /0 id form (imm32
// sign-extended to 64 bits) when possible, else REX.W + B8+rd io (full imm64).
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
156 
157 
// mov reg32, imm32 — opcode B8+rd id (zero-extends into the full 64-bit reg).
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
165 
166 
// mov mem64, imm32 — REX.W + C7 /0 id; the imm32 is sign-extended by the CPU.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());  // Only a 32-bit immediate is encodable for 64-bit mov.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension in ModRM.reg.
  EmitImmediate(imm);
}
175 
176 
// mov reg64, reg64 — REX.W + 89 /r (MR form: dst in ModRM.rm, src in ModRM.reg).
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
184 
185 
// mov reg32, reg32 — opcode 8B /r (RM form: dst in ModRM.reg).
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
192 
193 
// mov reg64, mem64 — REX.W + 8B /r.
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
200 
201 
// mov reg32, mem32 — opcode 8B /r.
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
208 
209 
// mov mem64, reg64 — REX.W + 89 /r (store form: src in ModRM.reg).
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
216 
217 
// mov mem32, reg32 — opcode 89 /r (store form: src in ModRM.reg).
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
224 
// mov mem32, imm32 — opcode C7 /0 id.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension in ModRM.reg.
  EmitImmediate(imm);
}
232 
// movnti mem32, reg32 — non-temporal store, opcode 0F C3 /r.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
240 
// movnti mem64, reg64 — non-temporal store, REX.W + 0F C3 /r.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248 
// cmovcc reg64, reg64 — convenience overload defaulting to the 64-bit form.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, true);
}
252 
// cmovcc reg, reg — opcode 0F 40+cc /r; REX.W selects the 64-bit form.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
260 
261 
// cmovcc reg, mem — opcode 0F 40+cc /r; REX.W selects the 64-bit form.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitOperand(dst.LowBits(), src);
}
273 
274 
// movzx reg32, reg8 — opcode 0F B6 /r; byte-register REX normalization keeps
// the low-byte register encoding unambiguous (avoids AH/CH/DH/BH).
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
282 
283 
// movzx reg32, mem8 — opcode 0F B6 /r.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
293 
294 
// movsx reg32, reg8 — opcode 0F BE /r; byte-register REX normalization keeps
// the low-byte register encoding unambiguous (avoids AH/CH/DH/BH).
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
302 
303 
// movsx reg32, mem8 — opcode 0F BE /r.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
313 
314 
// Deliberately unsupported: byte loads must specify zero- or sign-extension.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318 
319 
// mov mem8, reg8 — opcode 88 /r, with byte-register REX normalization.
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
326 
327 
// mov mem8, imm8 — opcode C6 /0 ib.
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  EmitOperand(Register::RAX, dst);  // RAX encodes as 0, i.e. the /0 extension.
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
336 
337 
// movzx reg32, reg16 — opcode 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
345 
346 
// movzx reg32, mem16 — opcode 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
354 
355 
// movsx reg32, reg16 — opcode 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
363 
364 
// movsx reg32, mem16 — opcode 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
372 
373 
// Deliberately unsupported: word loads must specify zero- or sign-extension.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377 
378 
// mov mem16, reg16 — 66 (operand-size override) + 89 /r.
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
386 
387 
// mov mem16, imm16 — 66 (operand-size override) + C7 /0 iw,
// immediate emitted little-endian one byte at a time.
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);  // RAX encodes as 0, i.e. the /0 extension.
  CHECK(imm.is_uint16() || imm.is_int16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}
398 
399 
// lea reg64, mem — REX.W + 8D /r.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
406 
407 
// lea reg32, mem — opcode 8D /r.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414 
415 
// movaps xmm, xmm — SSE opcode 0F 28 /r; dispatches to the VEX-encoded
// form when AVX/AVX2 is available.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427 
428 
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
// vmovaps xmm, xmm — VEX.128.0F.WIG 28 (load form) or 29 (store form).
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;  // byte_two only used in 3-byte form.
  bool is_twobyte_form = true;
  // The 2-byte VEX prefix (C5) carries only the R extension bit, so when dst
  // needs REX we use the "load" opcode (dst in ModRM.reg → VEX.R), and when
  // only src needs REX we use the "store" opcode (src in ModRM.reg).
  bool load = dst.NeedsRex();
  bool store = !load;

  // Both operands extended: neither opcode form fits 2-byte VEX; fall back to
  // the 3-byte prefix (C4), which also has the B bit.
  if (src.NeedsRex()&& dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
  if (is_twobyte_form) {
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode: 0x29 = store form (src in reg field), 0x28 = load form.
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands — reg/rm roles are swapped for the store form.
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477 
// movaps xmm, mem128 — SSE opcode 0F 28 /r; dispatches to the VEX-encoded
// form when AVX/AVX2 is available.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489 
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
// vmovaps xmm, m128 — VEX.128.0F.WIG 28 /r (aligned load).
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo only used in 3-byte form.
  bool is_twobyte_form = false;
  // Instruction VEX Prefix: the 2-byte form (C5) has no X/B extension bits,
  // so it is only usable when the memory operand needs neither.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529 
// movups xmm, mem128 — SSE opcode 0F 10 /r (unaligned load); dispatches to
// the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
541 
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
// vmovups xmm, m128 — VEX.128.0F.WIG 10 /r (unaligned load).
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo only used in 3-byte form.
  bool is_twobyte_form = false;
  // Instruction VEX Prefix: the 2-byte form (C5) has no X/B extension bits,
  // so it is only usable when the memory operand needs neither.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581 
582 
// movaps mem128, xmm — SSE opcode 0F 29 /r (aligned store); dispatches to
// the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
594 
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
// vmovaps m128, xmm — VEX.128.0F.WIG 29 /r (aligned store).
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo only used in 3-byte form.
  bool is_twobyte_form = false;

  // Instruction VEX Prefix: the 2-byte form (C5) has no X/B extension bits,
  // so it is only usable when the memory operand needs neither.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635 
// movups mem128, xmm — SSE opcode 0F 11 /r (unaligned store); dispatches to
// the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
647 
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
// vmovups m128, xmm — VEX.128.0F.WIG 11 /r (unaligned store).
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo only used in 3-byte form.
  bool is_twobyte_form = false;

  // Instruction VEX Prefix: the 2-byte form (C5) has no X/B extension bits,
  // so it is only usable when the memory operand needs neither.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688 
689 
// movss xmm, mem32 — F3 0F 10 /r (scalar single-precision load).
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
698 
699 
// movss mem32, xmm — F3 0F 11 /r (scalar single-precision store).
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
708 
709 
// movss xmm, xmm — F3 0F 11 /r, emitted in the store (MR) form.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
718 
719 
// movsxd reg64, reg32 — REX.W + 63 /r (sign-extend 32→64).
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
726 
727 
// movsxd reg64, mem32 — REX.W + 63 /r (sign-extend 32→64).
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
734 
735 
// movq xmm, reg64 — convenience overload defaulting to the 64-bit form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, true);
}
739 
// movq reg64, xmm — convenience overload defaulting to the 64-bit form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, true);
}
743 
// movd/movq xmm, reg — 66 (REX.W) 0F 6E /r; REX.W selects the 64-bit movq form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
752 
// movd/movq reg, xmm — 66 (REX.W) 0F 7E /r; REX.W selects the 64-bit movq form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
761 
// addss xmm, xmm — F3 0F 58 /r (scalar single-precision add).
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
770 
// addss xmm, mem32 — F3 0F 58 /r (scalar single-precision add).
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
779 
780 
// subss xmm, xmm — F3 0F 5C /r (scalar single-precision subtract).
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
789 
790 
// subss xmm, mem32 — F3 0F 5C /r (scalar single-precision subtract).
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
799 
800 
// mulss xmm, xmm — F3 0F 59 /r (scalar single-precision multiply).
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
809 
810 
// mulss xmm, mem32 — F3 0F 59 /r (scalar single-precision multiply).
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
819 
820 
// divss xmm, xmm — F3 0F 5E /r (scalar single-precision divide).
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
829 
830 
// divss xmm, mem32 — F3 0F 5E /r (scalar single-precision divide).
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
839 
840 
// addps xmm, xmm — 0F 58 /r (packed single-precision add).
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
848 
849 
// subps xmm, xmm — 0F 5C /r (packed single-precision subtract).
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
857 
// vaddps xmm1, xmm2, xmm3 — VEX.128.0F.WIG 58 /r; the first source (add_left)
// is carried in VEX.vvvv, the second (add_right) in ModRM.rm.
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX (C5) has no B bit, so it only works when rm needs no extension.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
886 
vsubps(XmmRegister dst,XmmRegister src1,XmmRegister src2)887 void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
888   DCHECK(CpuHasAVXorAVX2FeatureFlag());
889   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
890   bool is_twobyte_form = false;
891   uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
892   if (!src2.NeedsRex()) {
893     is_twobyte_form = true;
894   }
895   byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
896   X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
897   if (is_twobyte_form) {
898     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
899   } else {
900     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
901     byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
902   }
903   EmitUint8(byte_zero);
904   EmitUint8(byte_one);
905   if (!is_twobyte_form) {
906     EmitUint8(byte_two);
907   }
908   EmitUint8(0x5C);
909   EmitXmmRegisterOperand(dst.LowBits(), src2);
910 }
911 
912 
// 0F 59 /r: MULPS — packed single-precision multiply, dst *= src.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);  // No mandatory prefix: packed-single form.
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
920 
vmulps(XmmRegister dst,XmmRegister src1,XmmRegister src2)921 void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
922   DCHECK(CpuHasAVXorAVX2FeatureFlag());
923   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
924   bool is_twobyte_form = false;
925   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
926   if (!src2.NeedsRex()) {
927     is_twobyte_form = true;
928   }
929   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
930   X86_64ManagedRegister vvvv_reg =
931       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
932   if (is_twobyte_form) {
933     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
934   } else {
935     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
936                                    /*X=*/ false,
937                                    src2.NeedsRex(),
938                                    SET_VEX_M_0F);
939     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
940   }
941   EmitUint8(ByteZero);
942   EmitUint8(ByteOne);
943   if (!is_twobyte_form) {
944     EmitUint8(ByteTwo);
945   }
946   EmitUint8(0x59);
947   EmitXmmRegisterOperand(dst.LowBits(), src2);
948 }
949 
// 0F 5E /r: DIVPS — packed single-precision divide, dst /= src.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);  // No mandatory prefix: packed-single form.
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
957 
vdivps(XmmRegister dst,XmmRegister src1,XmmRegister src2)958 void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
959   DCHECK(CpuHasAVXorAVX2FeatureFlag());
960   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
961   bool is_twobyte_form = false;
962   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
963   if (!src2.NeedsRex()) {
964     is_twobyte_form = true;
965   }
966   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
967   X86_64ManagedRegister vvvv_reg =
968       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
969   if (is_twobyte_form) {
970     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
971   } else {
972     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
973                                    /*X=*/ false,
974                                    src2.NeedsRex(),
975                                    SET_VEX_M_0F);
976     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
977   }
978   EmitUint8(ByteZero);
979   EmitUint8(ByteOne);
980   if (!is_twobyte_form) {
981     EmitUint8(ByteTwo);
982   }
983   EmitUint8(0x5E);
984   EmitXmmRegisterOperand(dst.LowBits(), src2);
985 }
986 
// D9 /0: FLD m32fp — push the single-precision value at `src` onto the x87 stack.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0 opcode extension in the ModRM reg field.
}
992 
993 
// D9 /2: FST m32fp — store ST(0) to `dst` as single precision, without popping.
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension in the ModRM reg field.
}
999 
1000 
// D9 /3: FSTP m32fp — store ST(0) to `dst` as single precision and pop the x87 stack.
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension in the ModRM reg field.
}
1006 
1007 
// 66 0F 28 /r: MOVAPD xmm, xmm — move aligned packed doubles, register to register.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1020 
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  // The two-byte VEX prefix carries only the R bit, so it can extend just the
  // register placed in the ModRM reg field.  If both registers need REX we
  // must use the three-byte form; if only one does, the opcode (load 0x28 vs
  // store 0x29) is chosen so that register lands in the reg field.
  bool is_twobyte_form = true;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // Use the "load" form (dst in reg field) whenever dst needs REX.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // No NDS operand.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store form: src occupies the reg field.
  } else {
    EmitUint8(0x28);  // Load form: dst occupies the reg field.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1068 
// 66 0F 28 /r: MOVAPD xmm, m128 — load aligned packed doubles from memory.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1081 
1082 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
vmovapd(XmmRegister dst,const Address & src)1083 void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
1084   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1085   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1086   uint8_t ByteZero, ByteOne, ByteTwo;
1087   bool is_twobyte_form = false;
1088 
1089   // Instruction VEX Prefix
1090   uint8_t rex = src.rex();
1091   bool Rex_x = rex & GET_REX_X;
1092   bool Rex_b = rex & GET_REX_B;
1093   if (!Rex_b && !Rex_x) {
1094     is_twobyte_form = true;
1095   }
1096   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1097   if (is_twobyte_form) {
1098     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1099     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1100                                    vvvv_reg,
1101                                    SET_VEX_L_128,
1102                                    SET_VEX_PP_66);
1103   } else {
1104     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1105                                    Rex_x,
1106                                    Rex_b,
1107                                    SET_VEX_M_0F);
1108     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1109                                    SET_VEX_L_128,
1110                                    SET_VEX_PP_66);
1111   }
1112   EmitUint8(ByteZero);
1113   EmitUint8(ByteOne);
1114   if (!is_twobyte_form) {
1115     EmitUint8(ByteTwo);
1116   }
1117   // Instruction Opcode
1118   EmitUint8(0x28);
1119   // Instruction Operands
1120   EmitOperand(dst.LowBits(), src);
1121 }
1122 
// 66 0F 10 /r: MOVUPD xmm, m128 — load unaligned packed doubles from memory.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1135 
1136 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1137 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1138   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1139   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1140   bool is_twobyte_form = false;
1141   uint8_t ByteZero, ByteOne, ByteTwo;
1142 
1143   // Instruction VEX Prefix
1144   uint8_t rex = src.rex();
1145   bool Rex_x = rex & GET_REX_X;
1146   bool Rex_b = rex & GET_REX_B;
1147   if (!Rex_b && !Rex_x) {
1148     is_twobyte_form = true;
1149   }
1150   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1151   if (is_twobyte_form) {
1152     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1153     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1154                                    vvvv_reg,
1155                                    SET_VEX_L_128,
1156                                    SET_VEX_PP_66);
1157   } else {
1158     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1159                                    Rex_x,
1160                                    Rex_b,
1161                                    SET_VEX_M_0F);
1162     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1163                                    SET_VEX_L_128,
1164                                    SET_VEX_PP_66);
1165   }
1166   EmitUint8(ByteZero);
1167   EmitUint8(ByteOne);
1168   if (!is_twobyte_form)
1169   EmitUint8(ByteTwo);
1170   // Instruction Opcode
1171   EmitUint8(0x10);
1172   // Instruction Operands
1173   EmitOperand(dst.LowBits(), src);
1174 }
1175 
// 66 0F 29 /r: MOVAPD m128, xmm — store aligned packed doubles to memory.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);  // Store form: the register operand is src.
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1188 
1189 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
vmovapd(const Address & dst,XmmRegister src)1190 void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
1191   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1192   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1193   bool is_twobyte_form = false;
1194   uint8_t ByteZero, ByteOne, ByteTwo;
1195   // Instruction VEX Prefix
1196   uint8_t rex = dst.rex();
1197   bool Rex_x = rex & GET_REX_X;
1198   bool Rex_b = rex & GET_REX_B;
1199   if (!Rex_x && !Rex_b) {
1200     is_twobyte_form = true;
1201   }
1202   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1203   if (is_twobyte_form) {
1204     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1205     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1206                                    vvvv_reg,
1207                                    SET_VEX_L_128,
1208                                    SET_VEX_PP_66);
1209   } else {
1210     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1211                                    Rex_x,
1212                                    Rex_b,
1213                                    SET_VEX_M_0F);
1214     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1215                                    SET_VEX_L_128,
1216                                    SET_VEX_PP_66);
1217   }
1218   EmitUint8(ByteZero);
1219   EmitUint8(ByteOne);
1220   if (!is_twobyte_form) {
1221     EmitUint8(ByteTwo);
1222   }
1223   // Instruction Opcode
1224   EmitUint8(0x29);
1225   // Instruction Operands
1226   EmitOperand(src.LowBits(), dst);
1227 }
1228 
// 66 0F 11 /r: MOVUPD m128, xmm — store unaligned packed doubles to memory.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);  // Store form: the register operand is src.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1241 
1242 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
vmovupd(const Address & dst,XmmRegister src)1243 void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
1244   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1245   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1246   bool is_twobyte_form = false;
1247   uint8_t ByteZero, ByteOne, ByteTwo;
1248 
1249   // Instruction VEX Prefix
1250   uint8_t rex = dst.rex();
1251   bool Rex_x = rex & GET_REX_X;
1252   bool Rex_b = rex & GET_REX_B;
1253   if (!Rex_x && !Rex_b) {
1254     is_twobyte_form = true;
1255   }
1256   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1257   if (is_twobyte_form) {
1258     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1259     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1260                                    vvvv_reg,
1261                                    SET_VEX_L_128,
1262                                    SET_VEX_PP_66);
1263   } else {
1264     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1265                                    Rex_x,
1266                                    Rex_b,
1267                                    SET_VEX_M_0F);
1268     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1269                                    SET_VEX_L_128,
1270                                    SET_VEX_PP_66);
1271   }
1272   EmitUint8(ByteZero);
1273   EmitUint8(ByteOne);
1274   if (!is_twobyte_form) {
1275     EmitUint8(ByteTwo);
1276   }
1277   // Instruction Opcode
1278   EmitUint8(0x11);
1279   // Instruction Operands
1280   EmitOperand(src.LowBits(), dst);
1281 }
1282 
1283 
// F2 0F 10 /r: MOVSD xmm, m64 — load a scalar double from memory.
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1292 
1293 
// F2 0F 11 /r: MOVSD m64, xmm — store a scalar double to memory.
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Store form: the register operand is src.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1302 
1303 
// F2 0F 11 /r: MOVSD xmm, xmm — register-to-register scalar-double move.
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);  // Store form, so src occupies the ModRM reg field.
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1312 
1313 
// F2 0F 58 /r: ADDSD — scalar double add, dst += src.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1322 
1323 
// F2 0F 58 /r: ADDSD — scalar double add, dst += the double at `src`.
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1332 
1333 
// F2 0F 5C /r: SUBSD — scalar double subtract, dst -= src.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1342 
1343 
// F2 0F 5C /r: SUBSD — scalar double subtract, dst -= the double at `src`.
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1352 
1353 
// F2 0F 59 /r: MULSD — scalar double multiply, dst *= src.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1362 
1363 
// F2 0F 59 /r: MULSD — scalar double multiply, dst *= the double at `src`.
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1372 
1373 
// F2 0F 5E /r: DIVSD — scalar double divide, dst /= src.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1382 
1383 
// F2 0F 5E /r: DIVSD — scalar double divide, dst /= the double at `src`.
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1392 
1393 
// 66 0F 58 /r: ADDPD — packed double add, dst += src (two doubles).
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1402 
1403 
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1404 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1405   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1406   bool is_twobyte_form = false;
1407   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1408   if (!add_right.NeedsRex()) {
1409     is_twobyte_form = true;
1410   }
1411   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1412   X86_64ManagedRegister vvvv_reg =
1413       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1414   if (is_twobyte_form) {
1415     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1416   } else {
1417     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1418                                    /*X=*/ false,
1419                                    add_right.NeedsRex(),
1420                                    SET_VEX_M_0F);
1421     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1422   }
1423   EmitUint8(ByteZero);
1424   EmitUint8(ByteOne);
1425   if (!is_twobyte_form) {
1426     EmitUint8(ByteTwo);
1427   }
1428   EmitUint8(0x58);
1429   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1430 }
1431 
1432 
// 66 0F 5C /r: SUBPD — packed double subtract, dst -= src.
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1441 
1442 
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1443 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1444   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1445   bool is_twobyte_form = false;
1446   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1447   if (!src2.NeedsRex()) {
1448     is_twobyte_form = true;
1449   }
1450   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1451   X86_64ManagedRegister vvvv_reg =
1452       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1453   if (is_twobyte_form) {
1454     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1455   } else {
1456     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1457                                    /*X=*/ false,
1458                                    src2.NeedsRex(),
1459                                    SET_VEX_M_0F);
1460     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1461   }
1462   EmitUint8(ByteZero);
1463   EmitUint8(ByteOne);
1464   if (!is_twobyte_form) {
1465     EmitUint8(ByteTwo);
1466   }
1467   EmitUint8(0x5C);
1468   EmitXmmRegisterOperand(dst.LowBits(), src2);
1469 }
1470 
1471 
// 66 0F 59 /r: MULPD — packed double multiply, dst *= src.
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1480 
vmulpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1481 void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1482   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1484   bool is_twobyte_form = false;
1485   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1486   if (!src2.NeedsRex()) {
1487     is_twobyte_form = true;
1488   }
1489   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1490   X86_64ManagedRegister vvvv_reg =
1491       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1492   if (is_twobyte_form) {
1493     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1494   } else {
1495     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1496                                    /*X=*/ false,
1497                                    src2.NeedsRex(),
1498                                    SET_VEX_M_0F);
1499     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1500   }
1501   EmitUint8(ByteZero);
1502   EmitUint8(ByteOne);
1503   if (!is_twobyte_form) {
1504     EmitUint8(ByteTwo);
1505   }
1506   EmitUint8(0x59);
1507   EmitXmmRegisterOperand(dst.LowBits(), src2);
1508 }
1509 
// 66 0F 5E /r: DIVPD — packed double divide, dst /= src.
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1518 
1519 
vdivpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1520 void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1521   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1522   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1523   bool is_twobyte_form = false;
1524   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1525   if (!src2.NeedsRex()) {
1526     is_twobyte_form = true;
1527   }
1528   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1529   X86_64ManagedRegister vvvv_reg =
1530       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1531   if (is_twobyte_form) {
1532     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1533   } else {
1534     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1535                                    /*X=*/ false,
1536                                    src2.NeedsRex(),
1537                                    SET_VEX_M_0F);
1538     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1539   }
1540   EmitUint8(ByteZero);
1541   EmitUint8(ByteOne);
1542   if (!is_twobyte_form) {
1543     EmitUint8(ByteTwo);
1544   }
1545   EmitUint8(0x5E);
1546   EmitXmmRegisterOperand(dst.LowBits(), src2);
1547 }
1548 
1549 
// 66 0F 6F /r: MOVDQA xmm, xmm — move aligned 128-bit integer data.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the aligned-integer form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1562 
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  // The two-byte VEX prefix carries only the R bit, so it can extend just the
  // register placed in the ModRM reg field.  If both registers need REX we
  // must use the three-byte form; if only one does, the opcode (load 0x6F vs
  // store 0x7F) is chosen so that register lands in the reg field.
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Use the "load" form (dst in reg field) whenever dst needs REX.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // No NDS operand.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: src occupies the reg field.
  } else {
    EmitUint8(0x6F);  // Load form: dst occupies the reg field.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1610 
// 66 0F 6F /r: MOVDQA xmm, m128 — load aligned 128-bit integer data.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1623 
1624 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
vmovdqa(XmmRegister dst,const Address & src)1625 void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
1626   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1627   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1628   uint8_t  ByteZero, ByteOne, ByteTwo;
1629   bool is_twobyte_form = false;
1630 
1631   // Instruction VEX Prefix
1632   uint8_t rex = src.rex();
1633   bool Rex_x = rex & GET_REX_X;
1634   bool Rex_b = rex & GET_REX_B;
1635   if (!Rex_x && !Rex_b) {
1636     is_twobyte_form = true;
1637   }
1638   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1639   if (is_twobyte_form) {
1640     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1641     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1642                                    vvvv_reg,
1643                                    SET_VEX_L_128,
1644                                    SET_VEX_PP_66);
1645   } else {
1646     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1647                                    Rex_x,
1648                                    Rex_b,
1649                                    SET_VEX_M_0F);
1650     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1651                                    SET_VEX_L_128,
1652                                    SET_VEX_PP_66);
1653   }
1654   EmitUint8(ByteZero);
1655   EmitUint8(ByteOne);
1656   if (!is_twobyte_form) {
1657     EmitUint8(ByteTwo);
1658   }
1659   // Instruction Opcode
1660   EmitUint8(0x6F);
1661   // Instruction Operands
1662   EmitOperand(dst.LowBits(), src);
1663 }
1664 
// F3 0F 6F /r: MOVDQU xmm, m128 — load unaligned 128-bit integer data.
// Dispatches to the VEX-encoded form when the CPU supports AVX/AVX2.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the unaligned-integer form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1677 
1678 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1679 Load Unaligned */
vmovdqu(XmmRegister dst,const Address & src)1680 void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
1681   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1682   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1683   uint8_t ByteZero, ByteOne, ByteTwo;
1684   bool is_twobyte_form = false;
1685 
1686   // Instruction VEX Prefix
1687   uint8_t rex = src.rex();
1688   bool Rex_x = rex & GET_REX_X;
1689   bool Rex_b = rex & GET_REX_B;
1690   if (!Rex_x && !Rex_b) {
1691     is_twobyte_form = true;
1692   }
1693   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1694   if (is_twobyte_form) {
1695     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1696     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1697                                    vvvv_reg,
1698                                    SET_VEX_L_128,
1699                                    SET_VEX_PP_F3);
1700   } else {
1701     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1702                                    Rex_x,
1703                                    Rex_b,
1704                                    SET_VEX_M_0F);
1705     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1706                                    SET_VEX_L_128,
1707                                    SET_VEX_PP_F3);
1708   }
1709   EmitUint8(ByteZero);
1710   EmitUint8(ByteOne);
1711   if (!is_twobyte_form) {
1712     EmitUint8(ByteTwo);
1713   }
1714   // Instruction Opcode
1715   EmitUint8(0x6F);
1716   // Instruction Operands
1717   EmitOperand(dst.LowBits(), src);
1718 }
1719 
// MOVDQA (store form): move aligned packed integer values, m128 <- xmm.
// Encoding: 66 [REX] 0F 7F /r; uses the VEX form on AVX-capable CPUs.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);  // Prefer the VEX encoding when AVX is available.
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting MOVDQA (vs. F3 for MOVDQU).
  EmitOptionalRex32(src, dst);  // REX only if src or the address needs extension bits.
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Opcode: store direction (memory <- register).
  EmitOperand(src.LowBits(), dst);
}
1732 
1733 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
vmovdqa(const Address & dst,XmmRegister src)1734 void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
1735   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1736   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1737   bool is_twobyte_form = false;
1738   uint8_t ByteZero, ByteOne, ByteTwo;
1739   // Instruction VEX Prefix
1740   uint8_t rex = dst.rex();
1741   bool Rex_x = rex & GET_REX_X;
1742   bool Rex_b = rex & GET_REX_B;
1743   if (!Rex_x && !Rex_b) {
1744     is_twobyte_form = true;
1745   }
1746   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1747   if (is_twobyte_form) {
1748     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1749     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1750                                    vvvv_reg,
1751                                    SET_VEX_L_128,
1752                                    SET_VEX_PP_66);
1753   } else {
1754     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1755                                    Rex_x,
1756                                    Rex_b,
1757                                    SET_VEX_M_0F);
1758     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1759                                    SET_VEX_L_128,
1760                                    SET_VEX_PP_66);
1761   }
1762   EmitUint8(ByteZero);
1763   EmitUint8(ByteOne);
1764   if (!is_twobyte_form) {
1765     EmitUint8(ByteTwo);
1766   }
1767   // Instruction Opcode
1768   EmitUint8(0x7F);
1769   // Instruction Operands
1770   EmitOperand(src.LowBits(), dst);
1771 }
1772 
// MOVDQU (store form): move unaligned packed integer values, m128 <- xmm.
// Encoding: F3 [REX] 0F 7F /r; uses the VEX form on AVX-capable CPUs.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);  // Prefer the VEX encoding when AVX is available.
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting MOVDQU (vs. 66 for MOVDQA).
  EmitOptionalRex32(src, dst);  // REX only if src or the address needs extension bits.
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Opcode: store direction (memory <- register).
  EmitOperand(src.LowBits(), dst);
}
1785 
1786 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
vmovdqu(const Address & dst,XmmRegister src)1787 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
1788   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1789   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1790   uint8_t ByteZero, ByteOne, ByteTwo;
1791   bool is_twobyte_form = false;
1792 
1793   // Instruction VEX Prefix
1794   uint8_t rex = dst.rex();
1795   bool Rex_x = rex & GET_REX_X;
1796   bool Rex_b = rex & GET_REX_B;
1797   if (!Rex_b && !Rex_x) {
1798     is_twobyte_form = true;
1799   }
1800   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1801   if (is_twobyte_form) {
1802     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1803     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1804                                    vvvv_reg,
1805                                    SET_VEX_L_128,
1806                                    SET_VEX_PP_F3);
1807   } else {
1808     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1809                                    Rex_x,
1810                                    Rex_b,
1811                                    SET_VEX_M_0F);
1812     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1813                                    SET_VEX_L_128,
1814                                    SET_VEX_PP_F3);
1815   }
1816   EmitUint8(ByteZero);
1817   EmitUint8(ByteOne);
1818   if (!is_twobyte_form) {
1819     EmitUint8(ByteTwo);
1820   }
1821   // Instruction Opcode
1822   EmitUint8(0x7F);
1823   // Instruction Operands
1824   EmitOperand(src.LowBits(), dst);
1825 }
1826 
// PADDB (SSE2): packed add of bytes, dst += src element-wise.
// Encoding: 66 [REX] 0F FC /r.
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);  // paddb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1835 
1836 
vpaddb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1837 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1838   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1839   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1840   uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1841   bool is_twobyte_form = true;
1842   if (add_right.NeedsRex()) {
1843     is_twobyte_form = false;
1844   }
1845   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1846   X86_64ManagedRegister vvvv_reg =
1847       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1848   if (is_twobyte_form) {
1849     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1850   } else {
1851     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1852                                    /*X=*/ false,
1853                                    add_right.NeedsRex(),
1854                                    SET_VEX_M_0F);
1855     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1856   }
1857   EmitUint8(ByteZero);
1858   EmitUint8(ByteOne);
1859   if (!is_twobyte_form) {
1860     EmitUint8(ByteTwo);
1861   }
1862   EmitUint8(0xFC);
1863   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1864 }
1865 
1866 
// PSUBB (SSE2): packed subtract of bytes, dst -= src element-wise.
// Encoding: 66 [REX] 0F F8 /r.
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);  // psubb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1875 
1876 
vpsubb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1877 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1878   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1879   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1880   bool is_twobyte_form = false;
1881   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1882   if (!add_right.NeedsRex()) {
1883     is_twobyte_form = true;
1884   }
1885   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1886   X86_64ManagedRegister vvvv_reg =
1887       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1888   if (is_twobyte_form) {
1889     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1890   } else {
1891     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1892                                    /*X=*/ false,
1893                                    add_right.NeedsRex(),
1894                                    SET_VEX_M_0F);
1895     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1896   }
1897   EmitUint8(ByteZero);
1898   EmitUint8(ByteOne);
1899   if (!is_twobyte_form) {
1900     EmitUint8(ByteTwo);
1901   }
1902   EmitUint8(0xF8);
1903   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1904 }
1905 
1906 
// PADDW (SSE2): packed add of 16-bit words, dst += src element-wise.
// Encoding: 66 [REX] 0F FD /r.
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);  // paddw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1915 
vpaddw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1916 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1917   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1918   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1919   bool is_twobyte_form = false;
1920   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1921   if (!add_right.NeedsRex()) {
1922     is_twobyte_form = true;
1923   }
1924   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1925   X86_64ManagedRegister vvvv_reg =
1926       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1927   if (is_twobyte_form) {
1928     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1929   } else {
1930     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1931                                    /*X=*/ false,
1932                                    add_right.NeedsRex(),
1933                                    SET_VEX_M_0F);
1934     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1935   }
1936   EmitUint8(ByteZero);
1937   EmitUint8(ByteOne);
1938   if (!is_twobyte_form) {
1939     EmitUint8(ByteTwo);
1940   }
1941   EmitUint8(0xFD);
1942   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1943 }
1944 
1945 
// PSUBW (SSE2): packed subtract of 16-bit words, dst -= src element-wise.
// Encoding: 66 [REX] 0F F9 /r.
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);  // psubw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1954 
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1955 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1956   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1957   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1958   bool is_twobyte_form = false;
1959   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1960   if (!add_right.NeedsRex()) {
1961     is_twobyte_form = true;
1962   }
1963   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1964   X86_64ManagedRegister vvvv_reg =
1965       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1966   if (is_twobyte_form) {
1967     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1968   } else {
1969     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1970                                    /*X=*/ false,
1971                                    add_right.NeedsRex(),
1972                                    SET_VEX_M_0F);
1973     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1974   }
1975   EmitUint8(ByteZero);
1976   EmitUint8(ByteOne);
1977   if (!is_twobyte_form) {
1978     EmitUint8(ByteTwo);
1979   }
1980   EmitUint8(0xF9);
1981   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1982 }
1983 
1984 
// PMULLW (SSE2): packed multiply of 16-bit words, keep low 16 bits of each product.
// Encoding: 66 [REX] 0F D5 /r.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);  // pmullw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1993 
vpmullw(XmmRegister dst,XmmRegister src1,XmmRegister src2)1994 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1995   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1996   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1997   bool is_twobyte_form = false;
1998   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1999   if (!src2.NeedsRex()) {
2000     is_twobyte_form = true;
2001   }
2002   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2003   X86_64ManagedRegister vvvv_reg =
2004       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2005   if (is_twobyte_form) {
2006     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2007   } else {
2008     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2009                                    /*X=*/ false,
2010                                    src2.NeedsRex(),
2011                                    SET_VEX_M_0F);
2012     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2013   }
2014   EmitUint8(ByteZero);
2015   EmitUint8(ByteOne);
2016   if (!is_twobyte_form) {
2017     EmitUint8(ByteTwo);
2018   }
2019   EmitUint8(0xD5);
2020   EmitXmmRegisterOperand(dst.LowBits(), src2);
2021 }
2022 
// PADDD (SSE2): packed add of 32-bit doublewords, dst += src element-wise.
// Encoding: 66 [REX] 0F FE /r.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);  // paddd opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2031 
vpaddd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2032 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2033   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2034   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2035   bool is_twobyte_form = false;
2036   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2037   if (!add_right.NeedsRex()) {
2038     is_twobyte_form = true;
2039   }
2040   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2041   X86_64ManagedRegister vvvv_reg =
2042       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2043   if (is_twobyte_form) {
2044     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2045   } else {
2046     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2047                                    /*X=*/ false,
2048                                    add_right.NeedsRex(),
2049                                    SET_VEX_M_0F);
2050     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2051   }
2052   EmitUint8(ByteZero);
2053   EmitUint8(ByteOne);
2054   if (!is_twobyte_form) {
2055     EmitUint8(ByteTwo);
2056   }
2057   EmitUint8(0xFE);
2058   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2059 }
2060 
// PSUBD (SSE2): packed subtract of 32-bit doublewords, dst -= src element-wise.
// Encoding: 66 [REX] 0F FA /r.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);  // psubd opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2069 
2070 
// PMULLD (SSE4.1): packed multiply of 32-bit doublewords, keep low 32 bits.
// Encoding: 66 [REX] 0F 38 40 /r (three-byte opcode map).
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Escape into the 0F 38 opcode map.
  EmitUint8(0x40);  // pmulld opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2080 
vpmulld(XmmRegister dst,XmmRegister src1,XmmRegister src2)2081 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2082   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2083   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2084   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2085   ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2086   X86_64ManagedRegister vvvv_reg =
2087       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2088   ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2089                                    /*X=*/ false,
2090                                    src2.NeedsRex(),
2091                                    SET_VEX_M_0F_38);
2092   ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2093   EmitUint8(ByteZero);
2094   EmitUint8(ByteOne);
2095   EmitUint8(ByteTwo);
2096   EmitUint8(0x40);
2097   EmitXmmRegisterOperand(dst.LowBits(), src2);
2098 }
2099 
// PADDQ (SSE2): packed add of 64-bit quadwords, dst += src element-wise.
// Encoding: 66 [REX] 0F D4 /r.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);  // paddq opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2108 
2109 
vpaddq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2110 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2111   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2112   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2113   bool is_twobyte_form = false;
2114   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2115   if (!add_right.NeedsRex()) {
2116     is_twobyte_form = true;
2117   }
2118   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2119   X86_64ManagedRegister vvvv_reg =
2120       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2121   if (is_twobyte_form) {
2122     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2123   } else {
2124     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2125                                    /*X=*/ false,
2126                                    add_right.NeedsRex(),
2127                                    SET_VEX_M_0F);
2128     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2129   }
2130   EmitUint8(ByteZero);
2131   EmitUint8(ByteOne);
2132   if (!is_twobyte_form) {
2133     EmitUint8(ByteTwo);
2134   }
2135   EmitUint8(0xD4);
2136   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2137 }
2138 
2139 
// PSUBQ (SSE2): packed subtract of 64-bit quadwords, dst -= src element-wise.
// Encoding: 66 [REX] 0F FB /r.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);  // psubq opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2148 
vpsubq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2149 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2150   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2151   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2152   bool is_twobyte_form = false;
2153   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2154   if (!add_right.NeedsRex()) {
2155     is_twobyte_form = true;
2156   }
2157   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2158   X86_64ManagedRegister vvvv_reg =
2159       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2160   if (is_twobyte_form) {
2161     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2162   } else {
2163     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2164                                    /*X=*/ false,
2165                                    add_right.NeedsRex(),
2166                                    SET_VEX_M_0F);
2167     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2168   }
2169   EmitUint8(ByteZero);
2170   EmitUint8(ByteOne);
2171   if (!is_twobyte_form) {
2172     EmitUint8(ByteTwo);
2173   }
2174   EmitUint8(0xFB);
2175   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2176 }
2177 
2178 
// PADDUSB (SSE2): packed add of unsigned bytes with unsigned saturation.
// Encoding: 66 [REX] 0F DC /r.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);  // paddusb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2187 
2188 
// PADDSB (SSE2): packed add of signed bytes with signed saturation.
// Encoding: 66 [REX] 0F EC /r.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);  // paddsb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2197 
2198 
// PADDUSW (SSE2): packed add of unsigned words with unsigned saturation.
// Encoding: 66 [REX] 0F DD /r.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);  // paddusw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2207 
2208 
// PADDSW (SSE2): packed add of signed words with signed saturation.
// Encoding: 66 [REX] 0F ED /r.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);  // paddsw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2217 
2218 
// PSUBUSB (SSE2): packed subtract of unsigned bytes with unsigned saturation.
// Encoding: 66 [REX] 0F D8 /r.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);  // psubusb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2227 
2228 
// PSUBSB (SSE2): packed subtract of signed bytes with signed saturation.
// Encoding: 66 [REX] 0F E8 /r.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);  // psubsb opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2237 
2238 
vpsubd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2239 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2240   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2241   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2242   bool is_twobyte_form = false;
2243   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2244   if (!add_right.NeedsRex()) {
2245     is_twobyte_form = true;
2246   }
2247   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2248   X86_64ManagedRegister vvvv_reg =
2249       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2250   if (is_twobyte_form) {
2251     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2252   } else {
2253     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2254                                    /*X=*/ false,
2255                                    add_right.NeedsRex(),
2256                                    SET_VEX_M_0F);
2257     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2258   }
2259   EmitUint8(ByteZero);
2260   EmitUint8(ByteOne);
2261   if (!is_twobyte_form) {
2262     EmitUint8(ByteTwo);
2263   }
2264   EmitUint8(0xFA);
2265   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2266 }
2267 
2268 
// PSUBUSW (SSE2): packed subtract of unsigned words with unsigned saturation.
// Encoding: 66 [REX] 0F D9 /r.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);  // psubusw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2277 
2278 
// PSUBSW (SSE2): packed subtract of signed words with signed saturation.
// Encoding: 66 [REX] 0F E9 /r.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix for the packed-integer SSE2 form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);  // psubsw opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2287 
2288 
// CVTSI2SS convenience overload: 32-bit (non-REX.W) source operand.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);  // is64bit = false.
}
2292 
2293 
// CVTSI2SS: convert signed integer in src (32- or 64-bit per is64bit) to a
// scalar single-precision float in dst. Encoding: F3 [REX(.W)] 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // cvtsi2ss opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2307 
2308 
// CVTSI2SS (memory source): convert the signed integer at src (32- or 64-bit
// per is64bit) to a scalar single-precision float in dst.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // cvtsi2ss opcode.
  EmitOperand(dst.LowBits(), src);
}
2322 
2323 
// CVTSI2SD convenience overload: 32-bit (non-REX.W) source operand.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);  // is64bit = false.
}
2327 
2328 
// CVTSI2SD: convert signed integer in src (32- or 64-bit per is64bit) to a
// scalar double-precision float in dst. Encoding: F2 [REX(.W)] 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // cvtsi2sd opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2342 
2343 
// CVTSI2SD (memory source): convert the signed integer at src (32- or 64-bit
// per is64bit) to a scalar double-precision float in dst.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // cvtsi2sd opcode.
  EmitOperand(dst.LowBits(), src);
}
2357 
2358 
// CVTSS2SI: convert scalar single-precision float to a 32-bit signed integer,
// rounding per MXCSR (contrast cvttss2si, which truncates).
// Encoding: F3 [REX] 0F 2D /r.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // cvtss2si opcode (rounding, not truncating).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2367 
2368 
// CVTSS2SD: convert scalar single-precision float to double precision.
// Encoding: F3 [REX] 0F 5A /r.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS source form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // cvtss2sd opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2377 
2378 
// CVTSS2SD (memory source): convert the single-precision float at src to
// double precision in dst.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS source form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // cvtss2sd opcode.
  EmitOperand(dst.LowBits(), src);
}
2387 
2388 
// CVTSD2SI: convert scalar double-precision float to a 32-bit signed integer,
// rounding per MXCSR (contrast cvttsd2si, which truncates).
// Encoding: F2 [REX] 0F 2D /r.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // cvtsd2si opcode (rounding, not truncating).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2397 
2398 
// CVTTSS2SI convenience overload: 32-bit (non-REX.W) destination operand.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);  // is64bit = false.
}
2402 
2403 
// CVTTSS2SI: convert scalar single-precision float to signed integer with
// truncation (round toward zero). Encoding: F3 [REX(.W)] 0F 2C /r.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the SS form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // cvttss2si opcode (truncating form).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2417 
2418 
// CVTTSD2SI convenience overload: 32-bit (non-REX.W) destination operand.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);  // is64bit = false.
}
2422 
2423 
// CVTTSD2SI: convert scalar double-precision float to signed integer with
// truncation (round toward zero). Encoding: F2 [REX(.W)] 0F 2C /r.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // cvttsd2si opcode (truncating form).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2437 
2438 
// CVTSD2SS: convert scalar double-precision float to single precision.
// Encoding: F2 [REX] 0F 5A /r.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD source form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // cvtsd2ss opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2447 
2448 
// CVTSD2SS (memory source): convert the double-precision float at src to
// single precision in dst.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the SD source form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // cvtsd2ss opcode.
  EmitOperand(dst.LowBits(), src);
}
2457 
2458 
// CVTDQ2PS: convert packed signed 32-bit integers to packed single-precision
// floats. Encoding: [REX] 0F 5B /r (no mandatory prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);  // cvtdq2ps opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2466 
2467 
// CVTDQ2PD: convert packed signed 32-bit integers (low two elements) to packed
// double-precision floats. Encoding: F3 [REX] 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting CVTDQ2PD at opcode E6.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);  // cvtdq2pd opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2476 
2477 
// COMISS: ordered compare of scalar single-precision floats, sets EFLAGS.
// Encoding: [REX] 0F 2F /r (no mandatory prefix).
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // comiss opcode (ordered; signals on QNaN, unlike ucomiss).
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2485 
2486 
// COMISS (memory operand): ordered compare of scalar single-precision floats,
// a vs. the float at b, sets EFLAGS.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // comiss opcode.
  EmitOperand(a.LowBits(), b);
}
2494 
2495 
// COMISD: ordered compare of scalar double-precision floats, sets EFLAGS.
// Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // comisd opcode (ordered; signals on QNaN, unlike ucomisd).
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2504 
2505 
// COMISD (memory operand): ordered compare of scalar double-precision floats,
// a vs. the double at b, sets EFLAGS.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // comisd opcode.
  EmitOperand(a.LowBits(), b);
}
2514 
2515 
// UCOMISS: unordered compare of scalar single-precision floats, sets EFLAGS.
// Encoding: [REX] 0F 2E /r (no mandatory prefix).
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // ucomiss opcode (quiet on QNaN, unlike comiss).
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2523 
2524 
// UCOMISS (memory operand): unordered compare of scalar single-precision
// floats, a vs. the float at b, sets EFLAGS.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // ucomiss opcode.
  EmitOperand(a.LowBits(), b);
}
2532 
2533 
// UCOMISD: unordered compare of scalar double-precision floats, sets EFLAGS.
// Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // ucomisd opcode (quiet on QNaN, unlike comisd).
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2542 
2543 
// UCOMISD xmm1, m64 (66 0F 2E /r): memory-operand form of ucomisd above.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode.
  EmitOperand(a.LowBits(), b);
}
2552 
2553 
// ROUNDSD xmm1, xmm2, imm8 (66 0F 3A 0B /r ib): round scalar double;
// imm selects the rounding mode.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 3A map.
  EmitUint8(0x3A);
  EmitUint8(0x0B);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate byte.
}
2564 
2565 
// ROUNDSS xmm1, xmm2, imm8 (66 0F 3A 0A /r ib): round scalar single;
// imm selects the rounding mode.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 3A map.
  EmitUint8(0x3A);
  EmitUint8(0x0A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate byte.
}
2576 
2577 
// SQRTSD xmm1, xmm2 (F2 0F 51 /r): scalar double-precision square root.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2586 
2587 
// SQRTSS xmm1, xmm2 (F3 0F 51 /r): scalar single-precision square root.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the scalar-single form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2596 
2597 
// XORPD xmm1, m128 (66 0F 57 /r): bitwise XOR of packed doubles, memory form.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2606 
2607 
// XORPD xmm1, xmm2 (66 0F 57 /r): bitwise XOR of packed doubles.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2616 
2617 
// XORPS xmm1, m128 (0F 57 /r): bitwise XOR of packed singles, memory form.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode (no 66 prefix distinguishes XORPS from XORPD).
  EmitOperand(dst.LowBits(), src);
}
2625 
2626 
// XORPS xmm1, xmm2 (0F 57 /r): bitwise XOR of packed singles.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2634 
// PXOR xmm1, xmm2 (66 0F EF /r): bitwise XOR of the full 128-bit registers.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2643 
2644 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
// VPXOR xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG EF /r): dst = src1 XOR src2.
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The 2-byte VEX prefix (C5) implies B=0/X=0/W=0/map=0F, so it can only be
  // used when src2 (the ModRM.rm operand) does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);  // C5 (2-byte) or C4 (3-byte).
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2673 
2674 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
// VXORPS xmm1, xmm2, xmm3 (VEX.128.0F.WIG 57 /r): dst = src1 XOR src2 (no 66 pp).
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2703 
2704 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
// VXORPD xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG 57 /r): dst = src1 XOR src2.
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2733 
// ANDPD xmm1, m128 (66 0F 54 /r): bitwise AND of packed doubles, memory form.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2742 
// ANDPD xmm1, xmm2 (66 0F 54 /r): bitwise AND of packed doubles.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2751 
// ANDPS xmm1, xmm2 (0F 54 /r): bitwise AND of packed singles.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode (no 66 prefix distinguishes ANDPS from ANDPD).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2759 
// PAND xmm1, xmm2 (66 0F DB /r): bitwise AND of the full 128-bit registers.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2768 
2769 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
// VPAND xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG DB /r): dst = src1 AND src2.
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2798 
2799 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
// VANDPS xmm1, xmm2, xmm3 (VEX.128.0F 54 /r): dst = src1 AND src2 (no 66 pp).
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2828 
2829 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
// VANDPD xmm1, xmm2, xmm3 (VEX.128.66.0F 54 /r): dst = src1 AND src2.
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2858 
// ANDN r64a, r64b, r/m64 (BMI1, VEX.LZ.0F38.W1 F2 /r): dst = ~src1 AND src2.
// Always uses the 3-byte VEX form because the 0F38 map and W=1 require it.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);  // C4.
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);  // Opcode map 0F 38.
  // src1 is carried in VEX.vvvv; W=1 selects the 64-bit operand size.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());  // ModRM: reg=dst, rm=src2.
}
2877 
// ANDNPD xmm1, xmm2 (66 0F 55 /r): dst = NOT(dst) AND src, packed doubles.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2886 
// ANDNPS xmm1, xmm2 (0F 55 /r): dst = NOT(dst) AND src, packed singles.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2894 
// PANDN xmm1, xmm2 (66 0F DF /r): dst = NOT(dst) AND src, 128-bit integer.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2903 
2904 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
// VPANDN xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG DF /r): dst = NOT(src1) AND src2.
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 (the operand that is complemented) goes into VEX.vvvv.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2933 
2934 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
// VANDNPS xmm1, xmm2, xmm3 (VEX.128.0F 55 /r): dst = NOT(src1) AND src2.
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2963 
2964 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
// VANDNPD xmm1, xmm2, xmm3 (VEX.128.66.0F 55 /r): dst = NOT(src1) AND src2.
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
2993 
// ORPD xmm1, xmm2 (66 0F 56 /r): bitwise OR of packed doubles.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3002 
// ORPS xmm1, xmm2 (0F 56 /r): bitwise OR of packed singles.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3010 
// POR xmm1, xmm2 (66 0F EB /r): bitwise OR of the full 128-bit registers.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3019 
3020 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
// VPOR xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG EB /r): dst = src1 OR src2.
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
3049 
3050 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
// VORPS xmm1, xmm2, xmm3 (VEX.128.0F 56 /r): dst = src1 OR src2 (no 66 pp).
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
3079 
3080 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
// VORPD xmm1, xmm2, xmm3 (VEX.128.66.0F 56 /r): dst = src1 OR src2.
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
3109 
// PAVGB xmm1, xmm2 (66 0F E0 /r): average packed unsigned bytes (rounded).
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3118 
// PAVGW xmm1, xmm2 (66 0F E3 /r): average packed unsigned words (rounded).
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3127 
// PSADBW xmm1, xmm2 (66 0F F6 /r): sum of absolute differences of packed bytes.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3136 
// PMADDWD xmm1, xmm2 (66 0F F5 /r): multiply packed words, add adjacent pairs.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3145 
// VPMADDWD xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG F5 /r): three-operand pmaddwd.
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // VEX encoding requires AVX/AVX2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix only when src2 does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);  // C5 (2-byte) or C4 (3-byte).
  // src1 goes into the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF5);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);  // ModRM: reg=dst, rm=src2.
}
3174 
// PHADDW xmm1, xmm2 (66 0F 38 01 /r): horizontal add of packed words.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x01);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3184 
// PHADDD xmm1, xmm2 (66 0F 38 02 /r): horizontal add of packed dwords.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x02);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3194 
// HADDPS xmm1, xmm2 (F2 0F 7C /r): horizontal add of packed singles.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the single-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3203 
// HADDPD xmm1, xmm2 (66 0F 7C /r): horizontal add of packed doubles.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3212 
// PHSUBW xmm1, xmm2 (66 0F 38 05 /r): horizontal subtract of packed words.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x05);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3222 
// PHSUBD xmm1, xmm2 (66 0F 38 06 /r): horizontal subtract of packed dwords.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x06);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3232 
// HSUBPS xmm1, xmm2 (F2 0F 7D /r): horizontal subtract of packed singles.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the single-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3241 
// HSUBPD xmm1, xmm2 (66 0F 7D /r): horizontal subtract of packed doubles.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the double-precision form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3250 
// PMINSB xmm1, xmm2 (66 0F 38 38 /r): minimum of packed signed bytes.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x38);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3260 
// PMAXSB xmm1, xmm2 (66 0F 38 3C /r): maximum of packed signed bytes.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x3C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3270 
// PMINSW xmm1, xmm2 (66 0F EA /r): minimum of packed signed words.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3279 
// PMAXSW xmm1, xmm2 (66 0F EE /r): maximum of packed signed words.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3288 
// PMINSD xmm1, xmm2 (66 0F 38 39 /r): minimum of packed signed dwords.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x39);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3298 
// PMAXSD xmm1, xmm2 (66 0F 38 3D /r): maximum of packed signed dwords.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);  // Three-byte opcode, 0F 38 map.
  EmitUint8(0x38);
  EmitUint8(0x3D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3308 
// PMINUB xmm1, xmm2 (66 0F DA /r): minimum of packed unsigned bytes.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3317 
pmaxub(XmmRegister dst,XmmRegister src)3318 void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
3319   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3320   EmitUint8(0x66);
3321   EmitOptionalRex32(dst, src);
3322   EmitUint8(0x0F);
3323   EmitUint8(0xDE);
3324   EmitXmmRegisterOperand(dst.LowBits(), src);
3325 }
3326 
pminuw(XmmRegister dst,XmmRegister src)3327 void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
3328   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3329   EmitUint8(0x66);
3330   EmitOptionalRex32(dst, src);
3331   EmitUint8(0x0F);
3332   EmitUint8(0x38);
3333   EmitUint8(0x3A);
3334   EmitXmmRegisterOperand(dst.LowBits(), src);
3335 }
3336 
pmaxuw(XmmRegister dst,XmmRegister src)3337 void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
3338   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3339   EmitUint8(0x66);
3340   EmitOptionalRex32(dst, src);
3341   EmitUint8(0x0F);
3342   EmitUint8(0x38);
3343   EmitUint8(0x3E);
3344   EmitXmmRegisterOperand(dst.LowBits(), src);
3345 }
3346 
pminud(XmmRegister dst,XmmRegister src)3347 void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
3348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3349   EmitUint8(0x66);
3350   EmitOptionalRex32(dst, src);
3351   EmitUint8(0x0F);
3352   EmitUint8(0x38);
3353   EmitUint8(0x3B);
3354   EmitXmmRegisterOperand(dst.LowBits(), src);
3355 }
3356 
pmaxud(XmmRegister dst,XmmRegister src)3357 void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
3358   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3359   EmitUint8(0x66);
3360   EmitOptionalRex32(dst, src);
3361   EmitUint8(0x0F);
3362   EmitUint8(0x38);
3363   EmitUint8(0x3F);
3364   EmitXmmRegisterOperand(dst.LowBits(), src);
3365 }
3366 
// --- Packed floating-point min/max (xmm, xmm) ---
// Single-precision forms have no prefix; double-precision forms carry the
// 0x66 prefix. Both share opcodes 0F 5D (min) / 0F 5F (max).

// minps — packed single-precision min: [REX] 0F 5D /r.
minps(XmmRegister dst,XmmRegister src)3367 void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
3368   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3369   EmitOptionalRex32(dst, src);
3370   EmitUint8(0x0F);
3371   EmitUint8(0x5D);
3372   EmitXmmRegisterOperand(dst.LowBits(), src);
3373 }
3374 
// maxps — packed single-precision max: [REX] 0F 5F /r.
maxps(XmmRegister dst,XmmRegister src)3375 void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
3376   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3377   EmitOptionalRex32(dst, src);
3378   EmitUint8(0x0F);
3379   EmitUint8(0x5F);
3380   EmitXmmRegisterOperand(dst.LowBits(), src);
3381 }
3382 
// minpd — packed double-precision min: 66 [REX] 0F 5D /r.
minpd(XmmRegister dst,XmmRegister src)3383 void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
3384   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3385   EmitUint8(0x66);
3386   EmitOptionalRex32(dst, src);
3387   EmitUint8(0x0F);
3388   EmitUint8(0x5D);
3389   EmitXmmRegisterOperand(dst.LowBits(), src);
3390 }
3391 
// maxpd — packed double-precision max: 66 [REX] 0F 5F /r.
maxpd(XmmRegister dst,XmmRegister src)3392 void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
3393   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3394   EmitUint8(0x66);
3395   EmitOptionalRex32(dst, src);
3396   EmitUint8(0x0F);
3397   EmitUint8(0x5F);
3398   EmitXmmRegisterOperand(dst.LowBits(), src);
3399 }
3400 
// --- Packed integer compares (xmm, xmm) ---
// pcmpeq{b,w,d} use 66 0F 74/75/76; pcmpgt{b,w,d} use 66 0F 64/65/66.
// The quadword forms are three-byte-opcode instructions:
// pcmpeqq = 66 0F 38 29, pcmpgtq = 66 0F 38 37.
// NOTE(review): the three-byte forms require SSE4.1/SSE4.2 — no feature
// check is done here; presumably callers gate on cpuid.

// pcmpeqb — byte equality compare: 66 [REX] 0F 74 /r.
pcmpeqb(XmmRegister dst,XmmRegister src)3401 void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
3402   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3403   EmitUint8(0x66);
3404   EmitOptionalRex32(dst, src);
3405   EmitUint8(0x0F);
3406   EmitUint8(0x74);
3407   EmitXmmRegisterOperand(dst.LowBits(), src);
3408 }
3409 
// pcmpeqw — word equality compare: 66 [REX] 0F 75 /r.
pcmpeqw(XmmRegister dst,XmmRegister src)3410 void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
3411   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3412   EmitUint8(0x66);
3413   EmitOptionalRex32(dst, src);
3414   EmitUint8(0x0F);
3415   EmitUint8(0x75);
3416   EmitXmmRegisterOperand(dst.LowBits(), src);
3417 }
3418 
// pcmpeqd — dword equality compare: 66 [REX] 0F 76 /r.
pcmpeqd(XmmRegister dst,XmmRegister src)3419 void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
3420   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3421   EmitUint8(0x66);
3422   EmitOptionalRex32(dst, src);
3423   EmitUint8(0x0F);
3424   EmitUint8(0x76);
3425   EmitXmmRegisterOperand(dst.LowBits(), src);
3426 }
3427 
// pcmpeqq — qword equality compare: 66 [REX] 0F 38 29 /r.
pcmpeqq(XmmRegister dst,XmmRegister src)3428 void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
3429   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3430   EmitUint8(0x66);
3431   EmitOptionalRex32(dst, src);
3432   EmitUint8(0x0F);
3433   EmitUint8(0x38);
3434   EmitUint8(0x29);
3435   EmitXmmRegisterOperand(dst.LowBits(), src);
3436 }
3437 
// pcmpgtb — signed byte greater-than compare: 66 [REX] 0F 64 /r.
pcmpgtb(XmmRegister dst,XmmRegister src)3438 void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
3439   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3440   EmitUint8(0x66);
3441   EmitOptionalRex32(dst, src);
3442   EmitUint8(0x0F);
3443   EmitUint8(0x64);
3444   EmitXmmRegisterOperand(dst.LowBits(), src);
3445 }
3446 
// pcmpgtw — signed word greater-than compare: 66 [REX] 0F 65 /r.
pcmpgtw(XmmRegister dst,XmmRegister src)3447 void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
3448   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3449   EmitUint8(0x66);
3450   EmitOptionalRex32(dst, src);
3451   EmitUint8(0x0F);
3452   EmitUint8(0x65);
3453   EmitXmmRegisterOperand(dst.LowBits(), src);
3454 }
3455 
// pcmpgtd — signed dword greater-than compare: 66 [REX] 0F 66 /r.
pcmpgtd(XmmRegister dst,XmmRegister src)3456 void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
3457   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3458   EmitUint8(0x66);
3459   EmitOptionalRex32(dst, src);
3460   EmitUint8(0x0F);
3461   EmitUint8(0x66);
3462   EmitXmmRegisterOperand(dst.LowBits(), src);
3463 }
3464 
// pcmpgtq — signed qword greater-than compare: 66 [REX] 0F 38 37 /r.
pcmpgtq(XmmRegister dst,XmmRegister src)3465 void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
3466   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3467   EmitUint8(0x66);
3468   EmitOptionalRex32(dst, src);
3469   EmitUint8(0x0F);
3470   EmitUint8(0x38);
3471   EmitUint8(0x37);
3472   EmitXmmRegisterOperand(dst.LowBits(), src);
3473 }
3474 
// --- Shuffles with an 8-bit immediate selector ---
// The immediate is emitted as the final byte after the ModRM operand.
// NOTE(review): no is_uint8() check on imm here (unlike the psll*/psrl*
// family below) — imm.value() is truncated to one byte by EmitUint8;
// confirm callers always pass an 8-bit selector.

// shufpd — shuffle packed doubles: 66 [REX] 0F C6 /r ib.
shufpd(XmmRegister dst,XmmRegister src,const Immediate & imm)3475 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3476   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3477   EmitUint8(0x66);
3478   EmitOptionalRex32(dst, src);
3479   EmitUint8(0x0F);
3480   EmitUint8(0xC6);
3481   EmitXmmRegisterOperand(dst.LowBits(), src);
3482   EmitUint8(imm.value());
3483 }
3484 
3485 
// shufps — shuffle packed singles: [REX] 0F C6 /r ib.
shufps(XmmRegister dst,XmmRegister src,const Immediate & imm)3486 void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3487   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3488   EmitOptionalRex32(dst, src);
3489   EmitUint8(0x0F);
3490   EmitUint8(0xC6);
3491   EmitXmmRegisterOperand(dst.LowBits(), src);
3492   EmitUint8(imm.value());
3493 }
3494 
3495 
// pshufd — shuffle packed dwords: 66 [REX] 0F 70 /r ib.
pshufd(XmmRegister dst,XmmRegister src,const Immediate & imm)3496 void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3497   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3498   EmitUint8(0x66);
3499   EmitOptionalRex32(dst, src);
3500   EmitUint8(0x0F);
3501   EmitUint8(0x70);
3502   EmitXmmRegisterOperand(dst.LowBits(), src);
3503   EmitUint8(imm.value());
3504 }
3505 
3506 
// --- Packed interleave/unpack (xmm, xmm) ---
// Low-half unpacks use opcodes 66 0F 60/61/62/6C (byte/word/dword/qword);
// high-half unpacks use 66 0F 68/69/6A/6D.

// punpcklbw — interleave low bytes: 66 [REX] 0F 60 /r.
punpcklbw(XmmRegister dst,XmmRegister src)3507 void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
3508   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3509   EmitUint8(0x66);
3510   EmitOptionalRex32(dst, src);
3511   EmitUint8(0x0F);
3512   EmitUint8(0x60);
3513   EmitXmmRegisterOperand(dst.LowBits(), src);
3514 }
3515 
3516 
// punpcklwd — interleave low words: 66 [REX] 0F 61 /r.
punpcklwd(XmmRegister dst,XmmRegister src)3517 void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
3518   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3519   EmitUint8(0x66);
3520   EmitOptionalRex32(dst, src);
3521   EmitUint8(0x0F);
3522   EmitUint8(0x61);
3523   EmitXmmRegisterOperand(dst.LowBits(), src);
3524 }
3525 
3526 
// punpckldq — interleave low dwords: 66 [REX] 0F 62 /r.
punpckldq(XmmRegister dst,XmmRegister src)3527 void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
3528   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3529   EmitUint8(0x66);
3530   EmitOptionalRex32(dst, src);
3531   EmitUint8(0x0F);
3532   EmitUint8(0x62);
3533   EmitXmmRegisterOperand(dst.LowBits(), src);
3534 }
3535 
3536 
// punpcklqdq — interleave low qwords: 66 [REX] 0F 6C /r.
punpcklqdq(XmmRegister dst,XmmRegister src)3537 void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
3538   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3539   EmitUint8(0x66);
3540   EmitOptionalRex32(dst, src);
3541   EmitUint8(0x0F);
3542   EmitUint8(0x6C);
3543   EmitXmmRegisterOperand(dst.LowBits(), src);
3544 }
3545 
3546 
// punpckhbw — interleave high bytes: 66 [REX] 0F 68 /r.
punpckhbw(XmmRegister dst,XmmRegister src)3547 void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
3548   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3549   EmitUint8(0x66);
3550   EmitOptionalRex32(dst, src);
3551   EmitUint8(0x0F);
3552   EmitUint8(0x68);
3553   EmitXmmRegisterOperand(dst.LowBits(), src);
3554 }
3555 
3556 
// punpckhwd — interleave high words: 66 [REX] 0F 69 /r.
punpckhwd(XmmRegister dst,XmmRegister src)3557 void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
3558   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3559   EmitUint8(0x66);
3560   EmitOptionalRex32(dst, src);
3561   EmitUint8(0x0F);
3562   EmitUint8(0x69);
3563   EmitXmmRegisterOperand(dst.LowBits(), src);
3564 }
3565 
3566 
// punpckhdq — interleave high dwords: 66 [REX] 0F 6A /r.
punpckhdq(XmmRegister dst,XmmRegister src)3567 void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
3568   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3569   EmitUint8(0x66);
3570   EmitOptionalRex32(dst, src);
3571   EmitUint8(0x0F);
3572   EmitUint8(0x6A);
3573   EmitXmmRegisterOperand(dst.LowBits(), src);
3574 }
3575 
3576 
// punpckhqdq — interleave high qwords: 66 [REX] 0F 6D /r.
punpckhqdq(XmmRegister dst,XmmRegister src)3577 void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
3578   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3579   EmitUint8(0x66);
3580   EmitOptionalRex32(dst, src);
3581   EmitUint8(0x0F);
3582   EmitUint8(0x6D);
3583   EmitXmmRegisterOperand(dst.LowBits(), src);
3584 }
3585 
3586 
// --- Packed shifts by immediate count ---
// These use the "group" encodings 66 0F 71/72/73 (word/dword/qword) where
// the operation is selected by the ModRM reg field (the digit passed as
// the first argument of EmitXmmRegisterOperand): /6 = left logical,
// /4 = right arithmetic, /2 = right logical, /3 = psrldq (byte shift).
// Only REX.B can be needed (the xmm is the rm operand), hence the
// EmitOptionalRex(false, false, false, false, reg.NeedsRex()) call.

// psllw — shift words left: 66 [REX] 0F 71 /6 ib.
psllw(XmmRegister reg,const Immediate & shift_count)3587 void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
3588   DCHECK(shift_count.is_uint8());
3589   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3590   EmitUint8(0x66);
3591   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3592   EmitUint8(0x0F);
3593   EmitUint8(0x71);
3594   EmitXmmRegisterOperand(6, reg);
3595   EmitUint8(shift_count.value());
3596 }
3597 
3598 
// pslld — shift dwords left: 66 [REX] 0F 72 /6 ib.
pslld(XmmRegister reg,const Immediate & shift_count)3599 void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
3600   DCHECK(shift_count.is_uint8());
3601   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3602   EmitUint8(0x66);
3603   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3604   EmitUint8(0x0F);
3605   EmitUint8(0x72);
3606   EmitXmmRegisterOperand(6, reg);
3607   EmitUint8(shift_count.value());
3608 }
3609 
3610 
// psllq — shift qwords left: 66 [REX] 0F 73 /6 ib.
psllq(XmmRegister reg,const Immediate & shift_count)3611 void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
3612   DCHECK(shift_count.is_uint8());
3613   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3614   EmitUint8(0x66);
3615   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3616   EmitUint8(0x0F);
3617   EmitUint8(0x73);
3618   EmitXmmRegisterOperand(6, reg);
3619   EmitUint8(shift_count.value());
3620 }
3621 
3622 
// psraw — arithmetic shift words right: 66 [REX] 0F 71 /4 ib.
psraw(XmmRegister reg,const Immediate & shift_count)3623 void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
3624   DCHECK(shift_count.is_uint8());
3625   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3626   EmitUint8(0x66);
3627   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3628   EmitUint8(0x0F);
3629   EmitUint8(0x71);
3630   EmitXmmRegisterOperand(4, reg);
3631   EmitUint8(shift_count.value());
3632 }
3633 
3634 
// psrad — arithmetic shift dwords right: 66 [REX] 0F 72 /4 ib.
psrad(XmmRegister reg,const Immediate & shift_count)3635 void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
3636   DCHECK(shift_count.is_uint8());
3637   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3638   EmitUint8(0x66);
3639   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3640   EmitUint8(0x0F);
3641   EmitUint8(0x72);
3642   EmitXmmRegisterOperand(4, reg);
3643   EmitUint8(shift_count.value());
3644 }
3645 
3646 
// psrlw — logical shift words right: 66 [REX] 0F 71 /2 ib.
psrlw(XmmRegister reg,const Immediate & shift_count)3647 void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
3648   DCHECK(shift_count.is_uint8());
3649   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3650   EmitUint8(0x66);
3651   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3652   EmitUint8(0x0F);
3653   EmitUint8(0x71);
3654   EmitXmmRegisterOperand(2, reg);
3655   EmitUint8(shift_count.value());
3656 }
3657 
3658 
// psrld — logical shift dwords right: 66 [REX] 0F 72 /2 ib.
psrld(XmmRegister reg,const Immediate & shift_count)3659 void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
3660   DCHECK(shift_count.is_uint8());
3661   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3662   EmitUint8(0x66);
3663   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3664   EmitUint8(0x0F);
3665   EmitUint8(0x72);
3666   EmitXmmRegisterOperand(2, reg);
3667   EmitUint8(shift_count.value());
3668 }
3669 
3670 
// psrlq — logical shift qwords right: 66 [REX] 0F 73 /2 ib.
psrlq(XmmRegister reg,const Immediate & shift_count)3671 void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
3672   DCHECK(shift_count.is_uint8());
3673   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3674   EmitUint8(0x66);
3675   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3676   EmitUint8(0x0F);
3677   EmitUint8(0x73);
3678   EmitXmmRegisterOperand(2, reg);
3679   EmitUint8(shift_count.value());
3680 }
3681 
3682 
// psrldq — shift whole register right by bytes: 66 [REX] 0F 73 /3 ib.
psrldq(XmmRegister reg,const Immediate & shift_count)3683 void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
3684   DCHECK(shift_count.is_uint8());
3685   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3686   EmitUint8(0x66);
3687   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3688   EmitUint8(0x0F);
3689   EmitUint8(0x73);
3690   EmitXmmRegisterOperand(3, reg);
3691   EmitUint8(shift_count.value());
3692 }
3693 
3694 
// --- x87 floating-point instructions ---
// Memory forms select the operation through the ModRM reg digit passed to
// EmitOperand (the "/n" in the opcode reference); the no-operand forms are
// fixed two/three-byte sequences.

// fldl — load m64fp onto the x87 stack: DD /0.
fldl(const Address & src)3695 void X86_64Assembler::fldl(const Address& src) {
3696   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3697   EmitUint8(0xDD);
3698   EmitOperand(0, src);
3699 }
3700 
3701 
// fstl — store ST(0) to m64fp without popping: DD /2.
fstl(const Address & dst)3702 void X86_64Assembler::fstl(const Address& dst) {
3703   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3704   EmitUint8(0xDD);
3705   EmitOperand(2, dst);
3706 }
3707 
3708 
// fstpl — store ST(0) to m64fp and pop: DD /3.
fstpl(const Address & dst)3709 void X86_64Assembler::fstpl(const Address& dst) {
3710   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3711   EmitUint8(0xDD);
3712   EmitOperand(3, dst);
3713 }
3714 
3715 
// fstsw — store the FPU status word into AX, with a wait prefix:
// 9B DF E0 (fstsw ax).
fstsw()3716 void X86_64Assembler::fstsw() {
3717   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3718   EmitUint8(0x9B);
3719   EmitUint8(0xDF);
3720   EmitUint8(0xE0);
3721 }
3722 
3723 
// fnstcw — store the FPU control word (no wait): D9 /7.
fnstcw(const Address & dst)3724 void X86_64Assembler::fnstcw(const Address& dst) {
3725   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3726   EmitUint8(0xD9);
3727   EmitOperand(7, dst);
3728 }
3729 
3730 
// fldcw — load the FPU control word: D9 /5.
fldcw(const Address & src)3731 void X86_64Assembler::fldcw(const Address& src) {
3732   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3733   EmitUint8(0xD9);
3734   EmitOperand(5, src);
3735 }
3736 
3737 
// fistpl — store ST(0) as m64int and pop: DF /7.
fistpl(const Address & dst)3738 void X86_64Assembler::fistpl(const Address& dst) {
3739   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3740   EmitUint8(0xDF);
3741   EmitOperand(7, dst);
3742 }
3743 
3744 
// fistps — store ST(0) as m32int and pop: DB /3.
fistps(const Address & dst)3745 void X86_64Assembler::fistps(const Address& dst) {
3746   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3747   EmitUint8(0xDB);
3748   EmitOperand(3, dst);
3749 }
3750 
3751 
// fildl — load m64int onto the x87 stack: DF /5.
fildl(const Address & src)3752 void X86_64Assembler::fildl(const Address& src) {
3753   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3754   EmitUint8(0xDF);
3755   EmitOperand(5, src);
3756 }
3757 
3758 
// filds — load m32int onto the x87 stack: DB /0.
filds(const Address & src)3759 void X86_64Assembler::filds(const Address& src) {
3760   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3761   EmitUint8(0xDB);
3762   EmitOperand(0, src);
3763 }
3764 
3765 
// fincstp — increment the x87 stack-top pointer: D9 F7.
fincstp()3766 void X86_64Assembler::fincstp() {
3767   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3768   EmitUint8(0xD9);
3769   EmitUint8(0xF7);
3770 }
3771 
3772 
// ffree — mark ST(i) as empty: DD C0+i.
// NOTE(review): CHECK_LT(.., 7) rejects index 7 even though ST(7) exists
// (FFREE ST(7) = DD C7) — confirm the tighter bound is intentional.
ffree(const Immediate & index)3773 void X86_64Assembler::ffree(const Immediate& index) {
3774   CHECK_LT(index.value(), 7);
3775   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3776   EmitUint8(0xDD);
3777   EmitUint8(0xC0 + index.value());
3778 }
3779 
3780 
// fsin — replace ST(0) with sin(ST(0)): D9 FE.
fsin()3781 void X86_64Assembler::fsin() {
3782   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3783   EmitUint8(0xD9);
3784   EmitUint8(0xFE);
3785 }
3786 
3787 
// fcos — replace ST(0) with cos(ST(0)): D9 FF.
fcos()3788 void X86_64Assembler::fcos() {
3789   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3790   EmitUint8(0xD9);
3791   EmitUint8(0xFF);
3792 }
3793 
3794 
// fptan — partial tangent of ST(0): D9 F2.
fptan()3795 void X86_64Assembler::fptan() {
3796   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3797   EmitUint8(0xD9);
3798   EmitUint8(0xF2);
3799 }
3800 
// fucompp — unordered compare ST(0) with ST(1), pop twice: DA E9.
fucompp()3801 void X86_64Assembler::fucompp() {
3802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3803   EmitUint8(0xDA);
3804   EmitUint8(0xE9);
3805 }
3806 
3807 
// fprem — partial remainder ST(0) / ST(1): D9 F8.
fprem()3808 void X86_64Assembler::fprem() {
3809   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3810   EmitUint8(0xD9);
3811   EmitUint8(0xF8);
3812 }
3813 
3814 
// --- xchg (exchange) ---
// The single-byte form 0x90+r is "xchg (e|r)ax, r"; the general form is
// 0x87 /r. In 64-bit mode the bare byte 0x90 is architecturally NOP.

// xchgl — 32-bit register exchange.
// NOTE(review): when both operands are RAX this falls into the short-form
// branch and emits 0x90, which is NOP — a true 32-bit xchg of EAX with
// itself (87 C0) would zero-extend the upper half of RAX, a NOP does not.
// Presumably no caller relies on that; confirm (the xchgq overload below
// handles the same case explicitly).
xchgl(CpuRegister dst,CpuRegister src)3815 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
3816   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3817   // There is a short version for rax.
3818   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3819   // work.
3820   const bool src_rax = src.AsRegister() == RAX;
3821   const bool dst_rax = dst.AsRegister() == RAX;
3822   if (src_rax || dst_rax) {
3823     EmitOptionalRex32(src_rax ? dst : src);
3824     EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3825     return;
3826   }
3827 
3828   // General case.
3829   EmitOptionalRex32(src, dst);
3830   EmitUint8(0x87);
3831   EmitRegisterOperand(src.LowBits(), dst.LowBits());
3832 }
3833 
3834 
// xchgq — 64-bit register exchange: REX.W 87 /r, or REX.W 90+r short form
// when one operand is RAX; xchgq(RAX, RAX) is emitted as a plain NOP.
xchgq(CpuRegister dst,CpuRegister src)3835 void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
3836   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3837   // There is a short version for rax.
3838   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3839   // work.
3840   const bool src_rax = src.AsRegister() == RAX;
3841   const bool dst_rax = dst.AsRegister() == RAX;
3842   if (src_rax || dst_rax) {
3843     // If src == target, emit a nop instead.
3844     if (src_rax && dst_rax) {
3845       EmitUint8(0x90);
3846     } else {
3847       EmitRex64(src_rax ? dst : src);
3848       EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3849     }
3850     return;
3851   }
3852 
3853   // General case.
3854   EmitRex64(src, dst);
3855   EmitUint8(0x87);
3856   EmitRegisterOperand(src.LowBits(), dst.LowBits());
3857 }
3858 
3859 
// xchgl — exchange a 32-bit register with memory: [REX] 87 /r.
// Note: memory-form xchg is implicitly locked by the CPU.
xchgl(CpuRegister reg,const Address & address)3860 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
3861   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3862   EmitOptionalRex32(reg, address);
3863   EmitUint8(0x87);
3864   EmitOperand(reg.LowBits(), address);
3865 }
3866 
3867 
// --- cmp (compare, sets flags only) ---
// Immediate forms route through EmitComplex with ModRM digit /7 (the CMP
// slot of the 80/81/83 immediate group); register/memory forms use
// 0x3B (r <- r/m direction) or 0x39 (r/m <- r direction).

// cmpb — compare m8 with imm8: [REX] 80 /7 ib.
// NOTE(review): the guard is is_int32() but only the low byte of the
// immediate is emitted, so values outside int8 range are silently
// truncated — confirm callers only pass 8-bit immediates.
cmpb(const Address & address,const Immediate & imm)3868 void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
3869   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3870   CHECK(imm.is_int32());
3871   EmitOptionalRex32(address);
3872   EmitUint8(0x80);
3873   EmitOperand(7, address);
3874   EmitUint8(imm.value() & 0xFF);
3875 }
3876 
3877 
// cmpw — compare m16 with immediate: 66 [REX] 81/83 /7 (via EmitComplex).
cmpw(const Address & address,const Immediate & imm)3878 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
3879   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3880   CHECK(imm.is_int32());
3881   EmitOperandSizeOverride();
3882   EmitOptionalRex32(address);
3883   EmitComplex(7, address, imm, /* is_16_op= */ true);
3884 }
3885 
3886 
// cmpl — compare r32 with immediate.
cmpl(CpuRegister reg,const Immediate & imm)3887 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
3888   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3889   CHECK(imm.is_int32());
3890   EmitOptionalRex32(reg);
3891   EmitComplex(7, Operand(reg), imm);
3892 }
3893 
3894 
// cmpl — compare r32 with r32: [REX] 3B /r.
cmpl(CpuRegister reg0,CpuRegister reg1)3895 void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
3896   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3897   EmitOptionalRex32(reg0, reg1);
3898   EmitUint8(0x3B);
3899   EmitOperand(reg0.LowBits(), Operand(reg1));
3900 }
3901 
3902 
// cmpl — compare r32 with m32: [REX] 3B /r.
cmpl(CpuRegister reg,const Address & address)3903 void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
3904   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3905   EmitOptionalRex32(reg, address);
3906   EmitUint8(0x3B);
3907   EmitOperand(reg.LowBits(), address);
3908 }
3909 
3910 
// cmpl — compare m32 with r32 (reversed direction): [REX] 39 /r.
cmpl(const Address & address,CpuRegister reg)3911 void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
3912   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3913   EmitOptionalRex32(reg, address);
3914   EmitUint8(0x39);
3915   EmitOperand(reg.LowBits(), address);
3916 }
3917 
3918 
// cmpl — compare m32 with immediate.
cmpl(const Address & address,const Immediate & imm)3919 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
3920   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3921   CHECK(imm.is_int32());
3922   EmitOptionalRex32(address);
3923   EmitComplex(7, address, imm);
3924 }
3925 
3926 
// cmpq — compare r64 with r64: REX.W 3B /r.
cmpq(CpuRegister reg0,CpuRegister reg1)3927 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
3928   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3929   EmitRex64(reg0, reg1);
3930   EmitUint8(0x3B);
3931   EmitOperand(reg0.LowBits(), Operand(reg1));
3932 }
3933 
3934 
// cmpq — compare r64 with a sign-extended 32-bit immediate.
cmpq(CpuRegister reg,const Immediate & imm)3935 void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
3936   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3937   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
3938   EmitRex64(reg);
3939   EmitComplex(7, Operand(reg), imm);
3940 }
3941 
3942 
// cmpq — compare r64 with m64: REX.W 3B /r.
cmpq(CpuRegister reg,const Address & address)3943 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
3944   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3945   EmitRex64(reg, address);
3946   EmitUint8(0x3B);
3947   EmitOperand(reg.LowBits(), address);
3948 }
3949 
3950 
// cmpq — compare m64 with a sign-extended 32-bit immediate.
cmpq(const Address & address,const Immediate & imm)3951 void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
3952   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
3953   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3954   EmitRex64(address);
3955   EmitComplex(7, address, imm);
3956 }
3957 
3958 
// addl — 32-bit add, register destination: [REX] 03 /r.
addl(CpuRegister dst,CpuRegister src)3959 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
3960   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3961   EmitOptionalRex32(dst, src);
3962   EmitUint8(0x03);
3963   EmitRegisterOperand(dst.LowBits(), src.LowBits());
3964 }
3965 
3966 
// addl — 32-bit add from memory: [REX] 03 /r.
addl(CpuRegister reg,const Address & address)3967 void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
3968   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3969   EmitOptionalRex32(reg, address);
3970   EmitUint8(0x03);
3971   EmitOperand(reg.LowBits(), address);
3972 }
3973 
3974 
// --- test (logical AND, sets flags, discards result) ---
// Register/register and register/memory forms use 85 /r; immediate forms
// use the F6/F7 group with ModRM digit /0, plus the A8/A9 RAX short forms.

// testl — test r32 against r32: [REX] 85 /r.
testl(CpuRegister reg1,CpuRegister reg2)3975 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
3976   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3977   EmitOptionalRex32(reg1, reg2);
3978   EmitUint8(0x85);
3979   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
3980 }
3981 
3982 
// testl — test r32 against m32: [REX] 85 /r.
testl(CpuRegister reg,const Address & address)3983 void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
3984   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3985   EmitOptionalRex32(reg, address);
3986   EmitUint8(0x85);
3987   EmitOperand(reg.LowBits(), address);
3988 }
3989 
3990 
// testl — test r32 against an immediate, choosing the shortest encoding:
// byte form (A8 / F6 C0+r) when the immediate fits in a byte and the
// register has a byte variant without REX (RAX..RBX, i.e. encoding < 4);
// A9 short form for RAX; otherwise the general F7 /0 id form.
testl(CpuRegister reg,const Immediate & immediate)3991 void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
3992   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3993   // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
3994   // we only test the byte CpuRegister to keep the encoding short.
3995   if (immediate.is_uint8() && reg.AsRegister() < 4) {
3996     // Use zero-extended 8-bit immediate.
3997     if (reg.AsRegister() == RAX) {
3998       EmitUint8(0xA8);
3999     } else {
4000       EmitUint8(0xF6);
4001       EmitUint8(0xC0 + reg.AsRegister());
4002     }
4003     EmitUint8(immediate.value() & 0xFF);
4004   } else if (reg.AsRegister() == RAX) {
4005     // Use short form if the destination is RAX.
4006     EmitUint8(0xA9);
4007     EmitImmediate(immediate);
4008   } else {
4009     EmitOptionalRex32(reg);
4010     EmitUint8(0xF7);
4011     EmitOperand(0, Operand(reg));
4012     EmitImmediate(immediate);
4013   }
4014 }
4015 
4016 
// testq — test r64 against r64: REX.W 85 /r.
testq(CpuRegister reg1,CpuRegister reg2)4017 void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
4018   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4019   EmitRex64(reg1, reg2);
4020   EmitUint8(0x85);
4021   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
4022 }
4023 
4024 
// testq — test r64 against m64: REX.W 85 /r.
testq(CpuRegister reg,const Address & address)4025 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
4026   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4027   EmitRex64(reg, address);
4028   EmitUint8(0x85);
4029   EmitOperand(reg.LowBits(), address);
4030 }
4031 
4032 
// testb — test m8 against imm8: [REX] F6 /0 ib.
// Register::RAX (encoding 0) doubles as the /0 opcode-extension digit here.
testb(const Address & dst,const Immediate & imm)4033 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4034   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4035   EmitOptionalRex32(dst);
4036   EmitUint8(0xF6);
4037   EmitOperand(Register::RAX, dst);
4038   CHECK(imm.is_int8());
4039   EmitUint8(imm.value() & 0xFF);
4040 }
4041 
4042 
// testl — test m32 against imm32: [REX] F7 /0 id.
testl(const Address & dst,const Immediate & imm)4043 void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
4044   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4045   EmitOptionalRex32(dst);
4046   EmitUint8(0xF7);
4047   EmitOperand(0, dst);
4048   EmitImmediate(imm);
4049 }
4050 
4051 
// --- and (bitwise AND) ---
// Register/memory forms use 23 /r (r <- r/m direction); immediate forms go
// through EmitComplex with ModRM digit /4 (the AND slot of the immediate
// group).

// andl — r32 &= r32: [REX] 23 /r.
andl(CpuRegister dst,CpuRegister src)4052 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
4053   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4054   EmitOptionalRex32(dst, src);
4055   EmitUint8(0x23);
4056   EmitOperand(dst.LowBits(), Operand(src));
4057 }
4058 
4059 
// andl — r32 &= m32: [REX] 23 /r.
andl(CpuRegister reg,const Address & address)4060 void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
4061   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4062   EmitOptionalRex32(reg, address);
4063   EmitUint8(0x23);
4064   EmitOperand(reg.LowBits(), address);
4065 }
4066 
4067 
// andl — r32 &= immediate.
andl(CpuRegister dst,const Immediate & imm)4068 void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
4069   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4070   EmitOptionalRex32(dst);
4071   EmitComplex(4, Operand(dst), imm);
4072 }
4073 
4074 
// andq — r64 &= sign-extended 32-bit immediate.
andq(CpuRegister reg,const Immediate & imm)4075 void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
4076   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4077   CHECK(imm.is_int32());  // andq only supports 32b immediate.
4078   EmitRex64(reg);
4079   EmitComplex(4, Operand(reg), imm);
4080 }
4081 
4082 
// andq — r64 &= r64: REX.W 23 /r.
andq(CpuRegister dst,CpuRegister src)4083 void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
4084   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4085   EmitRex64(dst, src);
4086   EmitUint8(0x23);
4087   EmitOperand(dst.LowBits(), Operand(src));
4088 }
4089 
4090 
// andq — r64 &= m64: REX.W 23 /r.
andq(CpuRegister dst,const Address & src)4091 void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
4092   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4093   EmitRex64(dst, src);
4094   EmitUint8(0x23);
4095   EmitOperand(dst.LowBits(), src);
4096 }
4097 
4098 
// andw — m16 &= 16-bit immediate: 66 [REX] 81/83 /4 (via EmitComplex).
andw(const Address & address,const Immediate & imm)4099 void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
4100   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4101   CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
4102   EmitUint8(0x66);
4103   EmitOptionalRex32(address);
4104   EmitComplex(4, address, imm, /* is_16_op= */ true);
4105 }
4106 
4107 
// --- or (bitwise OR) ---
// Register/memory forms use 0B /r; immediate forms go through EmitComplex
// with ModRM digit /1 (the OR slot of the immediate group).

// orl — r32 |= r32: [REX] 0B /r.
orl(CpuRegister dst,CpuRegister src)4108 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
4109   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4110   EmitOptionalRex32(dst, src);
4111   EmitUint8(0x0B);
4112   EmitOperand(dst.LowBits(), Operand(src));
4113 }
4114 
4115 
// orl — r32 |= m32: [REX] 0B /r.
orl(CpuRegister reg,const Address & address)4116 void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
4117   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4118   EmitOptionalRex32(reg, address);
4119   EmitUint8(0x0B);
4120   EmitOperand(reg.LowBits(), address);
4121 }
4122 
4123 
// orl — r32 |= immediate.
orl(CpuRegister dst,const Immediate & imm)4124 void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
4125   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4126   EmitOptionalRex32(dst);
4127   EmitComplex(1, Operand(dst), imm);
4128 }
4129 
4130 
// orq — r64 |= sign-extended 32-bit immediate.
orq(CpuRegister dst,const Immediate & imm)4131 void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
4132   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4133   CHECK(imm.is_int32());  // orq only supports 32b immediate.
4134   EmitRex64(dst);
4135   EmitComplex(1, Operand(dst), imm);
4136 }
4137 
4138 
// orq — r64 |= r64: REX.W 0B /r.
orq(CpuRegister dst,CpuRegister src)4139 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
4140   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4141   EmitRex64(dst, src);
4142   EmitUint8(0x0B);
4143   EmitOperand(dst.LowBits(), Operand(src));
4144 }
4145 
4146 
// orq — r64 |= m64: REX.W 0B /r.
orq(CpuRegister dst,const Address & src)4147 void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
4148   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4149   EmitRex64(dst, src);
4150   EmitUint8(0x0B);
4151   EmitOperand(dst.LowBits(), src);
4152 }
4153 
4154 
// --- xor (bitwise XOR) ---
// Register/memory forms use 33 /r; immediate forms go through EmitComplex
// with ModRM digit /6 (the XOR slot of the immediate group).

// xorl — r32 ^= r32: [REX] 33 /r.
xorl(CpuRegister dst,CpuRegister src)4155 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
4156   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4157   EmitOptionalRex32(dst, src);
4158   EmitUint8(0x33);
4159   EmitOperand(dst.LowBits(), Operand(src));
4160 }
4161 
4162 
// xorl — r32 ^= m32: [REX] 33 /r.
xorl(CpuRegister reg,const Address & address)4163 void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
4164   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4165   EmitOptionalRex32(reg, address);
4166   EmitUint8(0x33);
4167   EmitOperand(reg.LowBits(), address);
4168 }
4169 
4170 
// xorl — r32 ^= immediate.
xorl(CpuRegister dst,const Immediate & imm)4171 void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
4172   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4173   EmitOptionalRex32(dst);
4174   EmitComplex(6, Operand(dst), imm);
4175 }
4176 
4177 
// xorq — r64 ^= r64: REX.W 33 /r.
xorq(CpuRegister dst,CpuRegister src)4178 void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
4179   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4180   EmitRex64(dst, src);
4181   EmitUint8(0x33);
4182   EmitOperand(dst.LowBits(), Operand(src));
4183 }
4184 
4185 
// xorq — r64 ^= sign-extended 32-bit immediate.
xorq(CpuRegister dst,const Immediate & imm)4186 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
4187   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4188   CHECK(imm.is_int32());  // xorq only supports 32b immediate.
4189   EmitRex64(dst);
4190   EmitComplex(6, Operand(dst), imm);
4191 }
4192 
// xorq — r64 ^= m64: REX.W 33 /r.
xorq(CpuRegister dst,const Address & src)4193 void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
4194   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4195   EmitRex64(dst, src);
4196   EmitUint8(0x33);
4197   EmitOperand(dst.LowBits(), src);
4198 }
4199 
4200 
#if 0
// Legacy REX-prefix helpers, disabled but kept for reference. They mutate the
// register arguments in place (subtracting 8 from extended registers) while
// accumulating the corresponding REX bits.
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  // Fixed: `mem` is a reference, not a pointer — `mem->rex()` would not compile
  // if this block were ever re-enabled.
  uint8_t rex = mem.rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
4253 
// ADD family. Immediate forms use ModRM opcode extension /0 via EmitComplex
// (0x81/0x83 /0); register/memory forms use 0x03 (ADD r, r/m) or 0x01
// (ADD r/m, r) depending on which operand is the destination.

void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);  // /0 = ADD.
}


void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);  // imm is sign-extended to 64 bits.
}


void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);  // ADD r64, r/m64.
  EmitOperand(dst.LowBits(), address);
}


void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);  // ADD r/m32, r32.
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


// 16-bit ADD to memory: operand-size override prefix (0x66) plus /0 immediate
// form, with the immediate truncated to 16 bits by EmitComplex's is_16_op path.
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}
4308 
4309 
// SUB family. Register/memory forms use 0x2B (SUB r, r/m); immediate forms
// use ModRM opcode extension /5 via EmitComplex.

void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 = SUB.
}


void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);  // imm is sign-extended to 64 bits.
}


void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4339 
4340 
subq(CpuRegister reg,const Address & address)4341 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4342   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4343   EmitRex64(reg, address);
4344   EmitUint8(0x2B);
4345   EmitOperand(reg.LowBits() & 7, address);
4346 }
4347 
4348 
// subl: 32-bit SUB of a memory operand from a register (0x2B /r).
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
4355 
4356 
// cdq: sign-extend EAX into EDX:EAX (0x99).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// cqo: sign-extend RAX into RDX:RAX (REX.W + 0x99).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


// idivl: signed divide EDX:EAX by r32 (0xF7 /7; 0xF8 | reg encodes
// mod=11, opcode extension 7, rm=reg).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// idivq: signed divide RDX:RAX by r64 (REX.W + 0xF7 /7).
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// divl: unsigned divide EDX:EAX by r32 (0xF7 /6; 0xF0 encodes extension 6).
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}


// divq: unsigned divide RDX:RAX by r64 (REX.W + 0xF7 /6).
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4400 
4401 
// IMUL/MUL family. Two-operand signed multiply is 0x0F 0xAF; the immediate
// forms pick 0x6B (sign-extended imm8) or 0x69 (imm32) depending on the
// immediate's magnitude; one-operand forms use 0xF7 with extension /5 (IMUL)
// or /4 (MUL), accumulating into (E/R)DX:(E/R)AX.

void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// Two-operand convenience form: dst = dst * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// Two-operand convenience form: reg = reg * imm.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// One-operand form: EDX:EAX = EAX * r32 (0xF7 /5).
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// One-operand form: RDX:RAX = RAX * r64 (REX.W + 0xF7 /5).
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


// Unsigned one-operand multiply: EDX:EAX = EAX * r32 (0xF7 /4).
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}
4527 
4528 
// Shift/rotate wrappers. All delegate to EmitGenericShift; the first argument
// selects 32-bit (false) vs 64-bit (true) operand size, the second is the
// ModRM opcode extension: /4 = SHL, /5 = SHR, /7 = SAR, /0 = ROL, /1 = ROR.
// The shifter-register forms require the count in CL (enforced downstream).

void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4627 
4628 
// NEG/NOT: unary group 0xF7 with extensions /3 (NEG) and /2 (NOT).
// Note that notl builds the register-direct ModRM byte inline (0xD0 | reg,
// i.e. mod=11/ext=2) while notq goes through EmitOperand — the two spellings
// produce identical bytes.

void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
4659 
4660 
// enter: 0xC8 imm16 imm8 — frame size (little-endian 16-bit) followed by the
// nesting level, which is hard-coded to 0 here.
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);  // Nesting level 0.
}


// leave: 0xC9 — tear down the current stack frame.
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// ret: 0xC3 — near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// ret imm16: 0xC2 — near return, popping imm16 extra bytes off the stack.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}
4690 
4691 
4692 
// nop: 0x90.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// int3: 0xCC — software breakpoint.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// hlt: 0xF4 — halt (privileged; used here to force a fault).
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4709 
4710 
// Conditional jump to a (possibly unbound) label. For a bound label the
// backward displacement is known: use the 2-byte short form (0x70+cc rel8)
// when it fits in a signed byte, else the 6-byte long form (0x0F 0x80+cc
// rel32). Unbound labels always get the long form so the 32-bit slot can be
// patched (and threaded into the label's link list) at Bind time.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound targets are always backward.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);  // rel8 from end of instruction.
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);  // rel32 from end of instruction.
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


// Conditional jump to a NearLabel: always the 2-byte short form, so the
// displacement must fit in a signed byte (checked for bound targets;
// enforced at Bind time for forward references).
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// jrcxz: 0xE3 rel8 — jump if RCX is zero. Only a short form exists.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4764 
4765 
// jmp through a register: 0xFF /4 (indirect near jump).
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// jmp through memory: 0xFF /4 with a memory operand.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// jmp to a label: 2-byte short form (0xEB rel8) for bound targets within
// range, else the 5-byte long form (0xE9 rel32). Unbound labels get the long
// form so the 32-bit slot can be patched at Bind time.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound targets are always backward.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// jmp to a NearLabel: always the short 0xEB rel8 form.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
4815 
4816 
// rep movsw: 0x66 0xF3 0xA5 — repeat 16-bit string move RCX times
// (operand-size prefix turns movsd/movsl into movsw).
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}


// lock: emits the 0xF0 prefix and returns `this` so the locked instruction
// can be chained, e.g. assembler->lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}


// cmpxchgl: 0x0F 0xB1 — compare EAX with r/m32, exchange on match.
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgq: REX.W + 0x0F 0xB1 — 64-bit compare-exchange against RAX.
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// mfence: 0x0F 0xAE 0xF0 — full memory fence.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}


// gs: emits the 0x65 segment-override prefix and returns `this` for chaining.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
4864 
4865 
AddImmediate(CpuRegister reg,const Immediate & imm)4866 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
4867   int value = imm.value();
4868   if (value != 0) {
4869     if (value > 0) {
4870       addl(reg, imm);
4871     } else {
4872       subl(reg, Immediate(value));
4873     }
4874   }
4875 }
4876 
4877 
// setcc: 0x0F 0x90+cc /0 — set the low byte of dst to 0/1 per `condition`.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  // (LowBits 4-7 without any REX select the legacy high-byte registers; an
  // empty REX prefix switches them to spl/bpl/sil/dil.)
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());  // mod=11, rm=dst.
}
4888 
// BMI1 VEX-encoded bit instructions. All three share opcode 0xF3 in the
// 0F38 map with the destination in VEX.vvvv and the source in ModRM.rm;
// they differ only in the ModRM.reg opcode extension: /3 = BLSI (isolate
// lowest set bit), /2 = BLSMSK (mask up to lowest set bit), /1 = BLSR
// (reset lowest set bit). VEX.W=1 selects the 64-bit operand size.

void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 = BLSI.
}

void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 = BLSMSK.
}

void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 = BLSR.
}
4942 
// bswapl: 0x0F 0xC8+rd — byte-swap a 32-bit register; REX.B only if needed.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// bswapq: REX.W + 0x0F 0xC8+rd — byte-swap a 64-bit register.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
4956 
// BSF/BSR: bit-scan forward (0x0F 0xBC) and reverse (0x0F 0xBD), in 32- and
// 64-bit register and memory source forms.

void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5020 
// POPCNT: 0xF3 0x0F 0xB8 — population count. Note the mandatory 0xF3 prefix
// must precede any REX prefix, hence it is emitted first.

void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5056 
// repne scasb: 0xF2 0xAE — scan bytes at [RDI] for AL, RCX times.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

// repne scasw: 0x66 0xF2 0xAF — 16-bit scan (operand-size prefix + scasw).
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

// repe cmpsw: 0x66 0xF3 0xA7 — compare 16-bit strings while equal.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// repe cmpsl: 0xF3 0xA7 — compare 32-bit strings while equal.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// repe cmpsq: 0xF3 REX.W 0xA7 — 64-bit compare; the REX prefix must come
// after 0xF3 and immediately before the opcode.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
5091 
5092 
LoadDoubleConstant(XmmRegister dst,double value)5093 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
5094   // TODO: Need to have a code constants table.
5095   int64_t constant = bit_cast<int64_t, double>(value);
5096   pushq(Immediate(High32Bits(constant)));
5097   pushq(Immediate(Low32Bits(constant)));
5098   movsd(dst, Address(CpuRegister(RSP), 0));
5099   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
5100 }
5101 
5102 
Align(int alignment,int offset)5103 void X86_64Assembler::Align(int alignment, int offset) {
5104   CHECK(IsPowerOfTwo(alignment));
5105   // Emit nop instruction until the real position is aligned.
5106   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
5107     nop();
5108   }
5109 }
5110 
5111 
void X86_64Assembler::Bind(Label* label) {
  // Binds 'label' to the current buffer position and patches every earlier
  // use. Unresolved uses form a linked list threaded through the 32-bit
  // displacement slots already emitted into the buffer: each slot holds the
  // position of the next use.
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);  // Next link in the chain.
    // Patch the slot with the rel32 displacement from the end of the slot.
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}
5123 
5124 
void X86_64Assembler::Bind(NearLabel* label) {
  // Binds a near (8-bit displacement) label. Unresolved uses are chained
  // through their one-byte displacement slots: each slot stores the
  // distance (delta) back to the previous use, with 0 ending the chain.
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Link to previous use.
    int offset = bound - (position + 1);  // rel8 from the end of the slot.
    CHECK(IsInt<8>(offset));  // Target must be reachable with a rel8.
    buffer_.Store<int8_t>(position, offset);
    // Walk to the previous use; a zero delta terminates the chain.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5138 
5139 
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  // Emits a pre-encoded operand (ModRM [+ SIB] [+ displacement]), merging
  // 'reg_or_opcode' into the reg field (bits 5:3) of the ModRM byte.
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // The reg field must be vacant.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  // Operands that refer to patchable locations carry a fixup to record.
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
5157 
5158 
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  // Emits 'imm' in the width the instruction expects: two bytes for
  // 16-bit (operand-size-prefixed) instructions, otherwise 32 bits when
  // the value fits, else the full 64 bits.
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
5169 
5170 
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  // Emits a group-1 ALU instruction (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP,
  // selected by 'reg_or_opcode') with an immediate operand, choosing the
  // shortest available encoding.
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    // General form: opcode 0x81 with a full-width immediate.
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5192 
5193 
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  // Emits a 32-bit displacement to 'label'. For a bound label the backward
  // offset is computed relative to the end of the current instruction
  // ('instruction_size' bytes past the displacement's start); otherwise the
  // use is recorded on the label's link chain for later patching.
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels always lie behind the current position.
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}
5203 
5204 
void X86_64Assembler::EmitLabelLink(Label* label) {
  // Records an unresolved use of 'label': the previous link position is
  // written into this displacement slot, and the label's head now points
  // here. Bind() later walks this chain and patches each slot.
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}
5211 
5212 
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  // Records an unresolved use of a near label in its one-byte slot. The
  // slot stores the delta back to the previous use (0 = end of chain).
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // Consecutive uses must be within 255 bytes.
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5226 
5227 
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  // Emits a shift/rotate of 'reg' by an immediate count. 'reg_or_opcode'
  // selects the operation within the shift group; 'wide' selects the
  // 64-bit (REX.W) form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    // Shift-by-one has a dedicated, shorter opcode (0xD1) with no
    // immediate byte.
    EmitUint8(0xD1);
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    // General shift-by-imm8 form (0xC1).
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}
5248 
5249 
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  // Emits a shift/rotate of 'operand' by CL (opcode 0xD3). The shift-count
  // register is architecturally fixed to RCX, hence the CHECK.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5264 
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5265 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5266   // REX.WRXB
5267   // W - 64-bit operand
5268   // R - MODRM.reg
5269   // X - SIB.index
5270   // B - MODRM.rm/SIB.base
5271   uint8_t rex = force ? 0x40 : 0;
5272   if (w) {
5273     rex |= 0x48;  // REX.W000
5274   }
5275   if (r) {
5276     rex |= 0x44;  // REX.0R00
5277   }
5278   if (x) {
5279     rex |= 0x42;  // REX.00X0
5280   }
5281   if (b) {
5282     rex |= 0x41;  // REX.000B
5283   }
5284   if (rex != 0) {
5285     EmitUint8(rex);
5286   }
5287 }
5288 
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  // Emits REX.B for 'reg' (ModRM.rm) only if the register requires it.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}
5292 
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  // REX.R for 'dst' (ModRM.reg) and REX.B for 'src' (ModRM.rm), if needed.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5296 
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  // REX.R for 'dst' and REX.B for 'src' (xmm/xmm form), if needed.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5300 
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  // REX.R for 'dst' and REX.B for 'src' (gp/xmm form), if needed.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5304 
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  // REX.R for 'dst' and REX.B for 'src' (xmm/gp form), if needed.
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5308 
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  // Memory operands precompute their REX bits (X for the index, B for the
  // base) in Operand::rex(); emit the byte only when non-zero.
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5315 
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  // Combines the operand's precomputed X/B bits with REX.R for 'dst';
  // emitted only when some bit is set.
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5325 
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  // Same as the CpuRegister overload: operand X/B bits plus REX.R for the
  // xmm destination; emitted only when some bit is set.
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5335 
void X86_64Assembler::EmitRex64() {
  // Bare REX.W (0x48): 64-bit operand size with no extended registers.
  EmitOptionalRex(false, true, false, false, false);
}
5339 
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  // REX.W, plus REX.B if 'reg' (ModRM.rm) is an extended register.
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}
5343 
void X86_64Assembler::EmitRex64(const Operand& operand) {
  // REX.W combined with the operand's precomputed X/B bits. Always
  // emitted, since W is unconditionally set.
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}
5349 
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  // REX.W, plus REX.R for 'dst' and REX.B for 'src' as needed.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5353 
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  // REX.W, plus REX.R for the xmm destination and REX.B for the gp source.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5357 
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  // REX.W, plus REX.R for the gp destination and REX.B for the xmm source.
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5361 
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  // REX.W plus the operand's X/B bits, plus REX.R for 'dst'. Always
  // emitted, since W is unconditionally set.
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5369 
void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  // Same as the CpuRegister overload, for an xmm destination.
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5377 
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
  // For src, SPL, BPL, SIL, DIL need the rex prefix.
  // Without any REX byte, encodings 4-7 in a byte-register slot select
  // AH/CH/DH/BH; forcing a bare REX (0x40) makes them SPL/BPL/SIL/DIL.
  bool force = src.AsRegister() > 3;
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}
5383 
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  // (Without any REX byte, encodings 4-7 would select AH/CH/DH/BH.)
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5398 
AddConstantArea()5399 void X86_64Assembler::AddConstantArea() {
5400   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5401   for (size_t i = 0, e = area.size(); i < e; i++) {
5402     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5403     EmitInt32(area[i]);
5404   }
5405 }
5406 
AppendInt32(int32_t v)5407 size_t ConstantArea::AppendInt32(int32_t v) {
5408   size_t result = buffer_.size() * elem_size_;
5409   buffer_.push_back(v);
5410   return result;
5411 }
5412 
AddInt32(int32_t v)5413 size_t ConstantArea::AddInt32(int32_t v) {
5414   // Look for an existing match.
5415   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5416     if (v == buffer_[i]) {
5417       return i * elem_size_;
5418     }
5419   }
5420 
5421   // Didn't match anything.
5422   return AppendInt32(v);
5423 }
5424 
AddInt64(int64_t v)5425 size_t ConstantArea::AddInt64(int64_t v) {
5426   int32_t v_low = v;
5427   int32_t v_high = v >> 32;
5428   if (buffer_.size() > 1) {
5429     // Ensure we don't pass the end of the buffer.
5430     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5431       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5432         return i * elem_size_;
5433       }
5434     }
5435   }
5436 
5437   // Didn't match anything.
5438   size_t result = buffer_.size() * elem_size_;
5439   buffer_.push_back(v_low);
5440   buffer_.push_back(v_high);
5441   return result;
5442 }
5443 
AddDouble(double v)5444 size_t ConstantArea::AddDouble(double v) {
5445   // Treat the value as a 64-bit integer value.
5446   return AddInt64(bit_cast<int64_t, double>(v));
5447 }
5448 
AddFloat(float v)5449 size_t ConstantArea::AddFloat(float v) {
5450   // Treat the value as a 32-bit integer value.
5451   return AddInt32(bit_cast<int32_t, float>(v));
5452 }
5453 
EmitVexPrefixByteZero(bool is_twobyte_form)5454 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5455   // Vex Byte 0,
5456   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5457   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5458   uint8_t vex_prefix = 0xC0;
5459   if (is_twobyte_form) {
5460     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
5461   } else {
5462     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
5463   }
5464   return vex_prefix;
5465 }
5466 
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)5467 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5468   // Vex Byte 1,
5469   uint8_t vex_prefix = VEX_INIT;
5470   /** Bit[7] This bit needs to be set to '1'
5471   otherwise the instruction is LES or LDS */
5472   if (!R) {
5473     // R .
5474     vex_prefix |= SET_VEX_R;
5475   }
5476   /** Bit[6] This bit needs to be set to '1'
5477   otherwise the instruction is LES or LDS */
5478   if (!X) {
5479     // X .
5480     vex_prefix |= SET_VEX_X;
5481   }
5482   /** Bit[5] This bit needs to be set to '1' */
5483   if (!B) {
5484     // B .
5485     vex_prefix |= SET_VEX_B;
5486   }
5487   /** Bits[4:0], Based on the instruction documentaion */
5488   vex_prefix |= SET_VEX_M;
5489   return vex_prefix;
5490 }
5491 
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Second byte of the 2-byte VEX prefix: ~R | ~vvvv | L | pp.
  uint8_t vex_prefix = VEX_INIT;
  // Bit[7]: inverted REX.R; must read as 1 when no extension is needed,
  // otherwise the byte would decode as LES or LDS.
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  // Bits[6:3]: 'vvvv', the extra source/dest register, stored inverted;
  // 1111b (0x78 here) means "no register".
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  // Bit[2]: "L" — 1 selects a 256-bit vector operation, 0 a 128-bit one.
  vex_prefix |= SET_VEX_L;
  // Bits[1:0]: "pp", the implied SIMD prefix.
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5525 
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Third byte of the 3-byte VEX prefix: W | ~vvvv | L | pp.
  uint8_t vex_prefix = VEX_INIT;

  // Bit[7]: "W". With the C5 (2-byte) form this position is fixed; here,
  // in the C4 form, it carries the REX.W-equivalent operand-size bit.
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3]: 'vvvv', the extra source/dest register, stored inverted.
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  // Bit[2]: "L" — 1 selects a 256-bit vector operation, 0 a 128-bit one.
  vex_prefix |= SET_VEX_L;
  // Bits[1:0]: "pp", the implied SIMD prefix.
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5557 
EmitVexPrefixByteTwo(bool W,int SET_VEX_L,int SET_VEX_PP)5558 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5559                                               int SET_VEX_L,
5560                                               int SET_VEX_PP) {
5561   // Vex Byte 2,
5562   uint8_t vex_prefix = VEX_INIT;
5563 
5564   /** Bit[7] This bits needs to be set to '1' with default value.
5565   When using C4H form of VEX prefix, REX.W value is ignored */
5566   if (W) {
5567     vex_prefix |= SET_VEX_W;
5568   }
5569   /** Bits[6:3] - 'vvvv' the source or dest register specifier */
5570   vex_prefix |= (0x0F << 3);
5571   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5572   VEX.L = 0 indicates 128 bit vector operation */
5573   vex_prefix |= SET_VEX_L;
5574 
5575   // Bits[1:0] -  "pp"
5576   if (SET_VEX_PP != SET_VEX_PP_NONE) {
5577     vex_prefix |= SET_VEX_PP;
5578   }
5579   return vex_prefix;
5580 }
5581 
5582 }  // namespace x86_64
5583 }  // namespace art
5584