1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2013 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_amd64_defs.h"
43
44
45 /* --------- Registers. --------- */
46
47 const RRegUniverse* getRRegUniverse_AMD64 ( void )
48 {
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_AMD64;
52 static Bool rRegUniverse_AMD64_initted = False;
53
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_AMD64;
56
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_AMD64_initted))
59 return ru;
60
61 RRegUniverse__init(ru);
62
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
66 ru->regs[ru->size++] = hregAMD64_RSI();
67 ru->regs[ru->size++] = hregAMD64_RDI();
68 ru->regs[ru->size++] = hregAMD64_R8();
69 ru->regs[ru->size++] = hregAMD64_R9();
70 ru->regs[ru->size++] = hregAMD64_R12();
71 ru->regs[ru->size++] = hregAMD64_R13();
72 ru->regs[ru->size++] = hregAMD64_R14();
73 ru->regs[ru->size++] = hregAMD64_R15();
74 ru->regs[ru->size++] = hregAMD64_RBX();
75 ru->regs[ru->size++] = hregAMD64_XMM3();
76 ru->regs[ru->size++] = hregAMD64_XMM4();
77 ru->regs[ru->size++] = hregAMD64_XMM5();
78 ru->regs[ru->size++] = hregAMD64_XMM6();
79 ru->regs[ru->size++] = hregAMD64_XMM7();
80 ru->regs[ru->size++] = hregAMD64_XMM8();
81 ru->regs[ru->size++] = hregAMD64_XMM9();
82 ru->regs[ru->size++] = hregAMD64_XMM10();
83 ru->regs[ru->size++] = hregAMD64_XMM11();
84 ru->regs[ru->size++] = hregAMD64_XMM12();
85 ru->regs[ru->size++] = hregAMD64_R10();
86 ru->allocable = ru->size;
87 /* And other regs, not available to the allocator. */
88 ru->regs[ru->size++] = hregAMD64_RAX();
89 ru->regs[ru->size++] = hregAMD64_RCX();
90 ru->regs[ru->size++] = hregAMD64_RDX();
91 ru->regs[ru->size++] = hregAMD64_RSP();
92 ru->regs[ru->size++] = hregAMD64_RBP();
93 ru->regs[ru->size++] = hregAMD64_R11();
94 ru->regs[ru->size++] = hregAMD64_XMM0();
95 ru->regs[ru->size++] = hregAMD64_XMM1();
96
97 rRegUniverse_AMD64_initted = True;
98
99 RRegUniverse__check_is_sane(ru);
100 return ru;
101 }
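/* Editorial sketch (not part of the original build): a caller that only
   cares about allocatable registers would walk the initial segment of
   the array, for example

      const RRegUniverse* ru = getRRegUniverse_AMD64();
      UInt k;
      for (k = 0; k < ru->allocable; k++)
         ppHRegAMD64(ru->regs[k]);        (prints %rsi, %rdi, %r8, ...)

   Entries at indices >= ru->allocable (rax, rcx, rdx, rsp, rbp, r11,
   xmm0, xmm1) are known to the universe but are never handed out by
   the register allocator. */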
102
103
104 void ppHRegAMD64 ( HReg reg )
105 {
106 Int r;
107 static const HChar* ireg64_names[16]
108 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
109 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
110 /* Be generic for all virtual regs. */
111 if (hregIsVirtual(reg)) {
112 ppHReg(reg);
113 return;
114 }
115 /* But specific for real regs. */
116 switch (hregClass(reg)) {
117 case HRcInt64:
118 r = hregEncoding(reg);
119 vassert(r >= 0 && r < 16);
120 vex_printf("%s", ireg64_names[r]);
121 return;
122 case HRcVec128:
123 r = hregEncoding(reg);
124 vassert(r >= 0 && r < 16);
125 vex_printf("%%xmm%d", r);
126 return;
127 default:
128 vpanic("ppHRegAMD64");
129 }
130 }
131
132 static void ppHRegAMD64_lo32 ( HReg reg )
133 {
134 Int r;
135 static const HChar* ireg32_names[16]
136 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
137 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
138 /* Be generic for all virtual regs. */
139 if (hregIsVirtual(reg)) {
140 ppHReg(reg);
141 vex_printf("d");
142 return;
143 }
144 /* But specific for real regs. */
145 switch (hregClass(reg)) {
146 case HRcInt64:
147 r = hregEncoding(reg);
148 vassert(r >= 0 && r < 16);
149 vex_printf("%s", ireg32_names[r]);
150 return;
151 default:
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
153 }
154 }
155
156
157 /* --------- Condition codes, Intel encoding. --------- */
158
159 const HChar* showAMD64CondCode ( AMD64CondCode cond )
160 {
161 switch (cond) {
162 case Acc_O: return "o";
163 case Acc_NO: return "no";
164 case Acc_B: return "b";
165 case Acc_NB: return "nb";
166 case Acc_Z: return "z";
167 case Acc_NZ: return "nz";
168 case Acc_BE: return "be";
169 case Acc_NBE: return "nbe";
170 case Acc_S: return "s";
171 case Acc_NS: return "ns";
172 case Acc_P: return "p";
173 case Acc_NP: return "np";
174 case Acc_L: return "l";
175 case Acc_NL: return "nl";
176 case Acc_LE: return "le";
177 case Acc_NLE: return "nle";
178 case Acc_ALWAYS: return "ALWAYS";
179 default: vpanic("ppAMD64CondCode");
180 }
181 }
182
183
184 /* --------- AMD64AMode: memory address expressions. --------- */
185
186 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
187 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
188 am->tag = Aam_IR;
189 am->Aam.IR.imm = imm32;
190 am->Aam.IR.reg = reg;
191 return am;
192 }
193 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
194 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
195 am->tag = Aam_IRRS;
196 am->Aam.IRRS.imm = imm32;
197 am->Aam.IRRS.base = base;
198 am->Aam.IRRS.index = indEx;
199 am->Aam.IRRS.shift = shift;
200 vassert(shift >= 0 && shift <= 3);
201 return am;
202 }
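/* Editorial sketch: the two amode forms mirror the usual AT&T syntax.
   Assuming suitable base/index HRegs (rBase, rIndex here are just
   placeholders) are to hand,

      AMD64AMode_IR(16, hregAMD64_RBP())       denotes 16(%rbp)
      AMD64AMode_IRRS(4, rBase, rIndex, 3)     denotes 4(base,index,8)

   'shift' holds log2 of the scale (the printer below shows it as
   1 << shift), hence the 0..3 assertion above. */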
203
204 void ppAMD64AMode ( AMD64AMode* am ) {
205 switch (am->tag) {
206 case Aam_IR:
207 if (am->Aam.IR.imm == 0)
208 vex_printf("(");
209 else
210 vex_printf("0x%x(", am->Aam.IR.imm);
211 ppHRegAMD64(am->Aam.IR.reg);
212 vex_printf(")");
213 return;
214 case Aam_IRRS:
215 vex_printf("0x%x(", am->Aam.IRRS.imm);
216 ppHRegAMD64(am->Aam.IRRS.base);
217 vex_printf(",");
218 ppHRegAMD64(am->Aam.IRRS.index);
219 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
220 return;
221 default:
222 vpanic("ppAMD64AMode");
223 }
224 }
225
226 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
227 switch (am->tag) {
228 case Aam_IR:
229 addHRegUse(u, HRmRead, am->Aam.IR.reg);
230 return;
231 case Aam_IRRS:
232 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
233 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
234 return;
235 default:
236 vpanic("addRegUsage_AMD64AMode");
237 }
238 }
239
240 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
241 switch (am->tag) {
242 case Aam_IR:
243 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
244 return;
245 case Aam_IRRS:
246 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
247 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
248 return;
249 default:
250 vpanic("mapRegs_AMD64AMode");
251 }
252 }
253
254 /* --------- Operand, which can be reg, immediate or memory. --------- */
255
256 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
257 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
258 op->tag = Armi_Imm;
259 op->Armi.Imm.imm32 = imm32;
260 return op;
261 }
262 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
263 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
264 op->tag = Armi_Reg;
265 op->Armi.Reg.reg = reg;
266 return op;
267 }
268 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
269 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
270 op->tag = Armi_Mem;
271 op->Armi.Mem.am = am;
272 return op;
273 }
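/* Editorial sketch: an AMD64RMI is the generic source operand of the
   64-bit ALU instructions further down, so an "addq $1, %rbx" style
   instruction would be built roughly as

      AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(1), hregAMD64_RBX());

   with AMD64RMI_Reg(..) or AMD64RMI_Mem(..) substituted for the
   register and memory forms. */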
274
275 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
276 switch (op->tag) {
277 case Armi_Imm:
278 vex_printf("$0x%x", op->Armi.Imm.imm32);
279 return;
280 case Armi_Reg:
281 if (lo32)
282 ppHRegAMD64_lo32(op->Armi.Reg.reg);
283 else
284 ppHRegAMD64(op->Armi.Reg.reg);
285 return;
286 case Armi_Mem:
287 ppAMD64AMode(op->Armi.Mem.am);
288 return;
289 default:
290 vpanic("ppAMD64RMI");
291 }
292 }
293 void ppAMD64RMI ( AMD64RMI* op ) {
294 ppAMD64RMI_wrk(op, False/*!lo32*/);
295 }
296 void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
297 ppAMD64RMI_wrk(op, True/*lo32*/);
298 }
299
300 /* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
302 accordingly. */
303 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
304 switch (op->tag) {
305 case Armi_Imm:
306 return;
307 case Armi_Reg:
308 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
309 return;
310 case Armi_Mem:
311 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
312 return;
313 default:
314 vpanic("addRegUsage_AMD64RMI");
315 }
316 }
317
318 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
319 switch (op->tag) {
320 case Armi_Imm:
321 return;
322 case Armi_Reg:
323 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
324 return;
325 case Armi_Mem:
326 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
327 return;
328 default:
329 vpanic("mapRegs_AMD64RMI");
330 }
331 }
332
333
334 /* --------- Operand, which can be reg or immediate only. --------- */
335
336 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
337 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
338 op->tag = Ari_Imm;
339 op->Ari.Imm.imm32 = imm32;
340 return op;
341 }
342 AMD64RI* AMD64RI_Reg ( HReg reg ) {
343 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
344 op->tag = Ari_Reg;
345 op->Ari.Reg.reg = reg;
346 return op;
347 }
348
349 void ppAMD64RI ( AMD64RI* op ) {
350 switch (op->tag) {
351 case Ari_Imm:
352 vex_printf("$0x%x", op->Ari.Imm.imm32);
353 return;
354 case Ari_Reg:
355 ppHRegAMD64(op->Ari.Reg.reg);
356 return;
357 default:
358 vpanic("ppAMD64RI");
359 }
360 }
361
362 /* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
364 accordingly. */
365 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
366 switch (op->tag) {
367 case Ari_Imm:
368 return;
369 case Ari_Reg:
370 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
371 return;
372 default:
373 vpanic("addRegUsage_AMD64RI");
374 }
375 }
376
377 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
378 switch (op->tag) {
379 case Ari_Imm:
380 return;
381 case Ari_Reg:
382 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
383 return;
384 default:
385 vpanic("mapRegs_AMD64RI");
386 }
387 }
388
389
390 /* --------- Operand, which can be reg or memory only. --------- */
391
392 AMD64RM* AMD64RM_Reg ( HReg reg ) {
393 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
394 op->tag = Arm_Reg;
395 op->Arm.Reg.reg = reg;
396 return op;
397 }
398 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
399 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
400 op->tag = Arm_Mem;
401 op->Arm.Mem.am = am;
402 return op;
403 }
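/* Editorial sketch: AMD64RM is used where only a register or a memory
   operand is legal, e.g. the widening multiply and divide below, with
   rSrc and am standing for whatever the instruction selector has to
   hand (the Bool argument is the signedness flag):

      AMD64Instr_MulL(True,     AMD64RM_Reg(rSrc));
      AMD64Instr_Div (False, 8, AMD64RM_Mem(am));
*/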
404
405 void ppAMD64RM ( AMD64RM* op ) {
406 switch (op->tag) {
407 case Arm_Mem:
408 ppAMD64AMode(op->Arm.Mem.am);
409 return;
410 case Arm_Reg:
411 ppHRegAMD64(op->Arm.Reg.reg);
412 return;
413 default:
414 vpanic("ppAMD64RM");
415 }
416 }
417
418 /* Because an AMD64RM can be both a source or destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
422 switch (op->tag) {
423 case Arm_Mem:
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
427 return;
428 case Arm_Reg:
429 /* reg is read, written or modified. Add it in the
430 appropriate way. */
431 addHRegUse(u, mode, op->Arm.Reg.reg);
432 return;
433 default:
434 vpanic("addRegUsage_AMD64RM");
435 }
436 }
437
438 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
439 {
440 switch (op->tag) {
441 case Arm_Mem:
442 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
443 return;
444 case Arm_Reg:
445 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
446 return;
447 default:
448 vpanic("mapRegs_AMD64RM");
449 }
450 }
451
452
453 /* --------- Instructions. --------- */
454
455 static const HChar* showAMD64ScalarSz ( Int sz ) {
456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
461 }
462 }
463
464 const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
469 }
470 }
471
472 const HChar* showAMD64AluOp ( AMD64AluOp op ) {
473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
483 case Aalu_MUL: return "imul";
484 default: vpanic("showAMD64AluOp");
485 }
486 }
487
488 const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
494 }
495 }
496
497 const HChar* showA87FpOp ( A87FpOp op ) {
498 switch (op) {
499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
502 case Afp_YL2XP1: return "yl2xp1";
503 case Afp_PREM: return "prem";
504 case Afp_PREM1: return "prem1";
505 case Afp_SQRT: return "sqrt";
506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
508 case Afp_TAN: return "tan";
509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
512 }
513 }
514
515 const HChar* showAMD64SseOp ( AMD64SseOp op ) {
516 switch (op) {
517 case Asse_MOV: return "movups";
518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
530 case Asse_SQRTF: return "sqrt";
531 case Asse_AND: return "and";
532 case Asse_OR: return "or";
533 case Asse_XOR: return "xor";
534 case Asse_ANDN: return "andn";
535 case Asse_ADD8: return "paddb";
536 case Asse_ADD16: return "paddw";
537 case Asse_ADD32: return "paddd";
538 case Asse_ADD64: return "paddq";
539 case Asse_QADD8U: return "paddusb";
540 case Asse_QADD16U: return "paddusw";
541 case Asse_QADD8S: return "paddsb";
542 case Asse_QADD16S: return "paddsw";
543 case Asse_SUB8: return "psubb";
544 case Asse_SUB16: return "psubw";
545 case Asse_SUB32: return "psubd";
546 case Asse_SUB64: return "psubq";
547 case Asse_QSUB8U: return "psubusb";
548 case Asse_QSUB16U: return "psubusw";
549 case Asse_QSUB8S: return "psubsb";
550 case Asse_QSUB16S: return "psubsw";
551 case Asse_MUL16: return "pmullw";
552 case Asse_MULHI16U: return "pmulhuw";
553 case Asse_MULHI16S: return "pmulhw";
554 case Asse_AVG8U: return "pavgb";
555 case Asse_AVG16U: return "pavgw";
556 case Asse_MAX16S: return "pmaxw";
557 case Asse_MAX8U: return "pmaxub";
558 case Asse_MIN16S: return "pminw";
559 case Asse_MIN8U: return "pminub";
560 case Asse_CMPEQ8: return "pcmpeqb";
561 case Asse_CMPEQ16: return "pcmpeqw";
562 case Asse_CMPEQ32: return "pcmpeqd";
563 case Asse_CMPGT8S: return "pcmpgtb";
564 case Asse_CMPGT16S: return "pcmpgtw";
565 case Asse_CMPGT32S: return "pcmpgtd";
566 case Asse_SHL16: return "psllw";
567 case Asse_SHL32: return "pslld";
568 case Asse_SHL64: return "psllq";
569 case Asse_SHR16: return "psrlw";
570 case Asse_SHR32: return "psrld";
571 case Asse_SHR64: return "psrlq";
572 case Asse_SAR16: return "psraw";
573 case Asse_SAR32: return "psrad";
574 case Asse_PACKSSD: return "packssdw";
575 case Asse_PACKSSW: return "packsswb";
576 case Asse_PACKUSW: return "packuswb";
577 case Asse_UNPCKHB: return "punpckhb";
578 case Asse_UNPCKHW: return "punpckhw";
579 case Asse_UNPCKHD: return "punpckhd";
580 case Asse_UNPCKHQ: return "punpckhq";
581 case Asse_UNPCKLB: return "punpcklb";
582 case Asse_UNPCKLW: return "punpcklw";
583 case Asse_UNPCKLD: return "punpckld";
584 case Asse_UNPCKLQ: return "punpcklq";
585 default: vpanic("showAMD64SseOp");
586 }
587 }
588
589 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
590 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
591 i->tag = Ain_Imm64;
592 i->Ain.Imm64.imm64 = imm64;
593 i->Ain.Imm64.dst = dst;
594 return i;
595 }
596 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
597 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
598 i->tag = Ain_Alu64R;
599 i->Ain.Alu64R.op = op;
600 i->Ain.Alu64R.src = src;
601 i->Ain.Alu64R.dst = dst;
602 return i;
603 }
604 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
605 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
606 i->tag = Ain_Alu64M;
607 i->Ain.Alu64M.op = op;
608 i->Ain.Alu64M.src = src;
609 i->Ain.Alu64M.dst = dst;
610 vassert(op != Aalu_MUL);
611 return i;
612 }
613 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
614 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
615 i->tag = Ain_Sh64;
616 i->Ain.Sh64.op = op;
617 i->Ain.Sh64.src = src;
618 i->Ain.Sh64.dst = dst;
619 return i;
620 }
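/* Editorial note: for Ain_Sh64 a 'src' of 0 means "shift by %cl"
   rather than by an immediate; compare the Ain_Sh64 cases in
   ppAMD64Instr and getRegUsage_AMD64Instr below.  Informally:

      AMD64Instr_Sh64(Ash_SHL, 0, dst)     shlq %cl, dst
      AMD64Instr_Sh64(Ash_SAR, 3, dst)     sarq $3,  dst
*/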
621 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
622 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
623 i->tag = Ain_Test64;
624 i->Ain.Test64.imm32 = imm32;
625 i->Ain.Test64.dst = dst;
626 return i;
627 }
628 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
629 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
630 i->tag = Ain_Unary64;
631 i->Ain.Unary64.op = op;
632 i->Ain.Unary64.dst = dst;
633 return i;
634 }
635 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
636 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
637 i->tag = Ain_Lea64;
638 i->Ain.Lea64.am = am;
639 i->Ain.Lea64.dst = dst;
640 return i;
641 }
642 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
643 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
644 i->tag = Ain_Alu32R;
645 i->Ain.Alu32R.op = op;
646 i->Ain.Alu32R.src = src;
647 i->Ain.Alu32R.dst = dst;
648 switch (op) {
649 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
650 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
651 default: vassert(0);
652 }
653 return i;
654 }
655 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
656 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
657 i->tag = Ain_MulL;
658 i->Ain.MulL.syned = syned;
659 i->Ain.MulL.src = src;
660 return i;
661 }
662 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
663 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
664 i->tag = Ain_Div;
665 i->Ain.Div.syned = syned;
666 i->Ain.Div.sz = sz;
667 i->Ain.Div.src = src;
668 vassert(sz == 4 || sz == 8);
669 return i;
670 }
671 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
672 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
673 i->tag = Ain_Push;
674 i->Ain.Push.src = src;
675 return i;
676 }
677 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
678 RetLoc rloc ) {
679 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
680 i->tag = Ain_Call;
681 i->Ain.Call.cond = cond;
682 i->Ain.Call.target = target;
683 i->Ain.Call.regparms = regparms;
684 i->Ain.Call.rloc = rloc;
685 vassert(regparms >= 0 && regparms <= 6);
686 vassert(is_sane_RetLoc(rloc));
687 return i;
688 }
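/* Editorial sketch: 'regparms' is how many integer arguments are passed
   in registers, in the SysV order rdi, rsi, rdx, rcx, r8, r9 (see the
   Ain_Call case of getRegUsage_AMD64Instr).  A plausible unconditional
   two-argument helper call, with hypothetical names helper_fn and rloc
   for the target and return-location, would be

      AMD64Instr_Call(Acc_ALWAYS, (Addr64)helper_fn, 2, rloc);
*/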
689
690 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
691 AMD64CondCode cond, Bool toFastEP ) {
692 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
693 i->tag = Ain_XDirect;
694 i->Ain.XDirect.dstGA = dstGA;
695 i->Ain.XDirect.amRIP = amRIP;
696 i->Ain.XDirect.cond = cond;
697 i->Ain.XDirect.toFastEP = toFastEP;
698 return i;
699 }
700 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
701 AMD64CondCode cond ) {
702 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
703 i->tag = Ain_XIndir;
704 i->Ain.XIndir.dstGA = dstGA;
705 i->Ain.XIndir.amRIP = amRIP;
706 i->Ain.XIndir.cond = cond;
707 return i;
708 }
709 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
710 AMD64CondCode cond, IRJumpKind jk ) {
711 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
712 i->tag = Ain_XAssisted;
713 i->Ain.XAssisted.dstGA = dstGA;
714 i->Ain.XAssisted.amRIP = amRIP;
715 i->Ain.XAssisted.cond = cond;
716 i->Ain.XAssisted.jk = jk;
717 return i;
718 }
719
720 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
721 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
722 i->tag = Ain_CMov64;
723 i->Ain.CMov64.cond = cond;
724 i->Ain.CMov64.src = src;
725 i->Ain.CMov64.dst = dst;
726 vassert(cond != Acc_ALWAYS);
727 return i;
728 }
729 AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
730 AMD64AMode* addr, HReg dst ) {
731 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
732 i->tag = Ain_CLoad;
733 i->Ain.CLoad.cond = cond;
734 i->Ain.CLoad.szB = szB;
735 i->Ain.CLoad.addr = addr;
736 i->Ain.CLoad.dst = dst;
737 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
738 return i;
739 }
740 AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
741 HReg src, AMD64AMode* addr ) {
742 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
743 i->tag = Ain_CStore;
744 i->Ain.CStore.cond = cond;
745 i->Ain.CStore.szB = szB;
746 i->Ain.CStore.src = src;
747 i->Ain.CStore.addr = addr;
748 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
749 return i;
750 }
751 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
752 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
753 i->tag = Ain_MovxLQ;
754 i->Ain.MovxLQ.syned = syned;
755 i->Ain.MovxLQ.src = src;
756 i->Ain.MovxLQ.dst = dst;
757 return i;
758 }
759 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
760 AMD64AMode* src, HReg dst ) {
761 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
762 i->tag = Ain_LoadEX;
763 i->Ain.LoadEX.szSmall = szSmall;
764 i->Ain.LoadEX.syned = syned;
765 i->Ain.LoadEX.src = src;
766 i->Ain.LoadEX.dst = dst;
767 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
768 return i;
769 }
770 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
771 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
772 i->tag = Ain_Store;
773 i->Ain.Store.sz = sz;
774 i->Ain.Store.src = src;
775 i->Ain.Store.dst = dst;
776 vassert(sz == 1 || sz == 2 || sz == 4);
777 return i;
778 }
779 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
780 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
781 i->tag = Ain_Set64;
782 i->Ain.Set64.cond = cond;
783 i->Ain.Set64.dst = dst;
784 return i;
785 }
786 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
787 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
788 i->tag = Ain_Bsfr64;
789 i->Ain.Bsfr64.isFwds = isFwds;
790 i->Ain.Bsfr64.src = src;
791 i->Ain.Bsfr64.dst = dst;
792 return i;
793 }
794 AMD64Instr* AMD64Instr_MFence ( void ) {
795 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
796 i->tag = Ain_MFence;
797 return i;
798 }
799 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
800 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
801 i->tag = Ain_ACAS;
802 i->Ain.ACAS.addr = addr;
803 i->Ain.ACAS.sz = sz;
804 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
805 return i;
806 }
807 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
808 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
809 i->tag = Ain_DACAS;
810 i->Ain.DACAS.addr = addr;
811 i->Ain.DACAS.sz = sz;
812 vassert(sz == 8 || sz == 4);
813 return i;
814 }
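/* Editorial note: ACAS/DACAS keep their data operands implicit in
   fixed registers -- the expected/new values sit in %rax/%rbx for
   ACAS and in %rdx:%rax / %rcx:%rbx for DACAS -- as reflected by the
   corresponding cases in ppAMD64Instr and getRegUsage_AMD64Instr
   below. */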
815
816 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
817 {
818 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
819 i->tag = Ain_A87Free;
820 i->Ain.A87Free.nregs = nregs;
821 vassert(nregs >= 1 && nregs <= 7);
822 return i;
823 }
824 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
825 {
826 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
827 i->tag = Ain_A87PushPop;
828 i->Ain.A87PushPop.addr = addr;
829 i->Ain.A87PushPop.isPush = isPush;
830 i->Ain.A87PushPop.szB = szB;
831 vassert(szB == 8 || szB == 4);
832 return i;
833 }
834 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
835 {
836 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
837 i->tag = Ain_A87FpOp;
838 i->Ain.A87FpOp.op = op;
839 return i;
840 }
841 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
842 {
843 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
844 i->tag = Ain_A87LdCW;
845 i->Ain.A87LdCW.addr = addr;
846 return i;
847 }
848 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
849 {
850 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
851 i->tag = Ain_A87StSW;
852 i->Ain.A87StSW.addr = addr;
853 return i;
854 }
855 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
856 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
857 i->tag = Ain_LdMXCSR;
858 i->Ain.LdMXCSR.addr = addr;
859 return i;
860 }
861 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
862 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
863 i->tag = Ain_SseUComIS;
864 i->Ain.SseUComIS.sz = toUChar(sz);
865 i->Ain.SseUComIS.srcL = srcL;
866 i->Ain.SseUComIS.srcR = srcR;
867 i->Ain.SseUComIS.dst = dst;
868 vassert(sz == 4 || sz == 8);
869 return i;
870 }
871 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
872 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
873 i->tag = Ain_SseSI2SF;
874 i->Ain.SseSI2SF.szS = toUChar(szS);
875 i->Ain.SseSI2SF.szD = toUChar(szD);
876 i->Ain.SseSI2SF.src = src;
877 i->Ain.SseSI2SF.dst = dst;
878 vassert(szS == 4 || szS == 8);
879 vassert(szD == 4 || szD == 8);
880 return i;
881 }
882 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
883 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
884 i->tag = Ain_SseSF2SI;
885 i->Ain.SseSF2SI.szS = toUChar(szS);
886 i->Ain.SseSF2SI.szD = toUChar(szD);
887 i->Ain.SseSF2SI.src = src;
888 i->Ain.SseSF2SI.dst = dst;
889 vassert(szS == 4 || szS == 8);
890 vassert(szD == 4 || szD == 8);
891 return i;
892 }
893 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
894 {
895 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
896 i->tag = Ain_SseSDSS;
897 i->Ain.SseSDSS.from64 = from64;
898 i->Ain.SseSDSS.src = src;
899 i->Ain.SseSDSS.dst = dst;
900 return i;
901 }
902 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
903 HReg reg, AMD64AMode* addr ) {
904 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
905 i->tag = Ain_SseLdSt;
906 i->Ain.SseLdSt.isLoad = isLoad;
907 i->Ain.SseLdSt.sz = toUChar(sz);
908 i->Ain.SseLdSt.reg = reg;
909 i->Ain.SseLdSt.addr = addr;
910 vassert(sz == 4 || sz == 8 || sz == 16);
911 return i;
912 }
913 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
914 {
915 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
916 i->tag = Ain_SseLdzLO;
917 i->Ain.SseLdzLO.sz = sz;
918 i->Ain.SseLdzLO.reg = reg;
919 i->Ain.SseLdzLO.addr = addr;
920 vassert(sz == 4 || sz == 8);
921 return i;
922 }
923 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
924 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
925 i->tag = Ain_Sse32Fx4;
926 i->Ain.Sse32Fx4.op = op;
927 i->Ain.Sse32Fx4.src = src;
928 i->Ain.Sse32Fx4.dst = dst;
929 vassert(op != Asse_MOV);
930 return i;
931 }
932 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
933 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
934 i->tag = Ain_Sse32FLo;
935 i->Ain.Sse32FLo.op = op;
936 i->Ain.Sse32FLo.src = src;
937 i->Ain.Sse32FLo.dst = dst;
938 vassert(op != Asse_MOV);
939 return i;
940 }
941 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
942 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
943 i->tag = Ain_Sse64Fx2;
944 i->Ain.Sse64Fx2.op = op;
945 i->Ain.Sse64Fx2.src = src;
946 i->Ain.Sse64Fx2.dst = dst;
947 vassert(op != Asse_MOV);
948 return i;
949 }
950 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
951 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
952 i->tag = Ain_Sse64FLo;
953 i->Ain.Sse64FLo.op = op;
954 i->Ain.Sse64FLo.src = src;
955 i->Ain.Sse64FLo.dst = dst;
956 vassert(op != Asse_MOV);
957 return i;
958 }
959 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
960 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
961 i->tag = Ain_SseReRg;
962 i->Ain.SseReRg.op = op;
963 i->Ain.SseReRg.src = re;
964 i->Ain.SseReRg.dst = rg;
965 return i;
966 }
967 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
968 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
969 i->tag = Ain_SseCMov;
970 i->Ain.SseCMov.cond = cond;
971 i->Ain.SseCMov.src = src;
972 i->Ain.SseCMov.dst = dst;
973 vassert(cond != Acc_ALWAYS);
974 return i;
975 }
976 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
977 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
978 i->tag = Ain_SseShuf;
979 i->Ain.SseShuf.order = order;
980 i->Ain.SseShuf.src = src;
981 i->Ain.SseShuf.dst = dst;
982 vassert(order >= 0 && order <= 0xFF);
983 return i;
984 }
985 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
986 //uu HReg reg, AMD64AMode* addr ) {
987 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
988 //uu i->tag = Ain_AvxLdSt;
989 //uu i->Ain.AvxLdSt.isLoad = isLoad;
990 //uu i->Ain.AvxLdSt.reg = reg;
991 //uu i->Ain.AvxLdSt.addr = addr;
992 //uu return i;
993 //uu }
994 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
995 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
996 //uu i->tag = Ain_AvxReRg;
997 //uu i->Ain.AvxReRg.op = op;
998 //uu i->Ain.AvxReRg.src = re;
999 //uu i->Ain.AvxReRg.dst = rg;
1000 //uu return i;
1001 //uu }
1002 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1003 AMD64AMode* amFailAddr ) {
1004 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1005 i->tag = Ain_EvCheck;
1006 i->Ain.EvCheck.amCounter = amCounter;
1007 i->Ain.EvCheck.amFailAddr = amFailAddr;
1008 return i;
1009 }
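/* Editorial note: an event check decrements the counter addressed by
   amCounter and, if the result goes negative, branches to the address
   loaded from amFailAddr; compare the "(evCheck) decl ...; jns nofail;
   jmp *..." rendering in ppAMD64Instr below. */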
1010 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1011 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1012 i->tag = Ain_ProfInc;
1013 return i;
1014 }
1015
1016 void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
1017 {
1018 vassert(mode64 == True);
1019 switch (i->tag) {
1020 case Ain_Imm64:
1021 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1022 ppHRegAMD64(i->Ain.Imm64.dst);
1023 return;
1024 case Ain_Alu64R:
1025 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1026 ppAMD64RMI(i->Ain.Alu64R.src);
1027 vex_printf(",");
1028 ppHRegAMD64(i->Ain.Alu64R.dst);
1029 return;
1030 case Ain_Alu64M:
1031 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1032 ppAMD64RI(i->Ain.Alu64M.src);
1033 vex_printf(",");
1034 ppAMD64AMode(i->Ain.Alu64M.dst);
1035 return;
1036 case Ain_Sh64:
1037 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1038 if (i->Ain.Sh64.src == 0)
1039 vex_printf("%%cl,");
1040 else
1041 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1042 ppHRegAMD64(i->Ain.Sh64.dst);
1043 return;
1044 case Ain_Test64:
1045 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1046 ppHRegAMD64(i->Ain.Test64.dst);
1047 return;
1048 case Ain_Unary64:
1049 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1050 ppHRegAMD64(i->Ain.Unary64.dst);
1051 return;
1052 case Ain_Lea64:
1053 vex_printf("leaq ");
1054 ppAMD64AMode(i->Ain.Lea64.am);
1055 vex_printf(",");
1056 ppHRegAMD64(i->Ain.Lea64.dst);
1057 return;
1058 case Ain_Alu32R:
1059 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1060 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1061 vex_printf(",");
1062 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1063 return;
1064 case Ain_MulL:
1065 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1066 ppAMD64RM(i->Ain.MulL.src);
1067 return;
1068 case Ain_Div:
1069 vex_printf("%cdiv%s ",
1070 i->Ain.Div.syned ? 's' : 'u',
1071 showAMD64ScalarSz(i->Ain.Div.sz));
1072 ppAMD64RM(i->Ain.Div.src);
1073 return;
1074 case Ain_Push:
1075 vex_printf("pushq ");
1076 ppAMD64RMI(i->Ain.Push.src);
1077 return;
1078 case Ain_Call:
1079 vex_printf("call%s[%d,",
1080 i->Ain.Call.cond==Acc_ALWAYS
1081 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1082 i->Ain.Call.regparms );
1083 ppRetLoc(i->Ain.Call.rloc);
1084 vex_printf("] 0x%llx", i->Ain.Call.target);
1085 break;
1086
1087 case Ain_XDirect:
1088 vex_printf("(xDirect) ");
1089 vex_printf("if (%%rflags.%s) { ",
1090 showAMD64CondCode(i->Ain.XDirect.cond));
1091 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1092 vex_printf("movq %%r11,");
1093 ppAMD64AMode(i->Ain.XDirect.amRIP);
1094 vex_printf("; ");
1095 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1096 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1097 return;
1098 case Ain_XIndir:
1099 vex_printf("(xIndir) ");
1100 vex_printf("if (%%rflags.%s) { ",
1101 showAMD64CondCode(i->Ain.XIndir.cond));
1102 vex_printf("movq ");
1103 ppHRegAMD64(i->Ain.XIndir.dstGA);
1104 vex_printf(",");
1105 ppAMD64AMode(i->Ain.XIndir.amRIP);
1106 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1107 return;
1108 case Ain_XAssisted:
1109 vex_printf("(xAssisted) ");
1110 vex_printf("if (%%rflags.%s) { ",
1111 showAMD64CondCode(i->Ain.XAssisted.cond));
1112 vex_printf("movq ");
1113 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1114 vex_printf(",");
1115 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1116 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1117 (Int)i->Ain.XAssisted.jk);
1118 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1119 return;
1120
1121 case Ain_CMov64:
1122 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1123 ppHRegAMD64(i->Ain.CMov64.src);
1124 vex_printf(",");
1125 ppHRegAMD64(i->Ain.CMov64.dst);
1126 return;
1127 case Ain_CLoad:
1128 vex_printf("if (%%rflags.%s) { ",
1129 showAMD64CondCode(i->Ain.CLoad.cond));
1130 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1131 ppAMD64AMode(i->Ain.CLoad.addr);
1132 vex_printf(", ");
1133 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1134 (i->Ain.CLoad.dst);
1135 vex_printf(" }");
1136 return;
1137 case Ain_CStore:
1138 vex_printf("if (%%rflags.%s) { ",
1139 showAMD64CondCode(i->Ain.CStore.cond));
1140 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1141 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1142 (i->Ain.CStore.src);
1143 vex_printf(", ");
1144 ppAMD64AMode(i->Ain.CStore.addr);
1145 vex_printf(" }");
1146 return;
1147
1148 case Ain_MovxLQ:
1149 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1150 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1151 vex_printf(",");
1152 ppHRegAMD64(i->Ain.MovxLQ.dst);
1153 return;
1154 case Ain_LoadEX:
1155 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1156 vex_printf("movl ");
1157 ppAMD64AMode(i->Ain.LoadEX.src);
1158 vex_printf(",");
1159 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1160 } else {
1161 vex_printf("mov%c%cq ",
1162 i->Ain.LoadEX.syned ? 's' : 'z',
1163 i->Ain.LoadEX.szSmall==1
1164 ? 'b'
1165 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1166 ppAMD64AMode(i->Ain.LoadEX.src);
1167 vex_printf(",");
1168 ppHRegAMD64(i->Ain.LoadEX.dst);
1169 }
1170 return;
1171 case Ain_Store:
1172 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1173 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1174 ppHRegAMD64(i->Ain.Store.src);
1175 vex_printf(",");
1176 ppAMD64AMode(i->Ain.Store.dst);
1177 return;
1178 case Ain_Set64:
1179 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1180 ppHRegAMD64(i->Ain.Set64.dst);
1181 return;
1182 case Ain_Bsfr64:
1183 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1184 ppHRegAMD64(i->Ain.Bsfr64.src);
1185 vex_printf(",");
1186 ppHRegAMD64(i->Ain.Bsfr64.dst);
1187 return;
1188 case Ain_MFence:
1189 vex_printf("mfence" );
1190 return;
1191 case Ain_ACAS:
1192 vex_printf("lock cmpxchg%c ",
1193 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1194 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1195 vex_printf("{%%rax->%%rbx},");
1196 ppAMD64AMode(i->Ain.ACAS.addr);
1197 return;
1198 case Ain_DACAS:
1199 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1200 (Int)(2 * i->Ain.DACAS.sz));
1201 ppAMD64AMode(i->Ain.DACAS.addr);
1202 return;
1203 case Ain_A87Free:
1204 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1205 break;
1206 case Ain_A87PushPop:
1207 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1208 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1209 ppAMD64AMode(i->Ain.A87PushPop.addr);
1210 break;
1211 case Ain_A87FpOp:
1212 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1213 break;
1214 case Ain_A87LdCW:
1215 vex_printf("fldcw ");
1216 ppAMD64AMode(i->Ain.A87LdCW.addr);
1217 break;
1218 case Ain_A87StSW:
1219 vex_printf("fstsw ");
1220 ppAMD64AMode(i->Ain.A87StSW.addr);
1221 break;
1222 case Ain_LdMXCSR:
1223 vex_printf("ldmxcsr ");
1224 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1225 break;
1226 case Ain_SseUComIS:
1227 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1228 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1229 vex_printf(",");
1230 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1231 vex_printf(" ; pushfq ; popq ");
1232 ppHRegAMD64(i->Ain.SseUComIS.dst);
1233 break;
1234 case Ain_SseSI2SF:
1235 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1236 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1237 (i->Ain.SseSI2SF.src);
1238 vex_printf(",");
1239 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1240 break;
1241 case Ain_SseSF2SI:
1242 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1243 ppHRegAMD64(i->Ain.SseSF2SI.src);
1244 vex_printf(",");
1245 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1246 (i->Ain.SseSF2SI.dst);
1247 break;
1248 case Ain_SseSDSS:
1249 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1250 ppHRegAMD64(i->Ain.SseSDSS.src);
1251 vex_printf(",");
1252 ppHRegAMD64(i->Ain.SseSDSS.dst);
1253 break;
1254 case Ain_SseLdSt:
1255 switch (i->Ain.SseLdSt.sz) {
1256 case 4: vex_printf("movss "); break;
1257 case 8: vex_printf("movsd "); break;
1258 case 16: vex_printf("movups "); break;
1259 default: vassert(0);
1260 }
1261 if (i->Ain.SseLdSt.isLoad) {
1262 ppAMD64AMode(i->Ain.SseLdSt.addr);
1263 vex_printf(",");
1264 ppHRegAMD64(i->Ain.SseLdSt.reg);
1265 } else {
1266 ppHRegAMD64(i->Ain.SseLdSt.reg);
1267 vex_printf(",");
1268 ppAMD64AMode(i->Ain.SseLdSt.addr);
1269 }
1270 return;
1271 case Ain_SseLdzLO:
1272 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1273 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1274 vex_printf(",");
1275 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1276 return;
1277 case Ain_Sse32Fx4:
1278 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1279 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1280 vex_printf(",");
1281 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1282 return;
1283 case Ain_Sse32FLo:
1284 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1285 ppHRegAMD64(i->Ain.Sse32FLo.src);
1286 vex_printf(",");
1287 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1288 return;
1289 case Ain_Sse64Fx2:
1290 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1291 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1292 vex_printf(",");
1293 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1294 return;
1295 case Ain_Sse64FLo:
1296 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1297 ppHRegAMD64(i->Ain.Sse64FLo.src);
1298 vex_printf(",");
1299 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1300 return;
1301 case Ain_SseReRg:
1302 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1303 ppHRegAMD64(i->Ain.SseReRg.src);
1304 vex_printf(",");
1305 ppHRegAMD64(i->Ain.SseReRg.dst);
1306 return;
1307 case Ain_SseCMov:
1308 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1309 ppHRegAMD64(i->Ain.SseCMov.src);
1310 vex_printf(",");
1311 ppHRegAMD64(i->Ain.SseCMov.dst);
1312 return;
1313 case Ain_SseShuf:
1314 vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
1315 ppHRegAMD64(i->Ain.SseShuf.src);
1316 vex_printf(",");
1317 ppHRegAMD64(i->Ain.SseShuf.dst);
1318 return;
1319 //uu case Ain_AvxLdSt:
1320 //uu vex_printf("vmovups ");
1321 //uu if (i->Ain.AvxLdSt.isLoad) {
1322 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1323 //uu vex_printf(",");
1324 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1325 //uu } else {
1326 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1327 //uu vex_printf(",");
1328 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1329 //uu }
1330 //uu return;
1331 //uu case Ain_AvxReRg:
1332 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1333 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1334 //uu vex_printf(",");
1335 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1336 //uu return;
1337 case Ain_EvCheck:
1338 vex_printf("(evCheck) decl ");
1339 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1340 vex_printf("; jns nofail; jmp *");
1341 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1342 vex_printf("; nofail:");
1343 return;
1344 case Ain_ProfInc:
1345 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1346 return;
1347 default:
1348 vpanic("ppAMD64Instr");
1349 }
1350 }
1351
1352 /* --------- Helpers for register allocation. --------- */
1353
1354 void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
1355 {
1356 Bool unary;
1357 vassert(mode64 == True);
1358 initHRegUsage(u);
1359 switch (i->tag) {
1360 case Ain_Imm64:
1361 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1362 return;
1363 case Ain_Alu64R:
1364 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1365 if (i->Ain.Alu64R.op == Aalu_MOV) {
1366 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1367 return;
1368 }
1369 if (i->Ain.Alu64R.op == Aalu_CMP) {
1370 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1371 return;
1372 }
1373 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1374 return;
1375 case Ain_Alu64M:
1376 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1377 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1378 return;
1379 case Ain_Sh64:
1380 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1381 if (i->Ain.Sh64.src == 0)
1382 addHRegUse(u, HRmRead, hregAMD64_RCX());
1383 return;
1384 case Ain_Test64:
1385 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1386 return;
1387 case Ain_Unary64:
1388 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1389 return;
1390 case Ain_Lea64:
1391 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1392 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1393 return;
1394 case Ain_Alu32R:
1395 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1396 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1397 if (i->Ain.Alu32R.op == Aalu_CMP) {
1398 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1399 return;
1400 }
1401 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1402 return;
1403 case Ain_MulL:
1404 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1405 addHRegUse(u, HRmModify, hregAMD64_RAX());
1406 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1407 return;
1408 case Ain_Div:
1409 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1410 addHRegUse(u, HRmModify, hregAMD64_RAX());
1411 addHRegUse(u, HRmModify, hregAMD64_RDX());
1412 return;
1413 case Ain_Push:
1414 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1415 addHRegUse(u, HRmModify, hregAMD64_RSP());
1416 return;
1417 case Ain_Call:
1418 /* This is a bit subtle. */
1419 /* First off, claim it trashes all the caller-saved regs
1420 which fall within the register allocator's jurisdiction.
1421 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
1422 and all the xmm registers.
1423 */
1424 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1425 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1426 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1427 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1428 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1429 addHRegUse(u, HRmWrite, hregAMD64_R8());
1430 addHRegUse(u, HRmWrite, hregAMD64_R9());
1431 addHRegUse(u, HRmWrite, hregAMD64_R10());
1432 addHRegUse(u, HRmWrite, hregAMD64_R11());
1433 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1434 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1435 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1436 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1437 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1438 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1439 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1440 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1441 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1442 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1443 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1444 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1445
1446 /* Now we have to state any parameter-carrying registers
1447 which might be read. This depends on the regparmness. */
1448 switch (i->Ain.Call.regparms) {
1449 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1450 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1451 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1452 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1453 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1454 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1455 case 0: break;
1456 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1457 }
1458 /* Finally, there is the issue that the insn trashes a
1459 register because the literal target address has to be
1460 loaded into a register. Fortunately, r11 is stated in the
1461 ABI as a scratch register, and so seems a suitable victim. */
1462 addHRegUse(u, HRmWrite, hregAMD64_R11());
1463 /* Upshot of this is that the assembler really must use r11,
1464 and no other, as a destination temporary. */
1465 return;
1466 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1467 conditionally exit the block. Hence we only need to list (1)
1468 the registers that they read, and (2) the registers that they
1469 write in the case where the block is not exited. (2) is
1470 empty, hence only (1) is relevant here. */
1471 case Ain_XDirect:
1472 /* Don't bother to mention the write to %r11, since it is not
1473 available to the allocator. */
1474 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1475 return;
1476 case Ain_XIndir:
1477 /* Ditto re %r11 */
1478 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1479 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1480 return;
1481 case Ain_XAssisted:
1482 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1483 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1484 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1485 return;
1486 case Ain_CMov64:
1487 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1488 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1489 return;
1490 case Ain_CLoad:
1491 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1492 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1493 return;
1494 case Ain_CStore:
1495 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1496 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1497 return;
1498 case Ain_MovxLQ:
1499 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1500 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1501 return;
1502 case Ain_LoadEX:
1503 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1504 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1505 return;
1506 case Ain_Store:
1507 addHRegUse(u, HRmRead, i->Ain.Store.src);
1508 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1509 return;
1510 case Ain_Set64:
1511 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1512 return;
1513 case Ain_Bsfr64:
1514 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1515 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1516 return;
1517 case Ain_MFence:
1518 return;
1519 case Ain_ACAS:
1520 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1521 addHRegUse(u, HRmRead, hregAMD64_RBX());
1522 addHRegUse(u, HRmModify, hregAMD64_RAX());
1523 return;
1524 case Ain_DACAS:
1525 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1526 addHRegUse(u, HRmRead, hregAMD64_RCX());
1527 addHRegUse(u, HRmRead, hregAMD64_RBX());
1528 addHRegUse(u, HRmModify, hregAMD64_RDX());
1529 addHRegUse(u, HRmModify, hregAMD64_RAX());
1530 return;
1531 case Ain_A87Free:
1532 return;
1533 case Ain_A87PushPop:
1534 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1535 return;
1536 case Ain_A87FpOp:
1537 return;
1538 case Ain_A87LdCW:
1539 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1540 return;
1541 case Ain_A87StSW:
1542 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1543 return;
1544 case Ain_LdMXCSR:
1545 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1546 return;
1547 case Ain_SseUComIS:
1548 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1549 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1550 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1551 return;
1552 case Ain_SseSI2SF:
1553 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1554 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1555 return;
1556 case Ain_SseSF2SI:
1557 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1558 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1559 return;
1560 case Ain_SseSDSS:
1561 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1562 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1563 return;
1564 case Ain_SseLdSt:
1565 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1566 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1567 i->Ain.SseLdSt.reg);
1568 return;
1569 case Ain_SseLdzLO:
1570 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1571 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1572 return;
1573 case Ain_Sse32Fx4:
1574 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1575 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1576 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1577 || i->Ain.Sse32Fx4.op == Asse_SQRTF );
1578 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1579 addHRegUse(u, unary ? HRmWrite : HRmModify,
1580 i->Ain.Sse32Fx4.dst);
1581 return;
1582 case Ain_Sse32FLo:
1583 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1584 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1585 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1586 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1587 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1588 addHRegUse(u, unary ? HRmWrite : HRmModify,
1589 i->Ain.Sse32FLo.dst);
1590 return;
1591 case Ain_Sse64Fx2:
1592 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1593 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1594 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1595 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1596 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1597 addHRegUse(u, unary ? HRmWrite : HRmModify,
1598 i->Ain.Sse64Fx2.dst);
1599 return;
1600 case Ain_Sse64FLo:
1601 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1602 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1603 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1604 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1605 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1606 addHRegUse(u, unary ? HRmWrite : HRmModify,
1607 i->Ain.Sse64FLo.dst);
1608 return;
1609 case Ain_SseReRg:
1610 if ( (i->Ain.SseReRg.op == Asse_XOR
1611 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1612 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1613 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1614 r,r' as a write of a value to r, and independent of any
1615 previous value in r */
1616 /* (as opposed to a rite of passage :-) */
1617 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1618 } else {
1619 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1620 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1621 ? HRmWrite : HRmModify,
1622 i->Ain.SseReRg.dst);
1623 }
1624 return;
1625 case Ain_SseCMov:
1626 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1627 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1628 return;
1629 case Ain_SseShuf:
1630 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1631 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1632 return;
1633 //uu case Ain_AvxLdSt:
1634 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1635 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1636 //uu i->Ain.AvxLdSt.reg);
1637 //uu return;
1638 //uu case Ain_AvxReRg:
1639 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1640 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1641 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1642 //uu /* See comments on the case for Ain_SseReRg. */
1643 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1644 //uu } else {
1645 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1646 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1647 //uu ? HRmWrite : HRmModify,
1648 //uu i->Ain.AvxReRg.dst);
1649 //uu }
1650 //uu return;
1651 case Ain_EvCheck:
1652 /* We expect both amodes only to mention %rbp, so this is in
1653 fact pointless, since %rbp isn't allocatable, but anyway.. */
1654 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1655 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1656 return;
1657 case Ain_ProfInc:
1658 addHRegUse(u, HRmWrite, hregAMD64_R11());
1659 return;
1660 default:
1661 ppAMD64Instr(i, mode64);
1662 vpanic("getRegUsage_AMD64Instr");
1663 }
1664 }
1665
1666 /* local helper */
1667 static inline void mapReg(HRegRemap* m, HReg* r)
1668 {
1669 *r = lookupHRegRemap(m, *r);
1670 }
1671
1672 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1673 {
1674 vassert(mode64 == True);
1675 switch (i->tag) {
1676 case Ain_Imm64:
1677 mapReg(m, &i->Ain.Imm64.dst);
1678 return;
1679 case Ain_Alu64R:
1680 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1681 mapReg(m, &i->Ain.Alu64R.dst);
1682 return;
1683 case Ain_Alu64M:
1684 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1685 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1686 return;
1687 case Ain_Sh64:
1688 mapReg(m, &i->Ain.Sh64.dst);
1689 return;
1690 case Ain_Test64:
1691 mapReg(m, &i->Ain.Test64.dst);
1692 return;
1693 case Ain_Unary64:
1694 mapReg(m, &i->Ain.Unary64.dst);
1695 return;
1696 case Ain_Lea64:
1697 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1698 mapReg(m, &i->Ain.Lea64.dst);
1699 return;
1700 case Ain_Alu32R:
1701 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1702 mapReg(m, &i->Ain.Alu32R.dst);
1703 return;
1704 case Ain_MulL:
1705 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1706 return;
1707 case Ain_Div:
1708 mapRegs_AMD64RM(m, i->Ain.Div.src);
1709 return;
1710 case Ain_Push:
1711 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1712 return;
1713 case Ain_Call:
1714 return;
1715 case Ain_XDirect:
1716 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1717 return;
1718 case Ain_XIndir:
1719 mapReg(m, &i->Ain.XIndir.dstGA);
1720 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1721 return;
1722 case Ain_XAssisted:
1723 mapReg(m, &i->Ain.XAssisted.dstGA);
1724 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1725 return;
1726 case Ain_CMov64:
1727 mapReg(m, &i->Ain.CMov64.src);
1728 mapReg(m, &i->Ain.CMov64.dst);
1729 return;
1730 case Ain_CLoad:
1731 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1732 mapReg(m, &i->Ain.CLoad.dst);
1733 return;
1734 case Ain_CStore:
1735 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1736 mapReg(m, &i->Ain.CStore.src);
1737 return;
1738 case Ain_MovxLQ:
1739 mapReg(m, &i->Ain.MovxLQ.src);
1740 mapReg(m, &i->Ain.MovxLQ.dst);
1741 return;
1742 case Ain_LoadEX:
1743 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1744 mapReg(m, &i->Ain.LoadEX.dst);
1745 return;
1746 case Ain_Store:
1747 mapReg(m, &i->Ain.Store.src);
1748 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1749 return;
1750 case Ain_Set64:
1751 mapReg(m, &i->Ain.Set64.dst);
1752 return;
1753 case Ain_Bsfr64:
1754 mapReg(m, &i->Ain.Bsfr64.src);
1755 mapReg(m, &i->Ain.Bsfr64.dst);
1756 return;
1757 case Ain_MFence:
1758 return;
1759 case Ain_ACAS:
1760 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1761 return;
1762 case Ain_DACAS:
1763 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1764 return;
1765 case Ain_A87Free:
1766 return;
1767 case Ain_A87PushPop:
1768 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1769 return;
1770 case Ain_A87FpOp:
1771 return;
1772 case Ain_A87LdCW:
1773 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1774 return;
1775 case Ain_A87StSW:
1776 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1777 return;
1778 case Ain_LdMXCSR:
1779 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1780 return;
1781 case Ain_SseUComIS:
1782 mapReg(m, &i->Ain.SseUComIS.srcL);
1783 mapReg(m, &i->Ain.SseUComIS.srcR);
1784 mapReg(m, &i->Ain.SseUComIS.dst);
1785 return;
1786 case Ain_SseSI2SF:
1787 mapReg(m, &i->Ain.SseSI2SF.src);
1788 mapReg(m, &i->Ain.SseSI2SF.dst);
1789 return;
1790 case Ain_SseSF2SI:
1791 mapReg(m, &i->Ain.SseSF2SI.src);
1792 mapReg(m, &i->Ain.SseSF2SI.dst);
1793 return;
1794 case Ain_SseSDSS:
1795 mapReg(m, &i->Ain.SseSDSS.src);
1796 mapReg(m, &i->Ain.SseSDSS.dst);
1797 return;
1798 case Ain_SseLdSt:
1799 mapReg(m, &i->Ain.SseLdSt.reg);
1800 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1801 break;
1802 case Ain_SseLdzLO:
1803 mapReg(m, &i->Ain.SseLdzLO.reg);
1804 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1805 break;
1806 case Ain_Sse32Fx4:
1807 mapReg(m, &i->Ain.Sse32Fx4.src);
1808 mapReg(m, &i->Ain.Sse32Fx4.dst);
1809 return;
1810 case Ain_Sse32FLo:
1811 mapReg(m, &i->Ain.Sse32FLo.src);
1812 mapReg(m, &i->Ain.Sse32FLo.dst);
1813 return;
1814 case Ain_Sse64Fx2:
1815 mapReg(m, &i->Ain.Sse64Fx2.src);
1816 mapReg(m, &i->Ain.Sse64Fx2.dst);
1817 return;
1818 case Ain_Sse64FLo:
1819 mapReg(m, &i->Ain.Sse64FLo.src);
1820 mapReg(m, &i->Ain.Sse64FLo.dst);
1821 return;
1822 case Ain_SseReRg:
1823 mapReg(m, &i->Ain.SseReRg.src);
1824 mapReg(m, &i->Ain.SseReRg.dst);
1825 return;
1826 case Ain_SseCMov:
1827 mapReg(m, &i->Ain.SseCMov.src);
1828 mapReg(m, &i->Ain.SseCMov.dst);
1829 return;
1830 case Ain_SseShuf:
1831 mapReg(m, &i->Ain.SseShuf.src);
1832 mapReg(m, &i->Ain.SseShuf.dst);
1833 return;
1834 //uu case Ain_AvxLdSt:
1835 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1836 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1837 //uu break;
1838 //uu case Ain_AvxReRg:
1839 //uu mapReg(m, &i->Ain.AvxReRg.src);
1840 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1841 //uu return;
1842 case Ain_EvCheck:
1843 /* We expect both amodes only to mention %rbp, so this is in
1844 fact pointless, since %rbp isn't allocatable, but anyway.. */
1845 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1846 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1847 return;
1848 case Ain_ProfInc:
1849 /* hardwires r11 -- nothing to modify. */
1850 return;
1851 default:
1852 ppAMD64Instr(i, mode64);
1853 vpanic("mapRegs_AMD64Instr");
1854 }
1855 }
1856
1857 /* Figure out if i represents a reg-reg move, and if so assign the
1858 source and destination to *src and *dst. If in doubt say No. Used
1859 by the register allocator to do move coalescing.
1860 */
1861 Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
1862 {
1863 switch (i->tag) {
1864 case Ain_Alu64R:
1865 /* Moves between integer regs */
1866 if (i->Ain.Alu64R.op != Aalu_MOV)
1867 return False;
1868 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1869 return False;
1870 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1871 *dst = i->Ain.Alu64R.dst;
1872 return True;
1873 case Ain_SseReRg:
1874 /* Moves between SSE regs */
1875 if (i->Ain.SseReRg.op != Asse_MOV)
1876 return False;
1877 *src = i->Ain.SseReRg.src;
1878 *dst = i->Ain.SseReRg.dst;
1879 return True;
1880 //uu case Ain_AvxReRg:
1881 //uu /* Moves between AVX regs */
1882 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1883 //uu return False;
1884 //uu *src = i->Ain.AvxReRg.src;
1885 //uu *dst = i->Ain.AvxReRg.dst;
1886 //uu return True;
1887 default:
1888 return False;
1889 }
1890 /*NOTREACHED*/
1891 }
1892
1893
1894 /* Generate amd64 spill/reload instructions under the direction of the
1895 register allocator. Note it's critical these don't write the
1896 condition codes. */
1897
1898 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1899 HReg rreg, Int offsetB, Bool mode64 )
1900 {
1901 AMD64AMode* am;
1902 vassert(offsetB >= 0);
1903 vassert(!hregIsVirtual(rreg));
1904 vassert(mode64 == True);
1905 *i1 = *i2 = NULL;
1906 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1907 switch (hregClass(rreg)) {
1908 case HRcInt64:
1909 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
1910 return;
1911 case HRcVec128:
1912 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
1913 return;
1914 default:
1915 ppHRegClass(hregClass(rreg));
1916 vpanic("genSpill_AMD64: unimplemented regclass");
1917 }
1918 }
1919
1920 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1921 HReg rreg, Int offsetB, Bool mode64 )
1922 {
1923 AMD64AMode* am;
1924 vassert(offsetB >= 0);
1925 vassert(!hregIsVirtual(rreg));
1926 vassert(mode64 == True);
1927 *i1 = *i2 = NULL;
1928 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1929 switch (hregClass(rreg)) {
1930 case HRcInt64:
1931 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
1932 return;
1933 case HRcVec128:
1934 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
1935 return;
1936 default:
1937 ppHRegClass(hregClass(rreg));
1938 vpanic("genReload_AMD64: unimplemented regclass");
1939 }
1940 }
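
/* Illustrative note (a sketch, not emitted anywhere by itself): for an
   HRcInt64 rreg and an offsetB of, say, 0x30, genSpill_AMD64 produces the
   equivalent of "movq %rreg, 0x30(%rbp)" and genReload_AMD64 the matching
   "movq 0x30(%rbp), %rreg"; an HRcVec128 rreg instead uses a 16-byte
   Ain_SseLdSt (movups) to/from the same %rbp-relative slot. */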
1941
1942
1943 /* --------- The amd64 assembler (bleh.) --------- */
1944
1945 /* Produce the low three bits of an integer register number. */
1946 inline static UInt iregEnc210 ( HReg r )
1947 {
1948 UInt n;
1949 vassert(hregClass(r) == HRcInt64);
1950 vassert(!hregIsVirtual(r));
1951 n = hregEncoding(r);
1952 vassert(n <= 15);
1953 return n & 7;
1954 }
1955
1956 /* Produce bit 3 of an integer register number. */
1957 inline static UInt iregEnc3 ( HReg r )
1958 {
1959 UInt n;
1960 vassert(hregClass(r) == HRcInt64);
1961 vassert(!hregIsVirtual(r));
1962 n = hregEncoding(r);
1963 vassert(n <= 15);
1964 return (n >> 3) & 1;
1965 }
1966
1967 /* Produce a complete 4-bit integer register number. */
1968 inline static UInt iregEnc3210 ( HReg r )
1969 {
1970 UInt n;
1971 vassert(hregClass(r) == HRcInt64);
1972 vassert(!hregIsVirtual(r));
1973 n = hregEncoding(r);
1974 vassert(n <= 15);
1975 return n;
1976 }
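
/* Quick example: %r13 has hardware encoding 13 (0b1101), so iregEnc210
   gives 5, iregEnc3 gives 1 and iregEnc3210 gives 13. */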
1977
1978 /* Produce a complete 4-bit integer register number. */
1979 inline static UInt vregEnc3210 ( HReg r )
1980 {
1981 UInt n;
1982 vassert(hregClass(r) == HRcVec128);
1983 vassert(!hregIsVirtual(r));
1984 n = hregEncoding(r);
1985 vassert(n <= 15);
1986 return n;
1987 }
1988
1989 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1990 {
1991 vassert(mod < 4);
1992 vassert((reg|regmem) < 8);
1993 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
1994 }
1995
1996 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1997 {
1998 vassert(shift < 4);
1999 vassert((regindex|regbase) < 8);
2000 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
2001 }
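
/* Worked examples: mkModRegRM(1,0,2) == 0x42, i.e. mod=01 (a disp8
   follows), reg=000, rm=010; mkSIB(2,1,4) == 0x8C, i.e. scale=4,
   index=001, base=100. */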
2002
2003 static UChar* emit32 ( UChar* p, UInt w32 )
2004 {
2005 *p++ = toUChar((w32) & 0x000000FF);
2006 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2007 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2008 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2009 return p;
2010 }
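
/* E.g. emit32(p, 0x12345678) lays down the bytes 78 56 34 12 --
   little-endian, as the amd64 instruction stream requires. */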
2011
2012 static UChar* emit64 ( UChar* p, ULong w64 )
2013 {
2014 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2015 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2016 return p;
2017 }
2018
2019 /* Does a sign-extend of the lowest 8 bits give
2020 the original number? */
2021 static Bool fits8bits ( UInt w32 )
2022 {
2023 Int i32 = (Int)w32;
2024 return toBool(i32 == ((Int)(w32 << 24) >> 24));
2025 }
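
/* E.g. 0xFFFFFF80 (-128) and 0x0000007F (127) fit; 0x00000080 (128)
   does not, since sign-extending its low byte gives back 0xFFFFFF80. */
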
2026 /* Can the lower 32 bits be signedly widened to produce the whole
2027 64-bit value? In other words, are the top 33 bits either all 0 or
2028 all 1 ? */
2029 static Bool fitsIn32Bits ( ULong x )
2030 {
2031 Long y1;
2032 y1 = x << 32;
2033 y1 >>=/*s*/ 32;
2034 return toBool(x == y1);
2035 }
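
/* E.g. 0xFFFFFFFF80000000ULL fits (it is the sign-extension of
   0x80000000), whereas 0x0000000100000000ULL does not. */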
2036
2037
2038 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2039
2040 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2041 = 00 greg ereg
2042
2043 greg, d8(ereg) | ereg is neither of: RSP R12
2044 = 01 greg ereg, d8
2045
2046 greg, d32(ereg) | ereg is neither of: RSP R12
2047 = 10 greg ereg, d32
2048
2049 greg, d8(ereg) | ereg is either: RSP R12
2050 = 01 greg 100, 0x24, d8
2051 (lowest bit of rex distinguishes R12/RSP)
2052
2053 greg, d32(ereg) | ereg is either: RSP R12
2054 = 10 greg 100, 0x24, d32
2055 (lowest bit of rex distinguishes R12/RSP)
2056
2057 -----------------------------------------------
2058
2059 greg, d8(base,index,scale)
2060 | index != RSP
2061 = 01 greg 100, scale index base, d8
2062
2063 greg, d32(base,index,scale)
2064 | index != RSP
2065 = 10 greg 100, scale index base, d32
2066 */
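/* Worked instance of the d8(ereg) rule above: for "movq %rax, 8(%rdx)"
   the emitter produces REX 0x48, opcode 0x89, then mod-reg-rm 0x42
   (mod=01, greg=rax=000, ereg=rdx=010) followed by the disp8 byte 0x08,
   i.e. the byte sequence 48 89 42 08. */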
2067 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2068 {
2069 UInt gregEnc210 = gregEnc3210 & 7;
2070 if (am->tag == Aam_IR) {
2071 if (am->Aam.IR.imm == 0
2072 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2073 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2074 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2075 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2076 ) {
2077 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2078 return p;
2079 }
2080 if (fits8bits(am->Aam.IR.imm)
2081 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2082 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2083 ) {
2084 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2085 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2086 return p;
2087 }
2088 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2089 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2090 ) {
2091 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2092 p = emit32(p, am->Aam.IR.imm);
2093 return p;
2094 }
2095 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2096 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2097 && fits8bits(am->Aam.IR.imm)) {
2098 *p++ = mkModRegRM(1, gregEnc210, 4);
2099 *p++ = 0x24;
2100 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2101 return p;
2102 }
2103 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2104 || wait for test case for RSP case */
2105 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2106 *p++ = mkModRegRM(2, gregEnc210, 4);
2107 *p++ = 0x24;
2108 p = emit32(p, am->Aam.IR.imm);
2109 return p;
2110 }
2111 ppAMD64AMode(am);
2112 vpanic("doAMode_M: can't emit amode IR");
2113 /*NOTREACHED*/
2114 }
2115 if (am->tag == Aam_IRRS) {
2116 if (fits8bits(am->Aam.IRRS.imm)
2117 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2118 *p++ = mkModRegRM(1, gregEnc210, 4);
2119 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2120 iregEnc210(am->Aam.IRRS.base));
2121 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2122 return p;
2123 }
2124 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2125 *p++ = mkModRegRM(2, gregEnc210, 4);
2126 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2127 iregEnc210(am->Aam.IRRS.base));
2128 p = emit32(p, am->Aam.IRRS.imm);
2129 return p;
2130 }
2131 ppAMD64AMode(am);
2132 vpanic("doAMode_M: can't emit amode IRRS");
2133 /*NOTREACHED*/
2134 }
2135 vpanic("doAMode_M: unknown amode");
2136 /*NOTREACHED*/
2137 }
2138
2139 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2140 {
2141 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2142 }
2143
2144 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2145 {
2146 vassert(gregEnc3210 < 16);
2147 return doAMode_M__wrk(p, gregEnc3210, am);
2148 }
2149
2150
2151 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2152 inline
2153 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2154 {
2155 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2156 return p;
2157 }
2158
2159 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2160 {
2161 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2162 }
2163
2164 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2165 {
2166 vassert(gregEnc3210 < 16);
2167 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2168 }
2169
2170 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2171 {
2172 vassert(eregEnc3210 < 16);
2173 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2174 }
2175
2176 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2177 {
2178 vassert( (gregEnc3210|eregEnc3210) < 16);
2179 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2180 }
2181
2182
2183 /* Clear the W bit on a REX byte, thereby changing the operand size
2184 back to whatever that instruction's default operand size is. */
2185 static inline UChar clearWBit ( UChar rex )
2186 {
2187 return rex & ~(1<<3);
2188 }
2189
2190
2191 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2192 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2193 {
2194 if (am->tag == Aam_IR) {
2195 UChar W = 1; /* we want 64-bit mode */
2196 UChar R = (gregEnc3210 >> 3) & 1;
2197 UChar X = 0; /* not relevant */
2198 UChar B = iregEnc3(am->Aam.IR.reg);
2199 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2200 }
2201 if (am->tag == Aam_IRRS) {
2202 UChar W = 1; /* we want 64-bit mode */
2203 UChar R = (gregEnc3210 >> 3) & 1;
2204 UChar X = iregEnc3(am->Aam.IRRS.index);
2205 UChar B = iregEnc3(am->Aam.IRRS.base);
2206 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2207 }
2208 vassert(0);
2209 return 0; /*NOTREACHED*/
2210 }
2211
2212 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2213 {
2214 return rexAMode_M__wrk(iregEnc3210(greg), am);
2215 }
2216
2217 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2218 {
2219 vassert(gregEnc3210 < 16);
2220 return rexAMode_M__wrk(gregEnc3210, am);
2221 }
2222
2223
2224 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2225 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2226 {
2227 UChar W = 1; /* we want 64-bit mode */
2228 UChar R = (gregEnc3210 >> 3) & 1;
2229 UChar X = 0; /* not relevant */
2230 UChar B = (eregEnc3210 >> 3) & 1;
2231 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2232 }
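
/* For instance, greg = %r9 (encoding 9) and ereg = %rdx (encoding 2)
   give 0x40 + ((W=1)<<3) + ((R=1)<<2) + ((X=0)<<1) + (B=0) = 0x4C. */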
2233
2234 static UChar rexAMode_R ( HReg greg, HReg ereg )
2235 {
2236 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2237 }
2238
2239 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2240 {
2241 vassert(gregEnc3210 < 16);
2242 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2243 }
2244
2245 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2246 {
2247 vassert(eregEnc3210 < 16);
2248 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2249 }
2250
2251 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2252 {
2253 vassert((gregEnc3210|eregEnc3210) < 16);
2254 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
2255 }
2256
2257
2258 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2259 //uu    been verified correct (I reckon).  Certainly it has been known to
2260 //uu produce correct VEX prefixes during testing. */
2261 //uu
2262 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2263 //uu    notVvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
2264 //uu in verbatim. There's no range checking on the bits. */
2265 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2266 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2267 //uu UInt L, UInt pp )
2268 //uu {
2269 //uu UChar byte0 = 0;
2270 //uu UChar byte1 = 0;
2271 //uu UChar byte2 = 0;
2272 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2273 //uu /* 2 byte encoding is possible. */
2274 //uu byte0 = 0xC5;
2275 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2276 //uu | (L << 2) | pp;
2277 //uu } else {
2278 //uu /* 3 byte encoding is needed. */
2279 //uu byte0 = 0xC4;
2280 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2281 //uu | ((rexB ^ 1) << 5) | mmmmm;
2282 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2283 //uu }
2284 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2285 //uu }
2286 //uu
2287 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2288 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2289 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2290 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2291 //uu vvvv=1111 (unused 3rd reg). */
2292 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2293 //uu {
2294 //uu UChar L = 1; /* size = 256 */
2295 //uu UChar pp = 0; /* no SIMD prefix */
2296 //uu UChar mmmmm = 1; /* 0F */
2297 //uu UChar notVvvv = 0; /* unused */
2298 //uu UChar rexW = 0;
2299 //uu UChar rexR = 0;
2300 //uu UChar rexX = 0;
2301 //uu UChar rexB = 0;
2302 //uu /* Same logic as in rexAMode_M. */
2303 //uu if (am->tag == Aam_IR) {
2304 //uu rexR = iregEnc3(greg);
2305 //uu rexX = 0; /* not relevant */
2306 //uu rexB = iregEnc3(am->Aam.IR.reg);
2307 //uu }
2308 //uu else if (am->tag == Aam_IRRS) {
2309 //uu rexR = iregEnc3(greg);
2310 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2311 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2312 //uu } else {
2313 //uu vassert(0);
2314 //uu }
2315 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2316 //uu }
2317 //uu
2318 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2319 //uu {
2320 //uu switch (vex & 0xFF) {
2321 //uu case 0xC5:
2322 //uu *p++ = 0xC5;
2323 //uu *p++ = (vex >> 8) & 0xFF;
2324 //uu vassert(0 == (vex >> 16));
2325 //uu break;
2326 //uu case 0xC4:
2327 //uu *p++ = 0xC4;
2328 //uu *p++ = (vex >> 8) & 0xFF;
2329 //uu *p++ = (vex >> 16) & 0xFF;
2330 //uu vassert(0 == (vex >> 24));
2331 //uu break;
2332 //uu default:
2333 //uu vassert(0);
2334 //uu }
2335 //uu return p;
2336 //uu }
2337
2338
2339 /* Emit ffree %st(N) */
2340 static UChar* do_ffree_st ( UChar* p, Int n )
2341 {
2342 vassert(n >= 0 && n <= 7);
2343 *p++ = 0xDD;
2344 *p++ = toUChar(0xC0 + n);
2345 return p;
2346 }
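
/* E.g. do_ffree_st(p, 3) emits DD C3, i.e. "ffree %st(3)". */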
2347
2348 /* Emit an instruction into buf and return the number of bytes used.
2349 Note that buf is not the insn's final place, and therefore it is
2350 imperative to emit position-independent code. If the emitted
2351 instruction was a profiler inc, set *is_profInc to True, else
2352 leave it unchanged. */
2353
2354 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2355 UChar* buf, Int nbuf, const AMD64Instr* i,
2356 Bool mode64, VexEndness endness_host,
2357 const void* disp_cp_chain_me_to_slowEP,
2358 const void* disp_cp_chain_me_to_fastEP,
2359 const void* disp_cp_xindir,
2360 const void* disp_cp_xassisted )
2361 {
2362 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2363 UInt xtra;
2364 UInt reg;
2365 UChar rex;
2366 UChar* p = &buf[0];
2367 UChar* ptmp;
2368 Int j;
2369 vassert(nbuf >= 32);
2370 vassert(mode64 == True);
2371
2372 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2373
2374 switch (i->tag) {
2375
2376 case Ain_Imm64:
2377 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2378          /* Use the short form (load into 32 bit reg, + default
2379             widening rule) for constants up to 0xFFFFF.  We could
2380             use this form for the range 0 to 0x7FFFFFFF inclusive, but
2381             limit it to a smaller range for verifiability purposes. */
2382 if (1 & iregEnc3(i->Ain.Imm64.dst))
2383 *p++ = 0x41;
2384 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2385 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2386 } else {
2387 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2388 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2389 p = emit64(p, i->Ain.Imm64.imm64);
2390 }
2391 goto done;
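         /* Example: an Imm64 of 0x1234 into %rbx (encoding 3) takes the
            short form and emits BB 34 12 00 00, i.e. "movl $0x1234, %ebx",
            relying on the implicit zero-extension to clear the upper 32
            bits of %rbx. */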
2392
2393 case Ain_Alu64R:
2394 /* Deal specially with MOV */
2395 if (i->Ain.Alu64R.op == Aalu_MOV) {
2396 switch (i->Ain.Alu64R.src->tag) {
2397 case Armi_Imm:
2398 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2399 /* Actually we could use this form for constants in
2400 the range 0 through 0x7FFFFFFF inclusive, but
2401 limit it to a small range for verifiability
2402 purposes. */
2403 /* Generate "movl $imm32, 32-bit-register" and let
2404 the default zero-extend rule cause the upper half
2405 of the dst to be zeroed out too. This saves 1
2406 and sometimes 2 bytes compared to the more
2407 obvious encoding in the 'else' branch. */
2408 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2409 *p++ = 0x41;
2410 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2411 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2412 } else {
2413 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2414 *p++ = 0xC7;
2415 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2416 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2417 }
2418 goto done;
2419 case Armi_Reg:
2420 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2421 i->Ain.Alu64R.dst );
2422 *p++ = 0x89;
2423 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2424 i->Ain.Alu64R.dst);
2425 goto done;
2426 case Armi_Mem:
2427 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2428 i->Ain.Alu64R.src->Armi.Mem.am);
2429 *p++ = 0x8B;
2430 p = doAMode_M(p, i->Ain.Alu64R.dst,
2431 i->Ain.Alu64R.src->Armi.Mem.am);
2432 goto done;
2433 default:
2434 goto bad;
2435 }
2436 }
2437 /* MUL */
2438 if (i->Ain.Alu64R.op == Aalu_MUL) {
2439 switch (i->Ain.Alu64R.src->tag) {
2440 case Armi_Reg:
2441 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2442 i->Ain.Alu64R.src->Armi.Reg.reg);
2443 *p++ = 0x0F;
2444 *p++ = 0xAF;
2445 p = doAMode_R(p, i->Ain.Alu64R.dst,
2446 i->Ain.Alu64R.src->Armi.Reg.reg);
2447 goto done;
2448 case Armi_Mem:
2449 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2450 i->Ain.Alu64R.src->Armi.Mem.am);
2451 *p++ = 0x0F;
2452 *p++ = 0xAF;
2453 p = doAMode_M(p, i->Ain.Alu64R.dst,
2454 i->Ain.Alu64R.src->Armi.Mem.am);
2455 goto done;
2456 case Armi_Imm:
2457 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2458 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2459 *p++ = 0x6B;
2460 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2461 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2462 } else {
2463 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2464 *p++ = 0x69;
2465 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2466 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2467 }
2468 goto done;
2469 default:
2470 goto bad;
2471 }
2472 }
2473 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2474 opc = opc_rr = subopc_imm = opc_imma = 0;
2475 switch (i->Ain.Alu64R.op) {
2476 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2477 subopc_imm = 2; opc_imma = 0x15; break;
2478 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2479 subopc_imm = 0; opc_imma = 0x05; break;
2480 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2481 subopc_imm = 5; opc_imma = 0x2D; break;
2482 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2483 subopc_imm = 3; opc_imma = 0x1D; break;
2484 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2485 subopc_imm = 4; opc_imma = 0x25; break;
2486 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2487 subopc_imm = 6; opc_imma = 0x35; break;
2488 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2489 subopc_imm = 1; opc_imma = 0x0D; break;
2490 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2491 subopc_imm = 7; opc_imma = 0x3D; break;
2492 default: goto bad;
2493 }
2494 switch (i->Ain.Alu64R.src->tag) {
2495 case Armi_Imm:
2496 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2497 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2498 goto bad; /* FIXME: awaiting test case */
2499 *p++ = toUChar(opc_imma);
2500 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2501 } else
2502 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2503 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2504 *p++ = 0x83;
2505 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2506 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2507 } else {
2508 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2509 *p++ = 0x81;
2510 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2511 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2512 }
2513 goto done;
2514 case Armi_Reg:
2515 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2516 i->Ain.Alu64R.dst);
2517 *p++ = toUChar(opc_rr);
2518 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2519 i->Ain.Alu64R.dst);
2520 goto done;
2521 case Armi_Mem:
2522 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2523 i->Ain.Alu64R.src->Armi.Mem.am);
2524 *p++ = toUChar(opc);
2525 p = doAMode_M(p, i->Ain.Alu64R.dst,
2526 i->Ain.Alu64R.src->Armi.Mem.am);
2527 goto done;
2528 default:
2529 goto bad;
2530 }
2531 break;
2532
2533 case Ain_Alu64M:
2534 /* Deal specially with MOV */
2535 if (i->Ain.Alu64M.op == Aalu_MOV) {
2536 switch (i->Ain.Alu64M.src->tag) {
2537 case Ari_Reg:
2538 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2539 i->Ain.Alu64M.dst);
2540 *p++ = 0x89;
2541 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2542 i->Ain.Alu64M.dst);
2543 goto done;
2544 case Ari_Imm:
2545 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2546 *p++ = 0xC7;
2547 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2548 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2549 goto done;
2550 default:
2551 goto bad;
2552 }
2553 }
2554 break;
2555
2556 case Ain_Sh64:
2557 opc_cl = opc_imm = subopc = 0;
2558 switch (i->Ain.Sh64.op) {
2559 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2560 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2561 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2562 default: goto bad;
2563 }
2564 if (i->Ain.Sh64.src == 0) {
2565 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2566 *p++ = toUChar(opc_cl);
2567 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2568 goto done;
2569 } else {
2570 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2571 *p++ = toUChar(opc_imm);
2572 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2573 *p++ = (UChar)(i->Ain.Sh64.src);
2574 goto done;
2575 }
2576 break;
2577
2578 case Ain_Test64:
2579 /* testq sign-extend($imm32), %reg */
2580 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2581 *p++ = 0xF7;
2582 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2583 p = emit32(p, i->Ain.Test64.imm32);
2584 goto done;
2585
2586 case Ain_Unary64:
2587 if (i->Ain.Unary64.op == Aun_NOT) {
2588 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2589 *p++ = 0xF7;
2590 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2591 goto done;
2592 }
2593 if (i->Ain.Unary64.op == Aun_NEG) {
2594 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2595 *p++ = 0xF7;
2596 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2597 goto done;
2598 }
2599 break;
2600
2601 case Ain_Lea64:
2602 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2603 *p++ = 0x8D;
2604 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2605 goto done;
2606
2607 case Ain_Alu32R:
2608 /* ADD/SUB/AND/OR/XOR/CMP */
2609 opc = opc_rr = subopc_imm = opc_imma = 0;
2610 switch (i->Ain.Alu32R.op) {
2611 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2612 subopc_imm = 0; opc_imma = 0x05; break;
2613 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2614 subopc_imm = 5; opc_imma = 0x2D; break;
2615 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2616 subopc_imm = 4; opc_imma = 0x25; break;
2617 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2618 subopc_imm = 6; opc_imma = 0x35; break;
2619 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2620 subopc_imm = 1; opc_imma = 0x0D; break;
2621 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2622 subopc_imm = 7; opc_imma = 0x3D; break;
2623 default: goto bad;
2624 }
2625 switch (i->Ain.Alu32R.src->tag) {
2626 case Armi_Imm:
2627 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2628 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2629 goto bad; /* FIXME: awaiting test case */
2630 *p++ = toUChar(opc_imma);
2631 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2632 } else
2633 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2634 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2635 if (rex != 0x40) *p++ = rex;
2636 *p++ = 0x83;
2637 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2638 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2639 } else {
2640 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2641 if (rex != 0x40) *p++ = rex;
2642 *p++ = 0x81;
2643 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2644 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2645 }
2646 goto done;
2647 case Armi_Reg:
2648 rex = clearWBit(
2649 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2650 i->Ain.Alu32R.dst) );
2651 if (rex != 0x40) *p++ = rex;
2652 *p++ = toUChar(opc_rr);
2653 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2654 i->Ain.Alu32R.dst);
2655 goto done;
2656 case Armi_Mem:
2657 rex = clearWBit(
2658 rexAMode_M( i->Ain.Alu32R.dst,
2659 i->Ain.Alu32R.src->Armi.Mem.am) );
2660 if (rex != 0x40) *p++ = rex;
2661 *p++ = toUChar(opc);
2662 p = doAMode_M(p, i->Ain.Alu32R.dst,
2663 i->Ain.Alu32R.src->Armi.Mem.am);
2664 goto done;
2665 default:
2666 goto bad;
2667 }
2668 break;
2669
2670 case Ain_MulL:
2671 subopc = i->Ain.MulL.syned ? 5 : 4;
2672 switch (i->Ain.MulL.src->tag) {
2673 case Arm_Mem:
2674 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2675 *p++ = 0xF7;
2676 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2677 goto done;
2678 case Arm_Reg:
2679 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2680 *p++ = 0xF7;
2681 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2682 goto done;
2683 default:
2684 goto bad;
2685 }
2686 break;
2687
2688 case Ain_Div:
2689 subopc = i->Ain.Div.syned ? 7 : 6;
2690 if (i->Ain.Div.sz == 4) {
2691 switch (i->Ain.Div.src->tag) {
2692 case Arm_Mem:
2693 goto bad;
2694 /*FIXME*/
2695 *p++ = 0xF7;
2696 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2697 goto done;
2698 case Arm_Reg:
2699 *p++ = clearWBit(
2700 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2701 *p++ = 0xF7;
2702 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2703 goto done;
2704 default:
2705 goto bad;
2706 }
2707 }
2708 if (i->Ain.Div.sz == 8) {
2709 switch (i->Ain.Div.src->tag) {
2710 case Arm_Mem:
2711 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
2712 *p++ = 0xF7;
2713 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2714 goto done;
2715 case Arm_Reg:
2716 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
2717 *p++ = 0xF7;
2718 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2719 goto done;
2720 default:
2721 goto bad;
2722 }
2723 }
2724 break;
2725
2726 case Ain_Push:
2727 switch (i->Ain.Push.src->tag) {
2728 case Armi_Mem:
2729 *p++ = clearWBit(
2730 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
2731 *p++ = 0xFF;
2732 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
2733 goto done;
2734 case Armi_Imm:
2735 *p++ = 0x68;
2736 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2737 goto done;
2738 case Armi_Reg:
2739 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2740 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
2741 goto done;
2742 default:
2743 goto bad;
2744 }
2745
2746 case Ain_Call: {
2747 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2748 above, %r11 is used as an address temporary. */
2749 /* If we don't need to do any fixup actions in the case that the
2750 call doesn't happen, just do the simple thing and emit
2751 straight-line code. This is usually the case. */
2752 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2753 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2754 /* jump over the following two insns if the condition does
2755 not hold */
2756 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2757 if (i->Ain.Call.cond != Acc_ALWAYS) {
2758 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2759 *p++ = shortImm ? 10 : 13;
2760 /* 10 or 13 bytes in the next two insns */
2761 }
2762 if (shortImm) {
2763 /* 7 bytes: movl sign-extend(imm32), %r11 */
2764 *p++ = 0x49;
2765 *p++ = 0xC7;
2766 *p++ = 0xC3;
2767 p = emit32(p, (UInt)i->Ain.Call.target);
2768 } else {
2769 /* 10 bytes: movabsq $target, %r11 */
2770 *p++ = 0x49;
2771 *p++ = 0xBB;
2772 p = emit64(p, i->Ain.Call.target);
2773 }
2774 /* 3 bytes: call *%r11 */
2775 *p++ = 0x41;
2776 *p++ = 0xFF;
2777 *p++ = 0xD3;
2778 } else {
2779 Int delta;
2780 /* Complex case. We have to generate an if-then-else diamond. */
2781 // before:
2782 // j{!cond} else:
2783 // movabsq $target, %r11
2784 // call* %r11
2785 // preElse:
2786 // jmp after:
2787 // else:
2788 // movabsq $0x5555555555555555, %rax // possibly
2789 // movq %rax, %rdx // possibly
2790 // after:
2791
2792 // before:
2793 UChar* pBefore = p;
2794
2795 // j{!cond} else:
2796 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2797 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2798
2799 // movabsq $target, %r11
2800 *p++ = 0x49;
2801 *p++ = 0xBB;
2802 p = emit64(p, i->Ain.Call.target);
2803
2804 // call* %r11
2805 *p++ = 0x41;
2806 *p++ = 0xFF;
2807 *p++ = 0xD3;
2808
2809 // preElse:
2810 UChar* pPreElse = p;
2811
2812 // jmp after:
2813 *p++ = 0xEB;
2814 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2815
2816 // else:
2817 UChar* pElse = p;
2818
2819 /* Do the 'else' actions */
2820 switch (i->Ain.Call.rloc.pri) {
2821 case RLPri_Int:
2822 // movabsq $0x5555555555555555, %rax
2823 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2824 break;
2825 case RLPri_2Int:
2826 vassert(0); //ATC
2827 // movabsq $0x5555555555555555, %rax
2828 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2829 // movq %rax, %rdx
2830 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
2831 case RLPri_None: case RLPri_INVALID: default:
2832 vassert(0);
2833 }
2834
2835 // after:
2836 UChar* pAfter = p;
2837
2838 // Fix up the branch offsets. The +2s in the offset
2839 // calculations are there because x86 requires conditional
2840 // branches to have their offset stated relative to the
2841 // instruction immediately following the branch insn. And in
2842 // both cases the branch insns are 2 bytes long.
2843
2844 // First, the "j{!cond} else:" at pBefore.
2845 delta = (Int)(Long)(pElse - (pBefore + 2));
2846 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2847 *(pBefore+1) = (UChar)delta;
2848
2849 // And secondly, the "jmp after:" at pPreElse.
2850 delta = (Int)(Long)(pAfter - (pPreElse + 2));
2851 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2852 *(pPreElse+1) = (UChar)delta;
2853 }
2854 goto done;
2855 }
2856
2857 case Ain_XDirect: {
2858 /* NB: what goes on here has to be very closely coordinated with the
2859 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
2860 /* We're generating chain-me requests here, so we need to be
2861 sure this is actually allowed -- no-redir translations can't
2862 use chain-me's. Hence: */
2863 vassert(disp_cp_chain_me_to_slowEP != NULL);
2864 vassert(disp_cp_chain_me_to_fastEP != NULL);
2865
2866 HReg r11 = hregAMD64_R11();
2867
2868 /* Use ptmp for backpatching conditional jumps. */
2869 ptmp = NULL;
2870
2871 /* First off, if this is conditional, create a conditional
2872 jump over the rest of it. */
2873 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2874 /* jmp fwds if !condition */
2875 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
2876 ptmp = p; /* fill in this bit later */
2877 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2878 }
2879
2880 /* Update the guest RIP. */
2881 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
2882 /* use a shorter encoding */
2883 /* movl sign-extend(dstGA), %r11 */
2884 *p++ = 0x49;
2885 *p++ = 0xC7;
2886 *p++ = 0xC3;
2887 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
2888 } else {
2889 /* movabsq $dstGA, %r11 */
2890 *p++ = 0x49;
2891 *p++ = 0xBB;
2892 p = emit64(p, i->Ain.XDirect.dstGA);
2893 }
2894
2895 /* movq %r11, amRIP */
2896 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
2897 *p++ = 0x89;
2898 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
2899
2900 /* --- FIRST PATCHABLE BYTE follows --- */
2901 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2902 to) backs up the return address, so as to find the address of
2903 the first patchable byte. So: don't change the length of the
2904 two instructions below. */
2905 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
2906 *p++ = 0x49;
2907 *p++ = 0xBB;
2908 const void* disp_cp_chain_me
2909 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2910 : disp_cp_chain_me_to_slowEP;
2911 p = emit64(p, (Addr)disp_cp_chain_me);
2912 /* call *%r11 */
2913 *p++ = 0x41;
2914 *p++ = 0xFF;
2915 *p++ = 0xD3;
2916 /* --- END of PATCHABLE BYTES --- */
2917
2918 /* Fix up the conditional jump, if there was one. */
2919 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2920 Int delta = p - ptmp;
2921 vassert(delta > 0 && delta < 40);
2922 *ptmp = toUChar(delta-1);
2923 }
2924 goto done;
2925 }
2926
2927 case Ain_XIndir: {
2928 /* We're generating transfers that could lead indirectly to a
2929 chain-me, so we need to be sure this is actually allowed --
2930 no-redir translations are not allowed to reach normal
2931 translations without going through the scheduler. That means
2932 no XDirects or XIndirs out from no-redir translations.
2933 Hence: */
2934 vassert(disp_cp_xindir != NULL);
2935
2936 /* Use ptmp for backpatching conditional jumps. */
2937 ptmp = NULL;
2938
2939 /* First off, if this is conditional, create a conditional
2940 jump over the rest of it. */
2941 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2942 /* jmp fwds if !condition */
2943 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
2944 ptmp = p; /* fill in this bit later */
2945 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2946 }
2947
2948 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2949 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2950 *p++ = 0x89;
2951 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2952
2953 /* get $disp_cp_xindir into %r11 */
2954 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
2955 /* use a shorter encoding */
2956 /* movl sign-extend(disp_cp_xindir), %r11 */
2957 *p++ = 0x49;
2958 *p++ = 0xC7;
2959 *p++ = 0xC3;
2960 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
2961 } else {
2962 /* movabsq $disp_cp_xindir, %r11 */
2963 *p++ = 0x49;
2964 *p++ = 0xBB;
2965 p = emit64(p, (Addr)disp_cp_xindir);
2966 }
2967
2968 /* jmp *%r11 */
2969 *p++ = 0x41;
2970 *p++ = 0xFF;
2971 *p++ = 0xE3;
2972
2973 /* Fix up the conditional jump, if there was one. */
2974 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2975 Int delta = p - ptmp;
2976 vassert(delta > 0 && delta < 40);
2977 *ptmp = toUChar(delta-1);
2978 }
2979 goto done;
2980 }
2981
2982 case Ain_XAssisted: {
2983 /* Use ptmp for backpatching conditional jumps. */
2984 ptmp = NULL;
2985
2986 /* First off, if this is conditional, create a conditional
2987 jump over the rest of it. */
2988 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2989 /* jmp fwds if !condition */
2990 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
2991 ptmp = p; /* fill in this bit later */
2992 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2993 }
2994
2995 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2996 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2997 *p++ = 0x89;
2998 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2999 /* movl $magic_number, %ebp. Since these numbers are all small positive
3000 integers, we can get away with "movl $N, %ebp" rather than
3001 the longer "movq $N, %rbp". */
3002 UInt trcval = 0;
3003 switch (i->Ain.XAssisted.jk) {
3004 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3005 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3006 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3007 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3008 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3009 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3010 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3011 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3012 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3013 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3014 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3015 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3016 /* We don't expect to see the following being assisted. */
3017 case Ijk_Ret:
3018 case Ijk_Call:
3019 /* fallthrough */
3020 default:
3021 ppIRJumpKind(i->Ain.XAssisted.jk);
3022 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3023 }
3024 vassert(trcval != 0);
3025 *p++ = 0xBD;
3026 p = emit32(p, trcval);
3027       /* movabsq $disp_cp_xassisted, %r11 */
3028 *p++ = 0x49;
3029 *p++ = 0xBB;
3030 p = emit64(p, (Addr)disp_cp_xassisted);
3031 /* jmp *%r11 */
3032 *p++ = 0x41;
3033 *p++ = 0xFF;
3034 *p++ = 0xE3;
3035
3036 /* Fix up the conditional jump, if there was one. */
3037 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3038 Int delta = p - ptmp;
3039 vassert(delta > 0 && delta < 40);
3040 *ptmp = toUChar(delta-1);
3041 }
3042 goto done;
3043 }
3044
3045 case Ain_CMov64:
3046 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3047 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3048 *p++ = 0x0F;
3049 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3050 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3051 goto done;
3052
3053 case Ain_CLoad: {
3054 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3055
3056 /* Only 32- or 64-bit variants are allowed. */
3057 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3058
3059 /* Use ptmp for backpatching conditional jumps. */
3060 ptmp = NULL;
3061
3062 /* jmp fwds if !condition */
3063 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3064 ptmp = p; /* fill in this bit later */
3065 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3066
3067 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3068 load, which, by the default zero-extension rule, zeroes out
3069 the upper half of the destination, as required. */
3070 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3071 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3072 *p++ = 0x8B;
3073 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3074
3075 /* Fix up the conditional branch */
3076 Int delta = p - ptmp;
3077 vassert(delta > 0 && delta < 40);
3078 *ptmp = toUChar(delta-1);
3079 goto done;
3080 }
3081
3082 case Ain_CStore: {
3083       /* AFAICS this is identical to Ain_CLoad except that the opcode
3084          is 0x89 instead of 0x8B. */
3085 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3086
3087 /* Only 32- or 64-bit variants are allowed. */
3088 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3089
3090 /* Use ptmp for backpatching conditional jumps. */
3091 ptmp = NULL;
3092
3093 /* jmp fwds if !condition */
3094 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3095 ptmp = p; /* fill in this bit later */
3096 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3097
3098 /* Now the store. */
3099 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3100 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3101 *p++ = 0x89;
3102 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3103
3104 /* Fix up the conditional branch */
3105 Int delta = p - ptmp;
3106 vassert(delta > 0 && delta < 40);
3107 *ptmp = toUChar(delta-1);
3108 goto done;
3109 }
3110
3111 case Ain_MovxLQ:
3112 /* No, _don't_ ask me why the sense of the args has to be
3113 different in the S vs Z case. I don't know. */
3114 if (i->Ain.MovxLQ.syned) {
3115 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3116 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3117 *p++ = 0x63;
3118 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3119 } else {
3120 /* Produce a 32-bit reg-reg move, since the implicit
3121 zero-extend does what we want. */
3122 *p++ = clearWBit (
3123 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3124 *p++ = 0x89;
3125 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3126 }
3127 goto done;
3128
3129 case Ain_LoadEX:
3130 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3131 /* movzbq */
3132 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3133 *p++ = 0x0F;
3134 *p++ = 0xB6;
3135 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3136 goto done;
3137 }
3138 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3139 /* movzwq */
3140 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3141 *p++ = 0x0F;
3142 *p++ = 0xB7;
3143 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3144 goto done;
3145 }
3146 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3147 /* movzlq */
3148 /* This isn't really an existing AMD64 instruction per se.
3149 Rather, we have to do a 32-bit load. Because a 32-bit
3150 write implicitly clears the upper 32 bits of the target
3151 register, we get what we want. */
3152 *p++ = clearWBit(
3153 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3154 *p++ = 0x8B;
3155 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3156 goto done;
3157 }
3158 break;
3159
3160 case Ain_Set64:
3161 /* Make the destination register be 1 or 0, depending on whether
3162 the relevant condition holds. Complication: the top 56 bits
3163 of the destination should be forced to zero, but doing 'xorq
3164 %r,%r' kills the flag(s) we are about to read. Sigh. So
3165          start off by moving $0 into the dest. */
3166 reg = iregEnc3210(i->Ain.Set64.dst);
3167 vassert(reg < 16);
3168
3169 /* movq $0, %dst */
3170 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3171 *p++ = 0xC7;
3172 *p++ = toUChar(0xC0 + (reg & 7));
3173 p = emit32(p, 0);
3174
3175 /* setb lo8(%dst) */
3176       /* note, 8-bit register rex trickiness.  Be careful here. */
3177 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3178 *p++ = 0x0F;
3179 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3180 *p++ = toUChar(0xC0 + (reg & 7));
3181 goto done;
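      /* Example (assuming Acc_Z carries the usual x86 cc value 4): a
         Set64 of Acc_Z into %rcx emits 48 C7 C1 00 00 00 00 (movq $0,
         %rcx) followed by 40 0F 94 C1 (setz %cl, with a redundant but
         harmless 0x40 REX prefix). */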
3182
3183 case Ain_Bsfr64:
3184 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3185 *p++ = 0x0F;
3186 if (i->Ain.Bsfr64.isFwds) {
3187 *p++ = 0xBC;
3188 } else {
3189 *p++ = 0xBD;
3190 }
3191 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3192 goto done;
3193
3194 case Ain_MFence:
3195 /* mfence */
3196 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3197 goto done;
3198
3199 case Ain_ACAS:
3200 /* lock */
3201 *p++ = 0xF0;
3202 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3203 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3204 in %rbx. The new-value register is hardwired to be %rbx
3205 since dealing with byte integer registers is too much hassle,
3206 so we force the register operand to %rbx (could equally be
3207 %rcx or %rdx). */
3208 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3209 if (i->Ain.ACAS.sz != 8)
3210 rex = clearWBit(rex);
3211
3212 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3213 *p++ = 0x0F;
3214 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3215 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3216 goto done;
3217
3218 case Ain_DACAS:
3219 /* lock */
3220 *p++ = 0xF0;
3221 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3222 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3223 aren't encoded in the insn. */
3224          rex = rexAMode_M_enc(1, i->Ain.DACAS.addr );
3225          if (i->Ain.DACAS.sz != 8)
3226 rex = clearWBit(rex);
3227 *p++ = rex;
3228 *p++ = 0x0F;
3229 *p++ = 0xC7;
3230 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3231 goto done;
3232
3233 case Ain_A87Free:
3234 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3235 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3236 p = do_ffree_st(p, 7-j);
3237 }
3238 goto done;
3239
3240 case Ain_A87PushPop:
3241 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3242 if (i->Ain.A87PushPop.isPush) {
3243 /* Load from memory into %st(0): flds/fldl amode */
3244 *p++ = clearWBit(
3245 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3246 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3247 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3248 } else {
3249 /* Dump %st(0) to memory: fstps/fstpl amode */
3250 *p++ = clearWBit(
3251 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3252 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3253 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3254 goto done;
3255 }
3256 goto done;
3257
3258 case Ain_A87FpOp:
3259 switch (i->Ain.A87FpOp.op) {
3260 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3261 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3262 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3263 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3264 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3265 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3266 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3267 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3268 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3269 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3270 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3271 case Afp_TAN:
3272 /* fptan pushes 1.0 on the FP stack, except when the
3273 argument is out of range. Hence we have to do the
3274 instruction, then inspect C2 to see if there is an out
3275 of range condition. If there is, we skip the fincstp
3276 that is used by the in-range case to get rid of this
3277 extra 1.0 value. */
3278 *p++ = 0xD9; *p++ = 0xF2; // fptan
3279 *p++ = 0x50; // pushq %rax
3280 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3281 *p++ = 0x66; *p++ = 0xA9;
3282 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3283 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3284 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3285 *p++ = 0x58; // after_fincstp: popq %rax
3286 break;
3287 default:
3288 goto bad;
3289 }
3290 goto done;
3291
3292 case Ain_A87LdCW:
3293 *p++ = clearWBit(
3294 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3295 *p++ = 0xD9;
3296 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3297 goto done;
3298
3299 case Ain_A87StSW:
3300 *p++ = clearWBit(
3301 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3302 *p++ = 0xDD;
3303 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3304 goto done;
3305
3306 case Ain_Store:
3307 if (i->Ain.Store.sz == 2) {
3308          /* This just goes to show the craziness of the instruction
3309 set encoding. We have to insert two prefix bytes, but be
3310 careful to avoid a conflict in what the size should be, by
3311 ensuring that REX.W = 0. */
3312 *p++ = 0x66; /* override to 16-bits */
3313 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3314 *p++ = 0x89;
3315 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3316 goto done;
3317 }
3318 if (i->Ain.Store.sz == 4) {
3319 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3320 *p++ = 0x89;
3321 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3322 goto done;
3323 }
3324 if (i->Ain.Store.sz == 1) {
3325 /* This is one place where it would be wrong to skip emitting
3326 a rex byte of 0x40, since the mere presence of rex changes
3327 the meaning of the byte register access. Be careful. */
3328 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3329 *p++ = 0x88;
3330 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3331 goto done;
3332 }
3333 break;
3334
3335 case Ain_LdMXCSR:
3336 *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
3337 *p++ = 0x0F;
3338 *p++ = 0xAE;
3339 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3340 goto done;
3341
3342 case Ain_SseUComIS:
3343 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3344 /* ucomi[sd] %srcL, %srcR */
3345 if (i->Ain.SseUComIS.sz == 8) {
3346 *p++ = 0x66;
3347 } else {
3348 goto bad;
3349 vassert(i->Ain.SseUComIS.sz == 4);
3350 }
3351 *p++ = clearWBit (
3352 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3353 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3354 *p++ = 0x0F;
3355 *p++ = 0x2E;
3356 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3357 vregEnc3210(i->Ain.SseUComIS.srcR) );
3358 /* pushfq */
3359 *p++ = 0x9C;
3360 /* popq %dst */
3361 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3362 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
3363 goto done;
3364
3365 case Ain_SseSI2SF:
3366 /* cvtsi2s[sd] %src, %dst */
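/* The szD field picks the F3 (cvtsi2ss) or F2 (cvtsi2sd) prefix, and
   szS selects the integer source width via REX.W: e.g. cvtsi2sd from
   a 64-bit register is F2 REX.W 0F 2A /r. */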
3367 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3368 i->Ain.SseSI2SF.src );
3369 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3370 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3371 *p++ = 0x0F;
3372 *p++ = 0x2A;
3373 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3374 i->Ain.SseSI2SF.src );
3375 goto done;
3376
3377 case Ain_SseSF2SI:
3378 /* cvts[sd]2si %src, %dst */
3379 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3380 vregEnc3210(i->Ain.SseSF2SI.src) );
3381 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3382 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3383 *p++ = 0x0F;
3384 *p++ = 0x2D;
3385 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3386 vregEnc3210(i->Ain.SseSF2SI.src) );
3387 goto done;
3388
3389 case Ain_SseSDSS:
3390 /* cvtsd2ss/cvtss2sd %src, %dst */
3391 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3392 *p++ = clearWBit(
3393 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3394 vregEnc3210(i->Ain.SseSDSS.src) ));
3395 *p++ = 0x0F;
3396 *p++ = 0x5A;
3397 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3398 vregEnc3210(i->Ain.SseSDSS.src) );
3399 goto done;
3400
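/* The size selects the move flavour: 16 bytes is movups, 8 bytes is
   movsd (F2 prefix), 4 bytes is movss (F3 prefix); opcode 0x10 is the
   load direction and 0x11 the store. */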
3401 case Ain_SseLdSt:
3402 if (i->Ain.SseLdSt.sz == 8) {
3403 *p++ = 0xF2;
3404 } else
3405 if (i->Ain.SseLdSt.sz == 4) {
3406 *p++ = 0xF3;
3407 } else
3408 if (i->Ain.SseLdSt.sz != 16) {
3409 vassert(0);
3410 }
3411 *p++ = clearWBit(
3412 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3413 i->Ain.SseLdSt.addr));
3414 *p++ = 0x0F;
3415 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3416 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3417 i->Ain.SseLdSt.addr);
3418 goto done;
3419
3420 case Ain_SseLdzLO:
3421 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3422 /* movs[sd] amode, %xmm-dst */
3423 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3424 *p++ = clearWBit(
3425 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3426 i->Ain.SseLdzLO.addr));
3427 *p++ = 0x0F;
3428 *p++ = 0x10;
3429 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3430 i->Ain.SseLdzLO.addr);
3431 goto done;
3432
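/* In the four Sse*F* cases below, 'xtra' handles the SSE compare
   forms: the CMPcc variants are all opcode 0F C2 followed by an imm8
   selecting the predicate, so xtra is set to 0x100|imm8 and the low
   byte is appended after the ModRM.  E.g. Asse_CMPLTF in the 32Fx4
   case produces cmpltps, i.e. 0F C2 /r 01. */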
3433 case Ain_Sse32Fx4:
3434 xtra = 0;
3435 *p++ = clearWBit(
3436 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
3437 vregEnc3210(i->Ain.Sse32Fx4.src) ));
3438 *p++ = 0x0F;
3439 switch (i->Ain.Sse32Fx4.op) {
3440 case Asse_ADDF: *p++ = 0x58; break;
3441 case Asse_DIVF: *p++ = 0x5E; break;
3442 case Asse_MAXF: *p++ = 0x5F; break;
3443 case Asse_MINF: *p++ = 0x5D; break;
3444 case Asse_MULF: *p++ = 0x59; break;
3445 case Asse_RCPF: *p++ = 0x53; break;
3446 case Asse_RSQRTF: *p++ = 0x52; break;
3447 case Asse_SQRTF: *p++ = 0x51; break;
3448 case Asse_SUBF: *p++ = 0x5C; break;
3449 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3450 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3451 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3452 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3453 default: goto bad;
3454 }
3455 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
3456 vregEnc3210(i->Ain.Sse32Fx4.src) );
3457 if (xtra & 0x100)
3458 *p++ = toUChar(xtra & 0xFF);
3459 goto done;
3460
3461 case Ain_Sse64Fx2:
3462 xtra = 0;
3463 *p++ = 0x66;
3464 *p++ = clearWBit(
3465 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3466 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3467 *p++ = 0x0F;
3468 switch (i->Ain.Sse64Fx2.op) {
3469 case Asse_ADDF: *p++ = 0x58; break;
3470 case Asse_DIVF: *p++ = 0x5E; break;
3471 case Asse_MAXF: *p++ = 0x5F; break;
3472 case Asse_MINF: *p++ = 0x5D; break;
3473 case Asse_MULF: *p++ = 0x59; break;
3474 case Asse_SQRTF: *p++ = 0x51; break;
3475 case Asse_SUBF: *p++ = 0x5C; break;
3476 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3477 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3478 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3479 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3480 default: goto bad;
3481 }
3482 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3483 vregEnc3210(i->Ain.Sse64Fx2.src) );
3484 if (xtra & 0x100)
3485 *p++ = toUChar(xtra & 0xFF);
3486 goto done;
3487
3488 case Ain_Sse32FLo:
3489 xtra = 0;
3490 *p++ = 0xF3;
3491 *p++ = clearWBit(
3492 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3493 vregEnc3210(i->Ain.Sse32FLo.src) ));
3494 *p++ = 0x0F;
3495 switch (i->Ain.Sse32FLo.op) {
3496 case Asse_ADDF: *p++ = 0x58; break;
3497 case Asse_DIVF: *p++ = 0x5E; break;
3498 case Asse_MAXF: *p++ = 0x5F; break;
3499 case Asse_MINF: *p++ = 0x5D; break;
3500 case Asse_MULF: *p++ = 0x59; break;
3501 case Asse_RCPF: *p++ = 0x53; break;
3502 case Asse_RSQRTF: *p++ = 0x52; break;
3503 case Asse_SQRTF: *p++ = 0x51; break;
3504 case Asse_SUBF: *p++ = 0x5C; break;
3505 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3506 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3507 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3508 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3509 default: goto bad;
3510 }
3511 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3512 vregEnc3210(i->Ain.Sse32FLo.src) );
3513 if (xtra & 0x100)
3514 *p++ = toUChar(xtra & 0xFF);
3515 goto done;
3516
3517 case Ain_Sse64FLo:
3518 xtra = 0;
3519 *p++ = 0xF2;
3520 *p++ = clearWBit(
3521 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3522 vregEnc3210(i->Ain.Sse64FLo.src) ));
3523 *p++ = 0x0F;
3524 switch (i->Ain.Sse64FLo.op) {
3525 case Asse_ADDF: *p++ = 0x58; break;
3526 case Asse_DIVF: *p++ = 0x5E; break;
3527 case Asse_MAXF: *p++ = 0x5F; break;
3528 case Asse_MINF: *p++ = 0x5D; break;
3529 case Asse_MULF: *p++ = 0x59; break;
3530 case Asse_SQRTF: *p++ = 0x51; break;
3531 case Asse_SUBF: *p++ = 0x5C; break;
3532 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3533 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3534 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3535 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3536 default: goto bad;
3537 }
3538 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3539 vregEnc3210(i->Ain.Sse64FLo.src) );
3540 if (xtra & 0x100)
3541 *p++ = toUChar(xtra & 0xFF);
3542 goto done;
3543
3544 case Ain_SseReRg:
3545 # define XX(_n) *p++ = (_n)
3546
3547 rex = clearWBit(
3548 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3549 vregEnc3210(i->Ain.SseReRg.src) ));
3550
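/* The 0x66 prefix needed by the integer (MMX-in-XMM) ops is a legacy
   prefix and so must precede the REX byte, hence the XX ordering
   below.  The untyped MOV/AND/OR/XOR/ANDN cases use the unprefixed
   ...PS encodings (movups/andps/orps/xorps/andnps), which operate on
   the full 128 bits anyway. */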
3551 switch (i->Ain.SseReRg.op) {
3552 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3553 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3554 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3555 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3556 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3557 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3558 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3559 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3560 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3561 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3562 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3563 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3564 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3565 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3566 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3567 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3568 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3569 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3570 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3571 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3572 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3573 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3574 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3575 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3576 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3577 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3578 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3579 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3580 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3581 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3582 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3583 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3584 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3585 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3586 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3587 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3588 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3589 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3590 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3591 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3592 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3593 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3594 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3595 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3596 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3597 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3598 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3599 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3600 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3601 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3602 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3603 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3604 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3605 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3606 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3607 default: goto bad;
3608 }
3609 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3610 vregEnc3210(i->Ain.SseReRg.src) );
3611 # undef XX
3612 goto done;
3613
3614 case Ain_SseCMov:
3615 /* jmp fwds if !condition */
3616 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3617 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3618 ptmp = p;
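/* The condition codes follow the hardware encoding, in which flipping
   bit 0 negates the condition (e.g. 0x74 je / 0x75 jne), so the
   0x70+(cond^1) byte above is the Jcc-rel8 that jumps over the movaps
   when the condition does not hold. */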
3619
3620 /* movaps %src, %dst */
3621 *p++ = clearWBit(
3622 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3623 vregEnc3210(i->Ain.SseCMov.src) ));
3624 *p++ = 0x0F;
3625 *p++ = 0x28;
3626 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3627 vregEnc3210(i->Ain.SseCMov.src) );
3628
3629 /* Fill in the jump offset. */
3630 *(ptmp-1) = toUChar(p - ptmp);
3631 goto done;
3632
3633 case Ain_SseShuf:
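/* pshufd $order,%src,%dst, encoded as 66 0F 70 /r ib.  For example,
   order == 0x1B reverses the four 32-bit lanes. */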
3634 *p++ = 0x66;
3635 *p++ = clearWBit(
3636 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3637 vregEnc3210(i->Ain.SseShuf.src) ));
3638 *p++ = 0x0F;
3639 *p++ = 0x70;
3640 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3641 vregEnc3210(i->Ain.SseShuf.src) );
3642 *p++ = (UChar)(i->Ain.SseShuf.order);
3643 goto done;
3644
3645 //uu case Ain_AvxLdSt: {
3646 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3647 //uu i->Ain.AvxLdSt.addr );
3648 //uu p = emitVexPrefix(p, vex);
3649 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3650 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3651 //uu goto done;
3652 //uu }
3653
3654 case Ain_EvCheck: {
3655 /* We generate:
3656 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3657 (2 bytes) jns nofail expected taken
3658 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3659 nofail:
3660 */
3661 /* This is heavily asserted re instruction lengths. It needs to
3662 be. If we get given unexpected forms of .amCounter or
3663 .amFailAddr -- basically, anything that's not of the form
3664 uimm7(%rbp) -- the assertions below are likely to fail. */
3665 /* Note also that after the decl we must be very careful not to
3666 read the carry flag, else we get a partial flags stall.
3667 js/jns avoids that, though. */
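/* Illustrative: for the expected uimm7(%rbp) amodes the eight bytes
   emitted are
      FF 4D 08    decl 8(%rbp)
      79 03       jns nofail
      FF 65 00    jmp* 0(%rbp)
   which is the length evCheckSzB_AMD64() promises. */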
3668 UChar* p0 = p;
3669 /* --- decl 8(%rbp) --- */
3670 /* Need to compute the REX byte for the decl in order to prove
3671 that we don't need it, since this is a 32-bit dec and all
3672 registers involved in the amode are < r8. "1" because
3673 there's no register in this encoding; instead the register
3674 field is used as a sub opcode. The encoding for "decl r/m32"
3675 is FF /1, hence the "1". */
3676 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
3677 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3678 *p++ = 0xFF;
3679 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
3680 vassert(p - p0 == 3);
3681 /* --- jns nofail --- */
3682 *p++ = 0x79;
3683 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3684 vassert(p - p0 == 5);
3685 /* --- jmp* 0(%rbp) --- */
3686 /* Once again, verify we don't need REX. The encoding is FF /4.
3687 We don't need REX.W since by default FF /4 in 64-bit mode
3688 implies a 64 bit load. */
3689 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
3690 if (rex != 0x40) goto bad;
3691 *p++ = 0xFF;
3692 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
3693 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3694 /* And crosscheck .. */
3695 vassert(evCheckSzB_AMD64() == 8);
3696 goto done;
3697 }
3698
3699 case Ain_ProfInc: {
3700 /* We generate movabsq $0, %r11
3701 incq (%r11)
3702 in the expectation that a later call to LibVEX_patchProfCtr
3703 will be used to fill in the immediate field once the right
3704 value is known.
3705 49 BB 00 00 00 00 00 00 00 00
3706 49 FF 03
3707 */
3708 *p++ = 0x49; *p++ = 0xBB;
3709 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3710 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3711 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
3712 /* Tell the caller .. */
3713 vassert(!(*is_profInc));
3714 *is_profInc = True;
3715 goto done;
3716 }
3717
3718 default:
3719 goto bad;
3720 }
3721
3722 bad:
3723 ppAMD64Instr(i, mode64);
3724 vpanic("emit_AMD64Instr");
3725 /*NOTREACHED*/
3726
3727 done:
3728 vassert(p - &buf[0] <= 32);
3729 return p - &buf[0];
3730 }
3731
3732
3733 /* How big is an event check? See case for Ain_EvCheck in
3734 emit_AMD64Instr just above. That crosschecks what this returns, so
3735 we can tell if we're inconsistent. */
3736 Int evCheckSzB_AMD64 ( void )
3737 {
3738 return 8;
3739 }
3740
3741
3742 /* NB: what goes on here has to be very closely coordinated with the
3743 emitInstr case for XDirect, above. */
3744 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
3745 void* place_to_chain,
3746 const void* disp_cp_chain_me_EXPECTED,
3747 const void* place_to_jump_to )
3748 {
3749 vassert(endness_host == VexEndnessLE);
3750
3751 /* What we're expecting to see is:
3752 movabsq $disp_cp_chain_me_EXPECTED, %r11
3753 call *%r11
3754 viz
3755 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
3756 41 FF D3
3757 */
3758 UChar* p = (UChar*)place_to_chain;
3759 vassert(p[0] == 0x49);
3760 vassert(p[1] == 0xBB);
3761 vassert(*(Addr*)(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
3762 vassert(p[10] == 0x41);
3763 vassert(p[11] == 0xFF);
3764 vassert(p[12] == 0xD3);
3765 /* And what we want to change it to is either:
3766 (general case):
3767 movabsq $place_to_jump_to, %r11
3768 jmpq *%r11
3769 viz
3770 49 BB <8 bytes value == place_to_jump_to>
3771 41 FF E3
3772 So it's the same length (convenient, huh) and we don't
3773 need to change all the bits.
3774 ---OR---
3775 in the case where the displacement falls within 32 bits
3776 jmpq disp32 where disp32 is relative to the next insn
3777 ud2; ud2; ud2; ud2
3778 viz
3779 E9 <4 bytes == disp32>
3780 0F 0B 0F 0B 0F 0B 0F 0B
3781
3782 In both cases the replacement has the same length as the original.
3783 To remain sane & verifiable,
3784 (1) limit the displacement for the short form to
3785 (say) +/- one billion, so as to avoid wraparound
3786 off-by-ones
3787 (2) even if the short form is applicable, once every (say)
3788 1024 times use the long form anyway, so as to maintain
3789 verifiability
3790 */
3791 /* This is the delta we need to put into a JMP d32 insn. It's
3792 relative to the start of the next insn, hence the -5. */
3793 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
3794 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
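/* Example: if place_to_jump_to is 0x1000 bytes beyond p, then delta is
   0xFFB and the patched-in short form starts E9 FB 0F 00 00. */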
3795
3796 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
3797 if (shortOK) {
3798 shortCTR++; // thread safety bleh
3799 if (0 == (shortCTR & 0x3FF)) {
3800 shortOK = False;
3801 if (0)
3802 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
3803 "using long jmp\n", shortCTR);
3804 }
3805 }
3806
3807 /* And make the modifications. */
3808 if (shortOK) {
3809 p[0] = 0xE9;
3810 p[1] = (delta >> 0) & 0xFF;
3811 p[2] = (delta >> 8) & 0xFF;
3812 p[3] = (delta >> 16) & 0xFF;
3813 p[4] = (delta >> 24) & 0xFF;
3814 p[5] = 0x0F; p[6] = 0x0B;
3815 p[7] = 0x0F; p[8] = 0x0B;
3816 p[9] = 0x0F; p[10] = 0x0B;
3817 p[11] = 0x0F; p[12] = 0x0B;
3818 /* sanity check on the delta -- top 32 are all 0 or all 1 */
3819 delta >>= 32;
3820 vassert(delta == 0LL || delta == -1LL);
3821 } else {
3822 /* Minimal modifications from the starting sequence. */
3823 *(Addr*)(&p[2]) = (Addr)place_to_jump_to;
3824 p[12] = 0xE3;
3825 }
3826 VexInvalRange vir = { (HWord)place_to_chain, 13 };
3827 return vir;
3828 }
3829
3830
3831 /* NB: what goes on here has to be very closely coordinated with the
3832 emitInstr case for XDirect, above. */
3833 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
3834 void* place_to_unchain,
3835 const void* place_to_jump_to_EXPECTED,
3836 const void* disp_cp_chain_me )
3837 {
3838 vassert(endness_host == VexEndnessLE);
3839
3840 /* What we're expecting to see is either:
3841 (general case)
3842 movabsq $place_to_jump_to_EXPECTED, %r11
3843 jmpq *%r11
3844 viz
3845 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
3846 41 FF E3
3847 ---OR---
3848 in the case where the displacement falls within 32 bits
3849 jmpq d32
3850 ud2; ud2; ud2; ud2
3851 viz
3852 E9 <4 bytes == disp32>
3853 0F 0B 0F 0B 0F 0B 0F 0B
3854 */
3855 UChar* p = (UChar*)place_to_unchain;
3856 Bool valid = False;
3857 if (p[0] == 0x49 && p[1] == 0xBB
3858 && *(Addr*)(&p[2]) == (Addr)place_to_jump_to_EXPECTED
3859 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
3860 /* it's the long form */
3861 valid = True;
3862 }
3863 else
3864 if (p[0] == 0xE9
3865 && p[5] == 0x0F && p[6] == 0x0B
3866 && p[7] == 0x0F && p[8] == 0x0B
3867 && p[9] == 0x0F && p[10] == 0x0B
3868 && p[11] == 0x0F && p[12] == 0x0B) {
3869 /* It's the short form. Check the offset is right. */
3870 Int s32 = *(Int*)(&p[1]);
3871 Long s64 = (Long)s32;
3872 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
3873 valid = True;
3874 if (0)
3875 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
3876 }
3877 }
3878 vassert(valid);
3879 /* And what we want to change it to is:
3880 movabsq $disp_cp_chain_me, %r11
3881 call *%r11
3882 viz
3883 49 BB <8 bytes value == disp_cp_chain_me>
3884 41 FF D3
3885 So it's the same length (convenient, huh).
3886 */
3887 p[0] = 0x49;
3888 p[1] = 0xBB;
3889 *(Addr*)(&p[2]) = (Addr)disp_cp_chain_me;
3890 p[10] = 0x41;
3891 p[11] = 0xFF;
3892 p[12] = 0xD3;
3893 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
3894 return vir;
3895 }
3896
3897
3898 /* Patch the counter address into a profile inc point, as previously
3899 created by the Ain_ProfInc case for emit_AMD64Instr. */
3900 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
3901 void* place_to_patch,
3902 const ULong* location_of_counter )
3903 {
3904 vassert(endness_host == VexEndnessLE);
3905 vassert(sizeof(ULong*) == 8);
3906 UChar* p = (UChar*)place_to_patch;
3907 vassert(p[0] == 0x49);
3908 vassert(p[1] == 0xBB);
3909 vassert(p[2] == 0x00);
3910 vassert(p[3] == 0x00);
3911 vassert(p[4] == 0x00);
3912 vassert(p[5] == 0x00);
3913 vassert(p[6] == 0x00);
3914 vassert(p[7] == 0x00);
3915 vassert(p[8] == 0x00);
3916 vassert(p[9] == 0x00);
3917 vassert(p[10] == 0x49);
3918 vassert(p[11] == 0xFF);
3919 vassert(p[12] == 0x03);
3920 ULong imm64 = (ULong)(Addr)location_of_counter;
3921 p[2] = imm64 & 0xFF; imm64 >>= 8;
3922 p[3] = imm64 & 0xFF; imm64 >>= 8;
3923 p[4] = imm64 & 0xFF; imm64 >>= 8;
3924 p[5] = imm64 & 0xFF; imm64 >>= 8;
3925 p[6] = imm64 & 0xFF; imm64 >>= 8;
3926 p[7] = imm64 & 0xFF; imm64 >>= 8;
3927 p[8] = imm64 & 0xFF; imm64 >>= 8;
3928 p[9] = imm64 & 0xFF; imm64 >>= 8;
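/* The immediate is patched little-endian: e.g. a counter located at
   0x0000000012345678 leaves 78 56 34 12 00 00 00 00 in p[2..9]. */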
3929 VexInvalRange vir = { (HWord)place_to_patch, 13 };
3930 return vir;
3931 }
3932
3933
3934 /*---------------------------------------------------------------*/
3935 /*--- end host_amd64_defs.c ---*/
3936 /*---------------------------------------------------------------*/
3937