1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_amd64_defs.h"
43
44
45 /* --------- Registers. --------- */
46
47 const RRegUniverse* getRRegUniverse_AMD64 ( void )
48 {
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_AMD64;
52 static Bool rRegUniverse_AMD64_initted = False;
53
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_AMD64;
56
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_AMD64_initted))
59 return ru;
60
61 RRegUniverse__init(ru);
62
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
66 ru->regs[ru->size++] = hregAMD64_RSI();
67 ru->regs[ru->size++] = hregAMD64_RDI();
68 ru->regs[ru->size++] = hregAMD64_R8();
69 ru->regs[ru->size++] = hregAMD64_R9();
70 ru->regs[ru->size++] = hregAMD64_R12();
71 ru->regs[ru->size++] = hregAMD64_R13();
72 ru->regs[ru->size++] = hregAMD64_R14();
73 ru->regs[ru->size++] = hregAMD64_R15();
74 ru->regs[ru->size++] = hregAMD64_RBX();
75 ru->regs[ru->size++] = hregAMD64_XMM3();
76 ru->regs[ru->size++] = hregAMD64_XMM4();
77 ru->regs[ru->size++] = hregAMD64_XMM5();
78 ru->regs[ru->size++] = hregAMD64_XMM6();
79 ru->regs[ru->size++] = hregAMD64_XMM7();
80 ru->regs[ru->size++] = hregAMD64_XMM8();
81 ru->regs[ru->size++] = hregAMD64_XMM9();
82 ru->regs[ru->size++] = hregAMD64_XMM10();
83 ru->regs[ru->size++] = hregAMD64_XMM11();
84 ru->regs[ru->size++] = hregAMD64_XMM12();
85 ru->regs[ru->size++] = hregAMD64_R10();
86 ru->allocable = ru->size;
87 /* And other regs, not available to the allocator. */
88 ru->regs[ru->size++] = hregAMD64_RAX();
89 ru->regs[ru->size++] = hregAMD64_RCX();
90 ru->regs[ru->size++] = hregAMD64_RDX();
91 ru->regs[ru->size++] = hregAMD64_RSP();
92 ru->regs[ru->size++] = hregAMD64_RBP();
93 ru->regs[ru->size++] = hregAMD64_R11();
94 ru->regs[ru->size++] = hregAMD64_XMM0();
95 ru->regs[ru->size++] = hregAMD64_XMM1();
96
97 rRegUniverse_AMD64_initted = True;
98
99 RRegUniverse__check_is_sane(ru);
100 return ru;
101 }
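/* Illustrative sketch (not part of the original file): how a client of the
   universe might walk it and distinguish allocatable from reserved registers.
   Entries [0 .. allocable-1] are available to the register allocator; entries
   [allocable .. size-1] are pinned to fixed roles.  The helper name below is
   hypothetical; it is guarded out so it has no effect on the build. */
#if 0
static void showUniverse_AMD64_example ( void )
{
   const RRegUniverse* ru = getRRegUniverse_AMD64();
   UInt k;
   for (k = 0; k < ru->size; k++) {
      vex_printf(k < ru->allocable ? "allocatable: " : "reserved:    ");
      ppHRegAMD64(ru->regs[k]);
      vex_printf("\n");
   }
}
#endif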
102
103
104 void ppHRegAMD64 ( HReg reg )
105 {
106 Int r;
107 static const HChar* ireg64_names[16]
108 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
109 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
110 /* Be generic for all virtual regs. */
111 if (hregIsVirtual(reg)) {
112 ppHReg(reg);
113 return;
114 }
115 /* But specific for real regs. */
116 switch (hregClass(reg)) {
117 case HRcInt64:
118 r = hregEncoding(reg);
119 vassert(r >= 0 && r < 16);
120 vex_printf("%s", ireg64_names[r]);
121 return;
122 case HRcVec128:
123 r = hregEncoding(reg);
124 vassert(r >= 0 && r < 16);
125 vex_printf("%%xmm%d", r);
126 return;
127 default:
128 vpanic("ppHRegAMD64");
129 }
130 }
131
132 static void ppHRegAMD64_lo32 ( HReg reg )
133 {
134 Int r;
135 static const HChar* ireg32_names[16]
136 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
137 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
138 /* Be generic for all virtual regs. */
139 if (hregIsVirtual(reg)) {
140 ppHReg(reg);
141 vex_printf("d");
142 return;
143 }
144 /* But specific for real regs. */
145 switch (hregClass(reg)) {
146 case HRcInt64:
147 r = hregEncoding(reg);
148 vassert(r >= 0 && r < 16);
149 vex_printf("%s", ireg32_names[r]);
150 return;
151 default:
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
153 }
154 }
155
156
157 /* --------- Condition codes, Intel encoding. --------- */
158
159 const HChar* showAMD64CondCode ( AMD64CondCode cond )
160 {
161 switch (cond) {
162 case Acc_O: return "o";
163 case Acc_NO: return "no";
164 case Acc_B: return "b";
165 case Acc_NB: return "nb";
166 case Acc_Z: return "z";
167 case Acc_NZ: return "nz";
168 case Acc_BE: return "be";
169 case Acc_NBE: return "nbe";
170 case Acc_S: return "s";
171 case Acc_NS: return "ns";
172 case Acc_P: return "p";
173 case Acc_NP: return "np";
174 case Acc_L: return "l";
175 case Acc_NL: return "nl";
176 case Acc_LE: return "le";
177 case Acc_NLE: return "nle";
178 case Acc_ALWAYS: return "ALWAYS";
179 default: vpanic("ppAMD64CondCode");
180 }
181 }
182
183
184 /* --------- AMD64AMode: memory address expressions. --------- */
185
186 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
187 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
188 am->tag = Aam_IR;
189 am->Aam.IR.imm = imm32;
190 am->Aam.IR.reg = reg;
191 return am;
192 }
193 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
194 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
195 am->tag = Aam_IRRS;
196 am->Aam.IRRS.imm = imm32;
197 am->Aam.IRRS.base = base;
198 am->Aam.IRRS.index = indEx;
199 am->Aam.IRRS.shift = shift;
200 vassert(shift >= 0 && shift <= 3);
201 return am;
202 }
203
204 void ppAMD64AMode ( AMD64AMode* am ) {
205 switch (am->tag) {
206 case Aam_IR:
207 if (am->Aam.IR.imm == 0)
208 vex_printf("(");
209 else
210 vex_printf("0x%x(", am->Aam.IR.imm);
211 ppHRegAMD64(am->Aam.IR.reg);
212 vex_printf(")");
213 return;
214 case Aam_IRRS:
215 vex_printf("0x%x(", am->Aam.IRRS.imm);
216 ppHRegAMD64(am->Aam.IRRS.base);
217 vex_printf(",");
218 ppHRegAMD64(am->Aam.IRRS.index);
219 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
220 return;
221 default:
222 vpanic("ppAMD64AMode");
223 }
224 }
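/* Illustrative sketch (not in the original): building the two address forms
   defined above.  ppAMD64AMode would render these as "0x10(%rbp)" and
   "0x4(%rbp,%rcx,8)" respectively.  Guarded out; for exposition only. */
#if 0
static void amode_examples ( void )
{
   AMD64AMode* am1 = AMD64AMode_IR(0x10, hregAMD64_RBP());
   AMD64AMode* am2 = AMD64AMode_IRRS(0x4, hregAMD64_RBP(),
                                     hregAMD64_RCX(), 3/*scale = 1<<3 = 8*/);
   ppAMD64AMode(am1); vex_printf("\n");
   ppAMD64AMode(am2); vex_printf("\n");
}
#endif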
225
226 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
227 switch (am->tag) {
228 case Aam_IR:
229 addHRegUse(u, HRmRead, am->Aam.IR.reg);
230 return;
231 case Aam_IRRS:
232 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
233 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
234 return;
235 default:
236 vpanic("addRegUsage_AMD64AMode");
237 }
238 }
239
240 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
241 switch (am->tag) {
242 case Aam_IR:
243 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
244 return;
245 case Aam_IRRS:
246 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
247 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
248 return;
249 default:
250 vpanic("mapRegs_AMD64AMode");
251 }
252 }
253
254 /* --------- Operand, which can be reg, immediate or memory. --------- */
255
256 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
257 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
258 op->tag = Armi_Imm;
259 op->Armi.Imm.imm32 = imm32;
260 return op;
261 }
262 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
263 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
264 op->tag = Armi_Reg;
265 op->Armi.Reg.reg = reg;
266 return op;
267 }
268 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
269 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
270 op->tag = Armi_Mem;
271 op->Armi.Mem.am = am;
272 return op;
273 }
274
275 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
276 switch (op->tag) {
277 case Armi_Imm:
278 vex_printf("$0x%x", op->Armi.Imm.imm32);
279 return;
280 case Armi_Reg:
281 if (lo32)
282 ppHRegAMD64_lo32(op->Armi.Reg.reg);
283 else
284 ppHRegAMD64(op->Armi.Reg.reg);
285 return;
286 case Armi_Mem:
287 ppAMD64AMode(op->Armi.Mem.am);
288 return;
289 default:
290 vpanic("ppAMD64RMI");
291 }
292 }
293 void ppAMD64RMI ( AMD64RMI* op ) {
294 ppAMD64RMI_wrk(op, False/*!lo32*/);
295 }
296 void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
297 ppAMD64RMI_wrk(op, True/*lo32*/);
298 }
299
300 /* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
302 accordingly. */
303 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
304 switch (op->tag) {
305 case Armi_Imm:
306 return;
307 case Armi_Reg:
308 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
309 return;
310 case Armi_Mem:
311 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
312 return;
313 default:
314 vpanic("addRegUsage_AMD64RMI");
315 }
316 }
317
318 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
319 switch (op->tag) {
320 case Armi_Imm:
321 return;
322 case Armi_Reg:
323 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
324 return;
325 case Armi_Mem:
326 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
327 return;
328 default:
329 vpanic("mapRegs_AMD64RMI");
330 }
331 }
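/* Illustrative sketch (not in the original): the three operand forms an
   AMD64RMI can take -- immediate, register, or memory -- as accepted by the
   constructors above.  Guarded out; for exposition only. */
#if 0
static void rmi_examples ( void )
{
   AMD64RMI* r1 = AMD64RMI_Imm(0x2A);
   AMD64RMI* r2 = AMD64RMI_Reg(hregAMD64_RBX());
   AMD64RMI* r3 = AMD64RMI_Mem(AMD64AMode_IR(8, hregAMD64_RSP()));
   ppAMD64RMI(r1); vex_printf("\n");
   ppAMD64RMI(r2); vex_printf("\n");
   ppAMD64RMI(r3); vex_printf("\n");
}
#endif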
332
333
334 /* --------- Operand, which can be reg or immediate only. --------- */
335
336 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
337 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
338 op->tag = Ari_Imm;
339 op->Ari.Imm.imm32 = imm32;
340 return op;
341 }
342 AMD64RI* AMD64RI_Reg ( HReg reg ) {
343 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
344 op->tag = Ari_Reg;
345 op->Ari.Reg.reg = reg;
346 return op;
347 }
348
349 void ppAMD64RI ( AMD64RI* op ) {
350 switch (op->tag) {
351 case Ari_Imm:
352 vex_printf("$0x%x", op->Ari.Imm.imm32);
353 return;
354 case Ari_Reg:
355 ppHRegAMD64(op->Ari.Reg.reg);
356 return;
357 default:
358 vpanic("ppAMD64RI");
359 }
360 }
361
362 /* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
364 accordingly. */
365 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
366 switch (op->tag) {
367 case Ari_Imm:
368 return;
369 case Ari_Reg:
370 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
371 return;
372 default:
373 vpanic("addRegUsage_AMD64RI");
374 }
375 }
376
377 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
378 switch (op->tag) {
379 case Ari_Imm:
380 return;
381 case Ari_Reg:
382 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
383 return;
384 default:
385 vpanic("mapRegs_AMD64RI");
386 }
387 }
388
389
390 /* --------- Operand, which can be reg or memory only. --------- */
391
392 AMD64RM* AMD64RM_Reg ( HReg reg ) {
393 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
394 op->tag = Arm_Reg;
395 op->Arm.Reg.reg = reg;
396 return op;
397 }
398 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
399 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
400 op->tag = Arm_Mem;
401 op->Arm.Mem.am = am;
402 return op;
403 }
404
405 void ppAMD64RM ( AMD64RM* op ) {
406 switch (op->tag) {
407 case Arm_Mem:
408 ppAMD64AMode(op->Arm.Mem.am);
409 return;
410 case Arm_Reg:
411 ppHRegAMD64(op->Arm.Reg.reg);
412 return;
413 default:
414 vpanic("ppAMD64RM");
415 }
416 }
417
418 /* Because an AMD64RM can be both a source or destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
422 switch (op->tag) {
423 case Arm_Mem:
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
427 return;
428 case Arm_Reg:
429 /* reg is read, written or modified. Add it in the
430 appropriate way. */
431 addHRegUse(u, mode, op->Arm.Reg.reg);
432 return;
433 default:
434 vpanic("addRegUsage_AMD64RM");
435 }
436 }
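/* Illustrative sketch (not in the original): the 'mode' argument only matters
   for the register form; for the memory form the amode's registers are always
   just read, however the operand as a whole is used.  Guarded out; names and
   register choices here are illustrative only. */
#if 0
static void rm_usage_example ( void )
{
   HRegUsage u;
   initHRegUsage(&u);
   /* Register operand used as a destination: recorded as written. */
   addRegUsage_AMD64RM(&u, AMD64RM_Reg(hregAMD64_RBX()), HRmWrite);
   /* Memory operand used as a destination: only the amode's base is read. */
   addRegUsage_AMD64RM(&u, AMD64RM_Mem(AMD64AMode_IR(0, hregAMD64_RSP())),
                       HRmWrite);
}
#endif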
437
438 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
439 {
440 switch (op->tag) {
441 case Arm_Mem:
442 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
443 return;
444 case Arm_Reg:
445 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
446 return;
447 default:
448 vpanic("mapRegs_AMD64RM");
449 }
450 }
451
452
453 /* --------- Instructions. --------- */
454
455 static const HChar* showAMD64ScalarSz ( Int sz ) {
456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
461 }
462 }
463
464 const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
469 }
470 }
471
472 const HChar* showAMD64AluOp ( AMD64AluOp op ) {
473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
483 case Aalu_MUL: return "imul";
484 default: vpanic("showAMD64AluOp");
485 }
486 }
487
488 const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
494 }
495 }
496
497 const HChar* showA87FpOp ( A87FpOp op ) {
498 switch (op) {
499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
502 case Afp_YL2XP1: return "yl2xp1";
503 case Afp_PREM: return "prem";
504 case Afp_PREM1: return "prem1";
505 case Afp_SQRT: return "sqrt";
506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
508 case Afp_TAN: return "tan";
509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
512 }
513 }
514
515 const HChar* showAMD64SseOp ( AMD64SseOp op ) {
516 switch (op) {
517 case Asse_MOV: return "movups";
518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
530 case Asse_SQRTF: return "sqrt";
531 case Asse_AND: return "and";
532 case Asse_OR: return "or";
533 case Asse_XOR: return "xor";
534 case Asse_ANDN: return "andn";
535 case Asse_ADD8: return "paddb";
536 case Asse_ADD16: return "paddw";
537 case Asse_ADD32: return "paddd";
538 case Asse_ADD64: return "paddq";
539 case Asse_QADD8U: return "paddusb";
540 case Asse_QADD16U: return "paddusw";
541 case Asse_QADD8S: return "paddsb";
542 case Asse_QADD16S: return "paddsw";
543 case Asse_SUB8: return "psubb";
544 case Asse_SUB16: return "psubw";
545 case Asse_SUB32: return "psubd";
546 case Asse_SUB64: return "psubq";
547 case Asse_QSUB8U: return "psubusb";
548 case Asse_QSUB16U: return "psubusw";
549 case Asse_QSUB8S: return "psubsb";
550 case Asse_QSUB16S: return "psubsw";
551 case Asse_MUL16: return "pmullw";
552 case Asse_MULHI16U: return "pmulhuw";
553 case Asse_MULHI16S: return "pmulhw";
554 case Asse_AVG8U: return "pavgb";
555 case Asse_AVG16U: return "pavgw";
556 case Asse_MAX16S: return "pmaxw";
557 case Asse_MAX8U: return "pmaxub";
558 case Asse_MIN16S: return "pminw";
559 case Asse_MIN8U: return "pminub";
560 case Asse_CMPEQ8: return "pcmpeqb";
561 case Asse_CMPEQ16: return "pcmpeqw";
562 case Asse_CMPEQ32: return "pcmpeqd";
563 case Asse_CMPGT8S: return "pcmpgtb";
564 case Asse_CMPGT16S: return "pcmpgtw";
565 case Asse_CMPGT32S: return "pcmpgtd";
566 case Asse_SHL16: return "psllw";
567 case Asse_SHL32: return "pslld";
568 case Asse_SHL64: return "psllq";
569 case Asse_SHR16: return "psrlw";
570 case Asse_SHR32: return "psrld";
571 case Asse_SHR64: return "psrlq";
572 case Asse_SAR16: return "psraw";
573 case Asse_SAR32: return "psrad";
574 case Asse_PACKSSD: return "packssdw";
575 case Asse_PACKSSW: return "packsswb";
576 case Asse_PACKUSW: return "packuswb";
577 case Asse_UNPCKHB: return "punpckhb";
578 case Asse_UNPCKHW: return "punpckhw";
579 case Asse_UNPCKHD: return "punpckhd";
580 case Asse_UNPCKHQ: return "punpckhq";
581 case Asse_UNPCKLB: return "punpcklb";
582 case Asse_UNPCKLW: return "punpcklw";
583 case Asse_UNPCKLD: return "punpckld";
584 case Asse_UNPCKLQ: return "punpcklq";
585 default: vpanic("showAMD64SseOp");
586 }
587 }
588
589 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
590 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
591 i->tag = Ain_Imm64;
592 i->Ain.Imm64.imm64 = imm64;
593 i->Ain.Imm64.dst = dst;
594 return i;
595 }
596 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
597 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
598 i->tag = Ain_Alu64R;
599 i->Ain.Alu64R.op = op;
600 i->Ain.Alu64R.src = src;
601 i->Ain.Alu64R.dst = dst;
602 return i;
603 }
604 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
605 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
606 i->tag = Ain_Alu64M;
607 i->Ain.Alu64M.op = op;
608 i->Ain.Alu64M.src = src;
609 i->Ain.Alu64M.dst = dst;
610 vassert(op != Aalu_MUL);
611 return i;
612 }
613 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
614 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
615 i->tag = Ain_Sh64;
616 i->Ain.Sh64.op = op;
617 i->Ain.Sh64.src = src;
618 i->Ain.Sh64.dst = dst;
619 return i;
620 }
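/* Illustrative note (not in the original): a 'src' of 0 is the encoding for a
   variable shift by %cl -- the pretty-printer and getRegUsage_AMD64Instr
   below both treat it that way.  A sketch of the two forms, guarded out: */
#if 0
static void sh64_examples ( HReg dst )
{
   AMD64Instr* byImm = AMD64Instr_Sh64(Ash_SHL, 4, dst);  /* shlq $4,dst  */
   AMD64Instr* byCL  = AMD64Instr_Sh64(Ash_SAR, 0, dst);  /* sarq %cl,dst */
   (void)byImm; (void)byCL;
}
#endif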
621 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
622 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
623 i->tag = Ain_Test64;
624 i->Ain.Test64.imm32 = imm32;
625 i->Ain.Test64.dst = dst;
626 return i;
627 }
628 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
629 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
630 i->tag = Ain_Unary64;
631 i->Ain.Unary64.op = op;
632 i->Ain.Unary64.dst = dst;
633 return i;
634 }
635 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
636 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
637 i->tag = Ain_Lea64;
638 i->Ain.Lea64.am = am;
639 i->Ain.Lea64.dst = dst;
640 return i;
641 }
642 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
643 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
644 i->tag = Ain_Alu32R;
645 i->Ain.Alu32R.op = op;
646 i->Ain.Alu32R.src = src;
647 i->Ain.Alu32R.dst = dst;
648 switch (op) {
649 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
650 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
651 default: vassert(0);
652 }
653 return i;
654 }
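/* Illustrative sketch (not in the original): how these constructors are
   typically combined.  This builds "movabsq $imm,dst" followed by
   "addq src,dst" -- the kind of pair instruction selection emits -- and
   prints them with ppAMD64Instr.  Guarded out; for exposition only. */
#if 0
static void alu_examples ( HReg src, HReg dst )
{
   AMD64Instr* i1 = AMD64Instr_Imm64(0x1122334455667788ULL, dst);
   AMD64Instr* i2 = AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Reg(src), dst);
   ppAMD64Instr(i1, True/*mode64*/); vex_printf("\n");
   ppAMD64Instr(i2, True/*mode64*/); vex_printf("\n");
}
#endif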
655 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
656 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
657 i->tag = Ain_MulL;
658 i->Ain.MulL.syned = syned;
659 i->Ain.MulL.src = src;
660 return i;
661 }
662 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
663 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
664 i->tag = Ain_Div;
665 i->Ain.Div.syned = syned;
666 i->Ain.Div.sz = sz;
667 i->Ain.Div.src = src;
668 vassert(sz == 4 || sz == 8);
669 return i;
670 }
671 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
672 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
673 i->tag = Ain_Push;
674 i->Ain.Push.src = src;
675 return i;
676 }
677 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
678 RetLoc rloc ) {
679 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
680 i->tag = Ain_Call;
681 i->Ain.Call.cond = cond;
682 i->Ain.Call.target = target;
683 i->Ain.Call.regparms = regparms;
684 i->Ain.Call.rloc = rloc;
685 vassert(regparms >= 0 && regparms <= 6);
686 vassert(is_sane_RetLoc(rloc));
687 return i;
688 }
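/* Illustrative sketch (not in the original): constructing a two-argument
   helper call.  'regparms' tells the allocator how many of the argument
   registers %rdi,%rsi,%rdx,%rcx,%r8,%r9 carry live values (see
   getRegUsage_AMD64Instr below).  The target address is a placeholder, and
   the RetLoc value assumes the mk_RetLoc_simple/RLPri_Int helpers from
   host_generic_regs.h.  Guarded out; for exposition only. */
#if 0
static AMD64Instr* call_example ( void )
{
   return AMD64Instr_Call(Acc_ALWAYS, (Addr64)0x12345678ULL,
                          2/*regparms*/, mk_RetLoc_simple(RLPri_Int));
}
#endif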
689
690 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
691 AMD64CondCode cond, Bool toFastEP ) {
692 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
693 i->tag = Ain_XDirect;
694 i->Ain.XDirect.dstGA = dstGA;
695 i->Ain.XDirect.amRIP = amRIP;
696 i->Ain.XDirect.cond = cond;
697 i->Ain.XDirect.toFastEP = toFastEP;
698 return i;
699 }
700 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
701 AMD64CondCode cond ) {
702 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
703 i->tag = Ain_XIndir;
704 i->Ain.XIndir.dstGA = dstGA;
705 i->Ain.XIndir.amRIP = amRIP;
706 i->Ain.XIndir.cond = cond;
707 return i;
708 }
709 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
710 AMD64CondCode cond, IRJumpKind jk ) {
711 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
712 i->tag = Ain_XAssisted;
713 i->Ain.XAssisted.dstGA = dstGA;
714 i->Ain.XAssisted.amRIP = amRIP;
715 i->Ain.XAssisted.cond = cond;
716 i->Ain.XAssisted.jk = jk;
717 return i;
718 }
719
720 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
721 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
722 i->tag = Ain_CMov64;
723 i->Ain.CMov64.cond = cond;
724 i->Ain.CMov64.src = src;
725 i->Ain.CMov64.dst = dst;
726 vassert(cond != Acc_ALWAYS);
727 return i;
728 }
729 AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
730 AMD64AMode* addr, HReg dst ) {
731 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
732 i->tag = Ain_CLoad;
733 i->Ain.CLoad.cond = cond;
734 i->Ain.CLoad.szB = szB;
735 i->Ain.CLoad.addr = addr;
736 i->Ain.CLoad.dst = dst;
737 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
738 return i;
739 }
740 AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
741 HReg src, AMD64AMode* addr ) {
742 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
743 i->tag = Ain_CStore;
744 i->Ain.CStore.cond = cond;
745 i->Ain.CStore.szB = szB;
746 i->Ain.CStore.src = src;
747 i->Ain.CStore.addr = addr;
748 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
749 return i;
750 }
751 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
752 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
753 i->tag = Ain_MovxLQ;
754 i->Ain.MovxLQ.syned = syned;
755 i->Ain.MovxLQ.src = src;
756 i->Ain.MovxLQ.dst = dst;
757 return i;
758 }
759 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
760 AMD64AMode* src, HReg dst ) {
761 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
762 i->tag = Ain_LoadEX;
763 i->Ain.LoadEX.szSmall = szSmall;
764 i->Ain.LoadEX.syned = syned;
765 i->Ain.LoadEX.src = src;
766 i->Ain.LoadEX.dst = dst;
767 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
768 return i;
769 }
770 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
771 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
772 i->tag = Ain_Store;
773 i->Ain.Store.sz = sz;
774 i->Ain.Store.src = src;
775 i->Ain.Store.dst = dst;
776 vassert(sz == 1 || sz == 2 || sz == 4);
777 return i;
778 }
779 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
780 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
781 i->tag = Ain_Set64;
782 i->Ain.Set64.cond = cond;
783 i->Ain.Set64.dst = dst;
784 return i;
785 }
786 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
787 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
788 i->tag = Ain_Bsfr64;
789 i->Ain.Bsfr64.isFwds = isFwds;
790 i->Ain.Bsfr64.src = src;
791 i->Ain.Bsfr64.dst = dst;
792 return i;
793 }
794 AMD64Instr* AMD64Instr_MFence ( void ) {
795 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
796 i->tag = Ain_MFence;
797 return i;
798 }
799 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
800 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
801 i->tag = Ain_ACAS;
802 i->Ain.ACAS.addr = addr;
803 i->Ain.ACAS.sz = sz;
804 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
805 return i;
806 }
807 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
808 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
809 i->tag = Ain_DACAS;
810 i->Ain.DACAS.addr = addr;
811 i->Ain.DACAS.sz = sz;
812 vassert(sz == 8 || sz == 4);
813 return i;
814 }
815
816 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
817 {
818 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
819 i->tag = Ain_A87Free;
820 i->Ain.A87Free.nregs = nregs;
821 vassert(nregs >= 1 && nregs <= 7);
822 return i;
823 }
824 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
825 {
826 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
827 i->tag = Ain_A87PushPop;
828 i->Ain.A87PushPop.addr = addr;
829 i->Ain.A87PushPop.isPush = isPush;
830 i->Ain.A87PushPop.szB = szB;
831 vassert(szB == 8 || szB == 4);
832 return i;
833 }
834 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
835 {
836 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
837 i->tag = Ain_A87FpOp;
838 i->Ain.A87FpOp.op = op;
839 return i;
840 }
841 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
842 {
843 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
844 i->tag = Ain_A87LdCW;
845 i->Ain.A87LdCW.addr = addr;
846 return i;
847 }
848 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
849 {
850 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
851 i->tag = Ain_A87StSW;
852 i->Ain.A87StSW.addr = addr;
853 return i;
854 }
855 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
856 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
857 i->tag = Ain_LdMXCSR;
858 i->Ain.LdMXCSR.addr = addr;
859 return i;
860 }
861 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
862 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
863 i->tag = Ain_SseUComIS;
864 i->Ain.SseUComIS.sz = toUChar(sz);
865 i->Ain.SseUComIS.srcL = srcL;
866 i->Ain.SseUComIS.srcR = srcR;
867 i->Ain.SseUComIS.dst = dst;
868 vassert(sz == 4 || sz == 8);
869 return i;
870 }
871 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
872 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
873 i->tag = Ain_SseSI2SF;
874 i->Ain.SseSI2SF.szS = toUChar(szS);
875 i->Ain.SseSI2SF.szD = toUChar(szD);
876 i->Ain.SseSI2SF.src = src;
877 i->Ain.SseSI2SF.dst = dst;
878 vassert(szS == 4 || szS == 8);
879 vassert(szD == 4 || szD == 8);
880 return i;
881 }
882 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
883 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
884 i->tag = Ain_SseSF2SI;
885 i->Ain.SseSF2SI.szS = toUChar(szS);
886 i->Ain.SseSF2SI.szD = toUChar(szD);
887 i->Ain.SseSF2SI.src = src;
888 i->Ain.SseSF2SI.dst = dst;
889 vassert(szS == 4 || szS == 8);
890 vassert(szD == 4 || szD == 8);
891 return i;
892 }
893 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
894 {
895 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
896 i->tag = Ain_SseSDSS;
897 i->Ain.SseSDSS.from64 = from64;
898 i->Ain.SseSDSS.src = src;
899 i->Ain.SseSDSS.dst = dst;
900 return i;
901 }
902 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
903 HReg reg, AMD64AMode* addr ) {
904 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
905 i->tag = Ain_SseLdSt;
906 i->Ain.SseLdSt.isLoad = isLoad;
907 i->Ain.SseLdSt.sz = toUChar(sz);
908 i->Ain.SseLdSt.reg = reg;
909 i->Ain.SseLdSt.addr = addr;
910 vassert(sz == 4 || sz == 8 || sz == 16);
911 return i;
912 }
913 AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
914 HReg src, AMD64AMode* addr )
915 {
916 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
917 i->tag = Ain_SseCStore;
918 i->Ain.SseCStore.cond = cond;
919 i->Ain.SseCStore.src = src;
920 i->Ain.SseCStore.addr = addr;
921 vassert(cond != Acc_ALWAYS);
922 return i;
923 }
924 AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
925 AMD64AMode* addr, HReg dst )
926 {
927 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
928 i->tag = Ain_SseCLoad;
929 i->Ain.SseCLoad.cond = cond;
930 i->Ain.SseCLoad.addr = addr;
931 i->Ain.SseCLoad.dst = dst;
932 vassert(cond != Acc_ALWAYS);
933 return i;
934 }
935 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
936 {
937 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
938 i->tag = Ain_SseLdzLO;
939 i->Ain.SseLdzLO.sz = sz;
940 i->Ain.SseLdzLO.reg = reg;
941 i->Ain.SseLdzLO.addr = addr;
942 vassert(sz == 4 || sz == 8);
943 return i;
944 }
945 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
946 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
947 i->tag = Ain_Sse32Fx4;
948 i->Ain.Sse32Fx4.op = op;
949 i->Ain.Sse32Fx4.src = src;
950 i->Ain.Sse32Fx4.dst = dst;
951 vassert(op != Asse_MOV);
952 return i;
953 }
954 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
955 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
956 i->tag = Ain_Sse32FLo;
957 i->Ain.Sse32FLo.op = op;
958 i->Ain.Sse32FLo.src = src;
959 i->Ain.Sse32FLo.dst = dst;
960 vassert(op != Asse_MOV);
961 return i;
962 }
963 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
964 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
965 i->tag = Ain_Sse64Fx2;
966 i->Ain.Sse64Fx2.op = op;
967 i->Ain.Sse64Fx2.src = src;
968 i->Ain.Sse64Fx2.dst = dst;
969 vassert(op != Asse_MOV);
970 return i;
971 }
972 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
973 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
974 i->tag = Ain_Sse64FLo;
975 i->Ain.Sse64FLo.op = op;
976 i->Ain.Sse64FLo.src = src;
977 i->Ain.Sse64FLo.dst = dst;
978 vassert(op != Asse_MOV);
979 return i;
980 }
981 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
982 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
983 i->tag = Ain_SseReRg;
984 i->Ain.SseReRg.op = op;
985 i->Ain.SseReRg.src = re;
986 i->Ain.SseReRg.dst = rg;
987 return i;
988 }
989 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
990 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
991 i->tag = Ain_SseCMov;
992 i->Ain.SseCMov.cond = cond;
993 i->Ain.SseCMov.src = src;
994 i->Ain.SseCMov.dst = dst;
995 vassert(cond != Acc_ALWAYS);
996 return i;
997 }
998 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
999 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1000 i->tag = Ain_SseShuf;
1001 i->Ain.SseShuf.order = order;
1002 i->Ain.SseShuf.src = src;
1003 i->Ain.SseShuf.dst = dst;
1004 vassert(order >= 0 && order <= 0xFF);
1005 return i;
1006 }
1007 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1008 //uu HReg reg, AMD64AMode* addr ) {
1009 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1010 //uu i->tag = Ain_AvxLdSt;
1011 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1012 //uu i->Ain.AvxLdSt.reg = reg;
1013 //uu i->Ain.AvxLdSt.addr = addr;
1014 //uu return i;
1015 //uu }
1016 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1017 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1018 //uu i->tag = Ain_AvxReRg;
1019 //uu i->Ain.AvxReRg.op = op;
1020 //uu i->Ain.AvxReRg.src = re;
1021 //uu i->Ain.AvxReRg.dst = rg;
1022 //uu return i;
1023 //uu }
1024 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1025 AMD64AMode* amFailAddr ) {
1026 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1027 i->tag = Ain_EvCheck;
1028 i->Ain.EvCheck.amCounter = amCounter;
1029 i->Ain.EvCheck.amFailAddr = amFailAddr;
1030 return i;
1031 }
1032 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1033 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1034 i->tag = Ain_ProfInc;
1035 return i;
1036 }
1037
1038 void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
1039 {
1040 vassert(mode64 == True);
1041 switch (i->tag) {
1042 case Ain_Imm64:
1043 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1044 ppHRegAMD64(i->Ain.Imm64.dst);
1045 return;
1046 case Ain_Alu64R:
1047 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1048 ppAMD64RMI(i->Ain.Alu64R.src);
1049 vex_printf(",");
1050 ppHRegAMD64(i->Ain.Alu64R.dst);
1051 return;
1052 case Ain_Alu64M:
1053 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1054 ppAMD64RI(i->Ain.Alu64M.src);
1055 vex_printf(",");
1056 ppAMD64AMode(i->Ain.Alu64M.dst);
1057 return;
1058 case Ain_Sh64:
1059 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1060 if (i->Ain.Sh64.src == 0)
1061 vex_printf("%%cl,");
1062 else
1063 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1064 ppHRegAMD64(i->Ain.Sh64.dst);
1065 return;
1066 case Ain_Test64:
1067 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1068 ppHRegAMD64(i->Ain.Test64.dst);
1069 return;
1070 case Ain_Unary64:
1071 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1072 ppHRegAMD64(i->Ain.Unary64.dst);
1073 return;
1074 case Ain_Lea64:
1075 vex_printf("leaq ");
1076 ppAMD64AMode(i->Ain.Lea64.am);
1077 vex_printf(",");
1078 ppHRegAMD64(i->Ain.Lea64.dst);
1079 return;
1080 case Ain_Alu32R:
1081 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1082 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1083 vex_printf(",");
1084 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1085 return;
1086 case Ain_MulL:
1087 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1088 ppAMD64RM(i->Ain.MulL.src);
1089 return;
1090 case Ain_Div:
1091 vex_printf("%cdiv%s ",
1092 i->Ain.Div.syned ? 's' : 'u',
1093 showAMD64ScalarSz(i->Ain.Div.sz));
1094 ppAMD64RM(i->Ain.Div.src);
1095 return;
1096 case Ain_Push:
1097 vex_printf("pushq ");
1098 ppAMD64RMI(i->Ain.Push.src);
1099 return;
1100 case Ain_Call:
1101 vex_printf("call%s[%d,",
1102 i->Ain.Call.cond==Acc_ALWAYS
1103 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1104 i->Ain.Call.regparms );
1105 ppRetLoc(i->Ain.Call.rloc);
1106 vex_printf("] 0x%llx", i->Ain.Call.target);
1107 break;
1108
1109 case Ain_XDirect:
1110 vex_printf("(xDirect) ");
1111 vex_printf("if (%%rflags.%s) { ",
1112 showAMD64CondCode(i->Ain.XDirect.cond));
1113 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1114 vex_printf("movq %%r11,");
1115 ppAMD64AMode(i->Ain.XDirect.amRIP);
1116 vex_printf("; ");
1117 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1118 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1119 return;
1120 case Ain_XIndir:
1121 vex_printf("(xIndir) ");
1122 vex_printf("if (%%rflags.%s) { ",
1123 showAMD64CondCode(i->Ain.XIndir.cond));
1124 vex_printf("movq ");
1125 ppHRegAMD64(i->Ain.XIndir.dstGA);
1126 vex_printf(",");
1127 ppAMD64AMode(i->Ain.XIndir.amRIP);
1128 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1129 return;
1130 case Ain_XAssisted:
1131 vex_printf("(xAssisted) ");
1132 vex_printf("if (%%rflags.%s) { ",
1133 showAMD64CondCode(i->Ain.XAssisted.cond));
1134 vex_printf("movq ");
1135 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1136 vex_printf(",");
1137 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1138 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1139 (Int)i->Ain.XAssisted.jk);
1140 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1141 return;
1142
1143 case Ain_CMov64:
1144 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1145 ppHRegAMD64(i->Ain.CMov64.src);
1146 vex_printf(",");
1147 ppHRegAMD64(i->Ain.CMov64.dst);
1148 return;
1149 case Ain_CLoad:
1150 vex_printf("if (%%rflags.%s) { ",
1151 showAMD64CondCode(i->Ain.CLoad.cond));
1152 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1153 ppAMD64AMode(i->Ain.CLoad.addr);
1154 vex_printf(", ");
1155 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1156 (i->Ain.CLoad.dst);
1157 vex_printf(" }");
1158 return;
1159 case Ain_CStore:
1160 vex_printf("if (%%rflags.%s) { ",
1161 showAMD64CondCode(i->Ain.CStore.cond));
1162 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1163 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1164 (i->Ain.CStore.src);
1165 vex_printf(", ");
1166 ppAMD64AMode(i->Ain.CStore.addr);
1167 vex_printf(" }");
1168 return;
1169
1170 case Ain_MovxLQ:
1171 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1172 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1173 vex_printf(",");
1174 ppHRegAMD64(i->Ain.MovxLQ.dst);
1175 return;
1176 case Ain_LoadEX:
1177 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1178 vex_printf("movl ");
1179 ppAMD64AMode(i->Ain.LoadEX.src);
1180 vex_printf(",");
1181 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1182 } else {
1183 vex_printf("mov%c%cq ",
1184 i->Ain.LoadEX.syned ? 's' : 'z',
1185 i->Ain.LoadEX.szSmall==1
1186 ? 'b'
1187 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1188 ppAMD64AMode(i->Ain.LoadEX.src);
1189 vex_printf(",");
1190 ppHRegAMD64(i->Ain.LoadEX.dst);
1191 }
1192 return;
1193 case Ain_Store:
1194 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1195 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1196 ppHRegAMD64(i->Ain.Store.src);
1197 vex_printf(",");
1198 ppAMD64AMode(i->Ain.Store.dst);
1199 return;
1200 case Ain_Set64:
1201 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1202 ppHRegAMD64(i->Ain.Set64.dst);
1203 return;
1204 case Ain_Bsfr64:
1205 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1206 ppHRegAMD64(i->Ain.Bsfr64.src);
1207 vex_printf(",");
1208 ppHRegAMD64(i->Ain.Bsfr64.dst);
1209 return;
1210 case Ain_MFence:
1211 vex_printf("mfence" );
1212 return;
1213 case Ain_ACAS:
1214 vex_printf("lock cmpxchg%c ",
1215 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1216 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1217 vex_printf("{%%rax->%%rbx},");
1218 ppAMD64AMode(i->Ain.ACAS.addr);
1219 return;
1220 case Ain_DACAS:
1221 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1222 (Int)(2 * i->Ain.DACAS.sz));
1223 ppAMD64AMode(i->Ain.DACAS.addr);
1224 return;
1225 case Ain_A87Free:
1226 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1227 break;
1228 case Ain_A87PushPop:
1229 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1230 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1231 ppAMD64AMode(i->Ain.A87PushPop.addr);
1232 break;
1233 case Ain_A87FpOp:
1234 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1235 break;
1236 case Ain_A87LdCW:
1237 vex_printf("fldcw ");
1238 ppAMD64AMode(i->Ain.A87LdCW.addr);
1239 break;
1240 case Ain_A87StSW:
1241 vex_printf("fstsw ");
1242 ppAMD64AMode(i->Ain.A87StSW.addr);
1243 break;
1244 case Ain_LdMXCSR:
1245 vex_printf("ldmxcsr ");
1246 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1247 break;
1248 case Ain_SseUComIS:
1249 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1250 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1251 vex_printf(",");
1252 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1253 vex_printf(" ; pushfq ; popq ");
1254 ppHRegAMD64(i->Ain.SseUComIS.dst);
1255 break;
1256 case Ain_SseSI2SF:
1257 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1258 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1259 (i->Ain.SseSI2SF.src);
1260 vex_printf(",");
1261 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1262 break;
1263 case Ain_SseSF2SI:
1264 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1265 ppHRegAMD64(i->Ain.SseSF2SI.src);
1266 vex_printf(",");
1267 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1268 (i->Ain.SseSF2SI.dst);
1269 break;
1270 case Ain_SseSDSS:
1271 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1272 ppHRegAMD64(i->Ain.SseSDSS.src);
1273 vex_printf(",");
1274 ppHRegAMD64(i->Ain.SseSDSS.dst);
1275 break;
1276 case Ain_SseLdSt:
1277 switch (i->Ain.SseLdSt.sz) {
1278 case 4: vex_printf("movss "); break;
1279 case 8: vex_printf("movsd "); break;
1280 case 16: vex_printf("movups "); break;
1281 default: vassert(0);
1282 }
1283 if (i->Ain.SseLdSt.isLoad) {
1284 ppAMD64AMode(i->Ain.SseLdSt.addr);
1285 vex_printf(",");
1286 ppHRegAMD64(i->Ain.SseLdSt.reg);
1287 } else {
1288 ppHRegAMD64(i->Ain.SseLdSt.reg);
1289 vex_printf(",");
1290 ppAMD64AMode(i->Ain.SseLdSt.addr);
1291 }
1292 return;
1293 case Ain_SseCStore:
1294 vex_printf("if (%%rflags.%s) { ",
1295 showAMD64CondCode(i->Ain.SseCStore.cond));
1296 vex_printf("movups ");
1297 ppHRegAMD64(i->Ain.SseCStore.src);
1298 vex_printf(", ");
1299 ppAMD64AMode(i->Ain.SseCStore.addr);
1300 vex_printf(" }");
1301 return;
1302 case Ain_SseCLoad:
1303 vex_printf("if (%%rflags.%s) { ",
1304 showAMD64CondCode(i->Ain.SseCLoad.cond));
1305 vex_printf("movups ");
1306 ppAMD64AMode(i->Ain.SseCLoad.addr);
1307 vex_printf(", ");
1308 ppHRegAMD64(i->Ain.SseCLoad.dst);
1309 vex_printf(" }");
1310 return;
1311 case Ain_SseLdzLO:
1312 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1313 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1314 vex_printf(",");
1315 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1316 return;
1317 case Ain_Sse32Fx4:
1318 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1319 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1320 vex_printf(",");
1321 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1322 return;
1323 case Ain_Sse32FLo:
1324 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1325 ppHRegAMD64(i->Ain.Sse32FLo.src);
1326 vex_printf(",");
1327 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1328 return;
1329 case Ain_Sse64Fx2:
1330 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1331 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1332 vex_printf(",");
1333 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1334 return;
1335 case Ain_Sse64FLo:
1336 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1337 ppHRegAMD64(i->Ain.Sse64FLo.src);
1338 vex_printf(",");
1339 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1340 return;
1341 case Ain_SseReRg:
1342 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1343 ppHRegAMD64(i->Ain.SseReRg.src);
1344 vex_printf(",");
1345 ppHRegAMD64(i->Ain.SseReRg.dst);
1346 return;
1347 case Ain_SseCMov:
1348 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1349 ppHRegAMD64(i->Ain.SseCMov.src);
1350 vex_printf(",");
1351 ppHRegAMD64(i->Ain.SseCMov.dst);
1352 return;
1353 case Ain_SseShuf:
1354 vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
1355 ppHRegAMD64(i->Ain.SseShuf.src);
1356 vex_printf(",");
1357 ppHRegAMD64(i->Ain.SseShuf.dst);
1358 return;
1359 //uu case Ain_AvxLdSt:
1360 //uu vex_printf("vmovups ");
1361 //uu if (i->Ain.AvxLdSt.isLoad) {
1362 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1363 //uu vex_printf(",");
1364 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1365 //uu } else {
1366 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1367 //uu vex_printf(",");
1368 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1369 //uu }
1370 //uu return;
1371 //uu case Ain_AvxReRg:
1372 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1373 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1374 //uu vex_printf(",");
1375 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1376 //uu return;
1377 case Ain_EvCheck:
1378 vex_printf("(evCheck) decl ");
1379 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1380 vex_printf("; jns nofail; jmp *");
1381 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1382 vex_printf("; nofail:");
1383 return;
1384 case Ain_ProfInc:
1385 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1386 return;
1387 default:
1388 vpanic("ppAMD64Instr");
1389 }
1390 }
1391
1392 /* --------- Helpers for register allocation. --------- */
1393
1394 void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
1395 {
1396 Bool unary;
1397 vassert(mode64 == True);
1398 initHRegUsage(u);
1399 switch (i->tag) {
1400 case Ain_Imm64:
1401 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1402 return;
1403 case Ain_Alu64R:
1404 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1405 if (i->Ain.Alu64R.op == Aalu_MOV) {
1406 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1407 return;
1408 }
1409 if (i->Ain.Alu64R.op == Aalu_CMP) {
1410 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1411 return;
1412 }
1413 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1414 return;
1415 case Ain_Alu64M:
1416 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1417 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1418 return;
1419 case Ain_Sh64:
1420 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1421 if (i->Ain.Sh64.src == 0)
1422 addHRegUse(u, HRmRead, hregAMD64_RCX());
1423 return;
1424 case Ain_Test64:
1425 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1426 return;
1427 case Ain_Unary64:
1428 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1429 return;
1430 case Ain_Lea64:
1431 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1432 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1433 return;
1434 case Ain_Alu32R:
1435 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1436 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1437 if (i->Ain.Alu32R.op == Aalu_CMP) {
1438 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1439 return;
1440 }
1441 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1442 return;
1443 case Ain_MulL:
1444 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1445 addHRegUse(u, HRmModify, hregAMD64_RAX());
1446 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1447 return;
1448 case Ain_Div:
1449 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1450 addHRegUse(u, HRmModify, hregAMD64_RAX());
1451 addHRegUse(u, HRmModify, hregAMD64_RDX());
1452 return;
1453 case Ain_Push:
1454 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1455 addHRegUse(u, HRmModify, hregAMD64_RSP());
1456 return;
1457 case Ain_Call:
1458 /* This is a bit subtle. */
1459 /* First off, claim it trashes all the caller-saved regs
1460 which fall within the register allocator's jurisdiction.
1461 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
1462 and all the xmm registers.
1463 */
1464 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1465 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1466 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1467 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1468 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1469 addHRegUse(u, HRmWrite, hregAMD64_R8());
1470 addHRegUse(u, HRmWrite, hregAMD64_R9());
1471 addHRegUse(u, HRmWrite, hregAMD64_R10());
1472 addHRegUse(u, HRmWrite, hregAMD64_R11());
1473 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1474 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1475 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1476 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1477 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1478 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1479 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1480 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1481 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1482 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1483 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1484 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1485
1486 /* Now we have to state any parameter-carrying registers
1487 which might be read. This depends on the regparmness. */
1488 switch (i->Ain.Call.regparms) {
1489 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1490 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1491 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1492 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1493 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1494 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1495 case 0: break;
1496 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1497 }
1498 /* Finally, there is the issue that the insn trashes a
1499 register because the literal target address has to be
1500 loaded into a register. Fortunately, r11 is stated in the
1501 ABI as a scratch register, and so seems a suitable victim. */
1502 addHRegUse(u, HRmWrite, hregAMD64_R11());
1503 /* Upshot of this is that the assembler really must use r11,
1504 and no other, as a destination temporary. */
1505 return;
1506 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1507 conditionally exit the block. Hence we only need to list (1)
1508 the registers that they read, and (2) the registers that they
1509 write in the case where the block is not exited. (2) is
1510 empty, hence only (1) is relevant here. */
1511 case Ain_XDirect:
1512 /* Don't bother to mention the write to %r11, since it is not
1513 available to the allocator. */
1514 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1515 return;
1516 case Ain_XIndir:
1517 /* Ditto re %r11 */
1518 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1519 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1520 return;
1521 case Ain_XAssisted:
1522 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1523 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1524 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1525 return;
1526 case Ain_CMov64:
1527 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1528 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1529 return;
1530 case Ain_CLoad:
1531 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1532 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1533 return;
1534 case Ain_CStore:
1535 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1536 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1537 return;
1538 case Ain_MovxLQ:
1539 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1540 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1541 return;
1542 case Ain_LoadEX:
1543 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1544 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1545 return;
1546 case Ain_Store:
1547 addHRegUse(u, HRmRead, i->Ain.Store.src);
1548 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1549 return;
1550 case Ain_Set64:
1551 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1552 return;
1553 case Ain_Bsfr64:
1554 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1555 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1556 return;
1557 case Ain_MFence:
1558 return;
1559 case Ain_ACAS:
1560 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1561 addHRegUse(u, HRmRead, hregAMD64_RBX());
1562 addHRegUse(u, HRmModify, hregAMD64_RAX());
1563 return;
1564 case Ain_DACAS:
1565 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1566 addHRegUse(u, HRmRead, hregAMD64_RCX());
1567 addHRegUse(u, HRmRead, hregAMD64_RBX());
1568 addHRegUse(u, HRmModify, hregAMD64_RDX());
1569 addHRegUse(u, HRmModify, hregAMD64_RAX());
1570 return;
1571 case Ain_A87Free:
1572 return;
1573 case Ain_A87PushPop:
1574 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1575 return;
1576 case Ain_A87FpOp:
1577 return;
1578 case Ain_A87LdCW:
1579 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1580 return;
1581 case Ain_A87StSW:
1582 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1583 return;
1584 case Ain_LdMXCSR:
1585 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1586 return;
1587 case Ain_SseUComIS:
1588 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1589 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1590 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1591 return;
1592 case Ain_SseSI2SF:
1593 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1594 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1595 return;
1596 case Ain_SseSF2SI:
1597 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1598 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1599 return;
1600 case Ain_SseSDSS:
1601 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1602 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1603 return;
1604 case Ain_SseLdSt:
1605 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1606 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1607 i->Ain.SseLdSt.reg);
1608 return;
1609 case Ain_SseCStore:
1610 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1611 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1612 return;
1613 case Ain_SseCLoad:
1614 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1615 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1616 return;
1617 case Ain_SseLdzLO:
1618 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1619 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1620 return;
1621 case Ain_Sse32Fx4:
1622 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1623 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1624 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1625 || i->Ain.Sse32Fx4.op == Asse_SQRTF );
1626 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1627 addHRegUse(u, unary ? HRmWrite : HRmModify,
1628 i->Ain.Sse32Fx4.dst);
1629 return;
1630 case Ain_Sse32FLo:
1631 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1632 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1633 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1634 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1635 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1636 addHRegUse(u, unary ? HRmWrite : HRmModify,
1637 i->Ain.Sse32FLo.dst);
1638 return;
1639 case Ain_Sse64Fx2:
1640 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1641 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1642 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1643 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1644 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1645 addHRegUse(u, unary ? HRmWrite : HRmModify,
1646 i->Ain.Sse64Fx2.dst);
1647 return;
1648 case Ain_Sse64FLo:
1649 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1650 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1651 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1652 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1653 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1654 addHRegUse(u, unary ? HRmWrite : HRmModify,
1655 i->Ain.Sse64FLo.dst);
1656 return;
1657 case Ain_SseReRg:
1658 if ( (i->Ain.SseReRg.op == Asse_XOR
1659 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1660 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1661 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1662 r,r' as a write of a value to r, and independent of any
1663 previous value in r */
1664 /* (as opposed to a rite of passage :-) */
1665 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1666 } else {
1667 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1668 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1669 ? HRmWrite : HRmModify,
1670 i->Ain.SseReRg.dst);
1671 }
1672 return;
1673 case Ain_SseCMov:
1674 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1675 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1676 return;
1677 case Ain_SseShuf:
1678 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1679 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1680 return;
1681 //uu case Ain_AvxLdSt:
1682 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1683 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1684 //uu i->Ain.AvxLdSt.reg);
1685 //uu return;
1686 //uu case Ain_AvxReRg:
1687 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1688 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1689 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1690 //uu /* See comments on the case for Ain_SseReRg. */
1691 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1692 //uu } else {
1693 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1694 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1695 //uu ? HRmWrite : HRmModify,
1696 //uu i->Ain.AvxReRg.dst);
1697 //uu }
1698 //uu return;
1699 case Ain_EvCheck:
1700 /* We expect both amodes only to mention %rbp, so this is in
1701 fact pointless, since %rbp isn't allocatable, but anyway.. */
1702 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1703 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1704 return;
1705 case Ain_ProfInc:
1706 addHRegUse(u, HRmWrite, hregAMD64_R11());
1707 return;
1708 default:
1709 ppAMD64Instr(i, mode64);
1710 vpanic("getRegUsage_AMD64Instr");
1711 }
1712 }
1713
1714 /* local helper */
1715 static inline void mapReg(HRegRemap* m, HReg* r)
1716 {
1717 *r = lookupHRegRemap(m, *r);
1718 }
1719
1720 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1721 {
1722 vassert(mode64 == True);
1723 switch (i->tag) {
1724 case Ain_Imm64:
1725 mapReg(m, &i->Ain.Imm64.dst);
1726 return;
1727 case Ain_Alu64R:
1728 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1729 mapReg(m, &i->Ain.Alu64R.dst);
1730 return;
1731 case Ain_Alu64M:
1732 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1733 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1734 return;
1735 case Ain_Sh64:
1736 mapReg(m, &i->Ain.Sh64.dst);
1737 return;
1738 case Ain_Test64:
1739 mapReg(m, &i->Ain.Test64.dst);
1740 return;
1741 case Ain_Unary64:
1742 mapReg(m, &i->Ain.Unary64.dst);
1743 return;
1744 case Ain_Lea64:
1745 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1746 mapReg(m, &i->Ain.Lea64.dst);
1747 return;
1748 case Ain_Alu32R:
1749 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1750 mapReg(m, &i->Ain.Alu32R.dst);
1751 return;
1752 case Ain_MulL:
1753 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1754 return;
1755 case Ain_Div:
1756 mapRegs_AMD64RM(m, i->Ain.Div.src);
1757 return;
1758 case Ain_Push:
1759 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1760 return;
1761 case Ain_Call:
1762 return;
1763 case Ain_XDirect:
1764 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1765 return;
1766 case Ain_XIndir:
1767 mapReg(m, &i->Ain.XIndir.dstGA);
1768 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1769 return;
1770 case Ain_XAssisted:
1771 mapReg(m, &i->Ain.XAssisted.dstGA);
1772 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1773 return;
1774 case Ain_CMov64:
1775 mapReg(m, &i->Ain.CMov64.src);
1776 mapReg(m, &i->Ain.CMov64.dst);
1777 return;
1778 case Ain_CLoad:
1779 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1780 mapReg(m, &i->Ain.CLoad.dst);
1781 return;
1782 case Ain_CStore:
1783 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1784 mapReg(m, &i->Ain.CStore.src);
1785 return;
1786 case Ain_MovxLQ:
1787 mapReg(m, &i->Ain.MovxLQ.src);
1788 mapReg(m, &i->Ain.MovxLQ.dst);
1789 return;
1790 case Ain_LoadEX:
1791 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1792 mapReg(m, &i->Ain.LoadEX.dst);
1793 return;
1794 case Ain_Store:
1795 mapReg(m, &i->Ain.Store.src);
1796 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1797 return;
1798 case Ain_Set64:
1799 mapReg(m, &i->Ain.Set64.dst);
1800 return;
1801 case Ain_Bsfr64:
1802 mapReg(m, &i->Ain.Bsfr64.src);
1803 mapReg(m, &i->Ain.Bsfr64.dst);
1804 return;
1805 case Ain_MFence:
1806 return;
1807 case Ain_ACAS:
1808 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1809 return;
1810 case Ain_DACAS:
1811 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1812 return;
1813 case Ain_A87Free:
1814 return;
1815 case Ain_A87PushPop:
1816 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1817 return;
1818 case Ain_A87FpOp:
1819 return;
1820 case Ain_A87LdCW:
1821 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1822 return;
1823 case Ain_A87StSW:
1824 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1825 return;
1826 case Ain_LdMXCSR:
1827 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1828 return;
1829 case Ain_SseUComIS:
1830 mapReg(m, &i->Ain.SseUComIS.srcL);
1831 mapReg(m, &i->Ain.SseUComIS.srcR);
1832 mapReg(m, &i->Ain.SseUComIS.dst);
1833 return;
1834 case Ain_SseSI2SF:
1835 mapReg(m, &i->Ain.SseSI2SF.src);
1836 mapReg(m, &i->Ain.SseSI2SF.dst);
1837 return;
1838 case Ain_SseSF2SI:
1839 mapReg(m, &i->Ain.SseSF2SI.src);
1840 mapReg(m, &i->Ain.SseSF2SI.dst);
1841 return;
1842 case Ain_SseSDSS:
1843 mapReg(m, &i->Ain.SseSDSS.src);
1844 mapReg(m, &i->Ain.SseSDSS.dst);
1845 return;
1846 case Ain_SseLdSt:
1847 mapReg(m, &i->Ain.SseLdSt.reg);
1848 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1849 break;
1850 case Ain_SseCStore:
1851 mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
1852 mapReg(m, &i->Ain.SseCStore.src);
1853 return;
1854 case Ain_SseCLoad:
1855 mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
1856 mapReg(m, &i->Ain.SseCLoad.dst);
1857 return;
1858 case Ain_SseLdzLO:
1859 mapReg(m, &i->Ain.SseLdzLO.reg);
1860 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1861 break;
1862 case Ain_Sse32Fx4:
1863 mapReg(m, &i->Ain.Sse32Fx4.src);
1864 mapReg(m, &i->Ain.Sse32Fx4.dst);
1865 return;
1866 case Ain_Sse32FLo:
1867 mapReg(m, &i->Ain.Sse32FLo.src);
1868 mapReg(m, &i->Ain.Sse32FLo.dst);
1869 return;
1870 case Ain_Sse64Fx2:
1871 mapReg(m, &i->Ain.Sse64Fx2.src);
1872 mapReg(m, &i->Ain.Sse64Fx2.dst);
1873 return;
1874 case Ain_Sse64FLo:
1875 mapReg(m, &i->Ain.Sse64FLo.src);
1876 mapReg(m, &i->Ain.Sse64FLo.dst);
1877 return;
1878 case Ain_SseReRg:
1879 mapReg(m, &i->Ain.SseReRg.src);
1880 mapReg(m, &i->Ain.SseReRg.dst);
1881 return;
1882 case Ain_SseCMov:
1883 mapReg(m, &i->Ain.SseCMov.src);
1884 mapReg(m, &i->Ain.SseCMov.dst);
1885 return;
1886 case Ain_SseShuf:
1887 mapReg(m, &i->Ain.SseShuf.src);
1888 mapReg(m, &i->Ain.SseShuf.dst);
1889 return;
1890 //uu case Ain_AvxLdSt:
1891 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1892 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1893 //uu break;
1894 //uu case Ain_AvxReRg:
1895 //uu mapReg(m, &i->Ain.AvxReRg.src);
1896 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1897 //uu return;
1898 case Ain_EvCheck:
1899 /* We expect both amodes only to mention %rbp, so this is in
1900 fact pointless, since %rbp isn't allocatable, but anyway.. */
1901 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1902 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1903 return;
1904 case Ain_ProfInc:
1905 /* hardwires r11 -- nothing to modify. */
1906 return;
1907 default:
1908 ppAMD64Instr(i, mode64);
1909 vpanic("mapRegs_AMD64Instr");
1910 }
1911 }
1912
1913 /* Figure out if i represents a reg-reg move, and if so assign the
1914 source and destination to *src and *dst. If in doubt say No. Used
1915 by the register allocator to do move coalescing.
1916 */
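/* Illustrative note (a sketch, not from the original sources): an
   Ain_Alu64R with op Aalu_MOV and an Armi_Reg source -- e.g. the insn
   "movq %rsi, %rdi" -- is reported here as a coalescable move, whereas
   a MOV with an Armi_Mem or Armi_Imm source is not.  A typical caller
   would do something like:

      HReg src, dst;
      if (isMove_AMD64Instr(i, &src, &dst)) {
         ... the allocator may then give src and dst the same rreg ...
      }
*/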
1917 Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
1918 {
1919 switch (i->tag) {
1920 case Ain_Alu64R:
1921 /* Moves between integer regs */
1922 if (i->Ain.Alu64R.op != Aalu_MOV)
1923 return False;
1924 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1925 return False;
1926 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1927 *dst = i->Ain.Alu64R.dst;
1928 return True;
1929 case Ain_SseReRg:
1930 /* Moves between SSE regs */
1931 if (i->Ain.SseReRg.op != Asse_MOV)
1932 return False;
1933 *src = i->Ain.SseReRg.src;
1934 *dst = i->Ain.SseReRg.dst;
1935 return True;
1936 //uu case Ain_AvxReRg:
1937 //uu /* Moves between AVX regs */
1938 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1939 //uu return False;
1940 //uu *src = i->Ain.AvxReRg.src;
1941 //uu *dst = i->Ain.AvxReRg.dst;
1942 //uu return True;
1943 default:
1944 return False;
1945 }
1946 /*NOTREACHED*/
1947 }
1948
1949
1950 /* Generate amd64 spill/reload instructions under the direction of the
1951 register allocator. Note it's critical these don't write the
1952 condition codes. */
1953
1954 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1955 HReg rreg, Int offsetB, Bool mode64 )
1956 {
1957 AMD64AMode* am;
1958 vassert(offsetB >= 0);
1959 vassert(!hregIsVirtual(rreg));
1960 vassert(mode64 == True);
1961 *i1 = *i2 = NULL;
1962 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1963 switch (hregClass(rreg)) {
1964 case HRcInt64:
1965 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
1966 return;
1967 case HRcVec128:
1968 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
1969 return;
1970 default:
1971 ppHRegClass(hregClass(rreg));
1972 vpanic("genSpill_AMD64: unimplemented regclass");
1973 }
1974 }
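/* Worked example (illustrative only): spilling the integer register
   %r14 to a slot at offset 48 from %rbp gives

      genSpill_AMD64(&i1, &i2, hregAMD64_R14(), 48, True);
      // i1 = AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(%r14), 48(%rbp)),
      //      i.e. "movq %r14, 48(%rbp)";  i2 stays NULL.

   A vector register would instead produce a 16-byte SseLdSt store to
   the same kind of address.  Neither form touches %rflags. */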
1975
1976 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1977 HReg rreg, Int offsetB, Bool mode64 )
1978 {
1979 AMD64AMode* am;
1980 vassert(offsetB >= 0);
1981 vassert(!hregIsVirtual(rreg));
1982 vassert(mode64 == True);
1983 *i1 = *i2 = NULL;
1984 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1985 switch (hregClass(rreg)) {
1986 case HRcInt64:
1987 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
1988 return;
1989 case HRcVec128:
1990 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
1991 return;
1992 default:
1993 ppHRegClass(hregClass(rreg));
1994 vpanic("genReload_AMD64: unimplemented regclass");
1995 }
1996 }
1997
1998
1999 /* --------- The amd64 assembler (bleh.) --------- */
2000
2001 /* Produce the low three bits of an integer register number. */
2002 inline static UInt iregEnc210 ( HReg r )
2003 {
2004 UInt n;
2005 vassert(hregClass(r) == HRcInt64);
2006 vassert(!hregIsVirtual(r));
2007 n = hregEncoding(r);
2008 vassert(n <= 15);
2009 return n & 7;
2010 }
2011
2012 /* Produce bit 3 of an integer register number. */
2013 inline static UInt iregEnc3 ( HReg r )
2014 {
2015 UInt n;
2016 vassert(hregClass(r) == HRcInt64);
2017 vassert(!hregIsVirtual(r));
2018 n = hregEncoding(r);
2019 vassert(n <= 15);
2020 return (n >> 3) & 1;
2021 }
2022
2023 /* Produce a complete 4-bit integer register number. */
2024 inline static UInt iregEnc3210 ( HReg r )
2025 {
2026 UInt n;
2027 vassert(hregClass(r) == HRcInt64);
2028 vassert(!hregIsVirtual(r));
2029 n = hregEncoding(r);
2030 vassert(n <= 15);
2031 return n;
2032 }
2033
2034 /* Produce a complete 4-bit integer register number. */
2035 inline static UInt vregEnc3210 ( HReg r )
2036 {
2037 UInt n;
2038 vassert(hregClass(r) == HRcVec128);
2039 vassert(!hregIsVirtual(r));
2040 n = hregEncoding(r);
2041 vassert(n <= 15);
2042 return n;
2043 }
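/* Illustrative values for the encoders above: %r13 has hard-register
   encoding 13 (binary 1101), so iregEnc210 gives 5, iregEnc3 gives 1
   and iregEnc3210 gives 13; %rdx (encoding 2) gives 2, 0 and 2
   respectively, and vregEnc3210 on %xmm9 gives 9.  The low three bits
   go into ModRM/SIB fields; the top bit goes into the REX prefix. */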
2044
2045 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
2046 {
2047 vassert(mod < 4);
2048 vassert((reg|regmem) < 8);
2049 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
2050 }
2051
2052 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
2053 {
2054 vassert(shift < 4);
2055 vassert((regindex|regbase) < 8);
2056 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
2057 }
2058
2059 static UChar* emit32 ( UChar* p, UInt w32 )
2060 {
2061 *p++ = toUChar((w32) & 0x000000FF);
2062 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2063 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2064 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2065 return p;
2066 }
2067
2068 static UChar* emit64 ( UChar* p, ULong w64 )
2069 {
2070 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2071 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2072 return p;
2073 }
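/* For example, emit32(p, 0x12345678) stores the bytes 78 56 34 12 at p
   (little-endian), and emit64 writes the low 32-bit half first, so
   0x1122334455667788 comes out as 88 77 66 55 44 33 22 11. */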
2074
2075 /* Does a sign-extend of the lowest 8 bits give
2076 the original number? */
2077 static Bool fits8bits ( UInt w32 )
2078 {
2079 Int i32 = (Int)w32;
2080 return toBool(i32 == ((Int)(w32 << 24) >> 24));
2081 }
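/* For example: fits8bits(3) and fits8bits(0xFFFFFF80) (i.e. -128) are
   True, since sign-extending the low byte recreates the value, whereas
   fits8bits(128) is False because 0x80 sign-extends to -128. */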
2082 /* Can the lower 32 bits be signedly widened to produce the whole
2083 64-bit value? In other words, are the top 33 bits either all 0 or
2084 all 1 ? */
2085 static Bool fitsIn32Bits ( ULong x )
2086 {
2087 Long y1;
2088 y1 = x << 32;
2089 y1 >>=/*s*/ 32;
2090 return toBool(x == y1);
2091 }
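/* For example: fitsIn32Bits(0xFFFFFFFF80000000ULL) is True (top 33 bits
   all 1) and fitsIn32Bits(0x7FFFFFFFULL) is True (top 33 bits all 0),
   but fitsIn32Bits(0x80000000ULL) is False, since sign-extending its
   low 32 bits would give 0xFFFFFFFF80000000. */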
2092
2093
2094 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2095
2096 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2097 = 00 greg ereg
2098
2099 greg, d8(ereg) | ereg is neither of: RSP R12
2100 = 01 greg ereg, d8
2101
2102 greg, d32(ereg) | ereg is neither of: RSP R12
2103 = 10 greg ereg, d32
2104
2105 greg, d8(ereg) | ereg is either: RSP R12
2106 = 01 greg 100, 0x24, d8
2107 (lowest bit of rex distinguishes R12/RSP)
2108
2109 greg, d32(ereg) | ereg is either: RSP R12
2110 = 10 greg 100, 0x24, d32
2111 (lowest bit of rex distinguishes R12/RSP)
2112
2113 -----------------------------------------------
2114
2115 greg, d8(base,index,scale)
2116 | index != RSP
2117 = 01 greg 100, scale index base, d8
2118
2119 greg, d32(base,index,scale)
2120 | index != RSP
2121 = 10 greg 100, scale index base, d32
2122 */
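/* Two concrete examples of the forms above (illustrative only):
   "movq 8(%rdx), %rax" is emitted as REX.W (0x48), opcode 0x8B, then
   mod=01 reg=000(rax) rm=010(rdx) = 0x42 and the d8 0x08, i.e. the
   bytes 48 8B 42 08.  "movq 8(%rbx,%rcx,4), %rax" sets rm=100 so an
   SIB byte follows: scale=10 index=001(rcx) base=011(rbx) = 0x8B,
   giving 48 8B 44 8B 08. */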
2123 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2124 {
2125 UInt gregEnc210 = gregEnc3210 & 7;
2126 if (am->tag == Aam_IR) {
2127 if (am->Aam.IR.imm == 0
2128 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2129 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2130 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2131 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2132 ) {
2133 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2134 return p;
2135 }
2136 if (fits8bits(am->Aam.IR.imm)
2137 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2138 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2139 ) {
2140 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2141 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2142 return p;
2143 }
2144 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2145 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2146 ) {
2147 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2148 p = emit32(p, am->Aam.IR.imm);
2149 return p;
2150 }
2151 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2152 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2153 && fits8bits(am->Aam.IR.imm)) {
2154 *p++ = mkModRegRM(1, gregEnc210, 4);
2155 *p++ = 0x24;
2156 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2157 return p;
2158 }
2159 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2160 || wait for test case for RSP case */
2161 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2162 *p++ = mkModRegRM(2, gregEnc210, 4);
2163 *p++ = 0x24;
2164 p = emit32(p, am->Aam.IR.imm);
2165 return p;
2166 }
2167 ppAMD64AMode(am);
2168 vpanic("doAMode_M: can't emit amode IR");
2169 /*NOTREACHED*/
2170 }
2171 if (am->tag == Aam_IRRS) {
2172 if (fits8bits(am->Aam.IRRS.imm)
2173 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2174 *p++ = mkModRegRM(1, gregEnc210, 4);
2175 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2176 iregEnc210(am->Aam.IRRS.base));
2177 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2178 return p;
2179 }
2180 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2181 *p++ = mkModRegRM(2, gregEnc210, 4);
2182 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2183 iregEnc210(am->Aam.IRRS.base));
2184 p = emit32(p, am->Aam.IRRS.imm);
2185 return p;
2186 }
2187 ppAMD64AMode(am);
2188 vpanic("doAMode_M: can't emit amode IRRS");
2189 /*NOTREACHED*/
2190 }
2191 vpanic("doAMode_M: unknown amode");
2192 /*NOTREACHED*/
2193 }
2194
2195 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2196 {
2197 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2198 }
2199
2200 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2201 {
2202 vassert(gregEnc3210 < 16);
2203 return doAMode_M__wrk(p, gregEnc3210, am);
2204 }
2205
2206
2207 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2208 inline
2209 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2210 {
2211 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2212 return p;
2213 }
2214
2215 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2216 {
2217 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2218 }
2219
2220 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2221 {
2222 vassert(gregEnc3210 < 16);
2223 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2224 }
2225
2226 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2227 {
2228 vassert(eregEnc3210 < 16);
2229 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2230 }
2231
2232 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2233 {
2234 vassert( (gregEnc3210|eregEnc3210) < 16);
2235 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2236 }
2237
2238
2239 /* Clear the W bit on a REX byte, thereby changing the operand size
2240 back to whatever that instruction's default operand size is. */
2241 static inline UChar clearWBit ( UChar rex )
2242 {
2243 return rex & ~(1<<3);
2244 }
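/* E.g. clearWBit(0x49) == 0x41: the same REX prefix, but now selecting
   the 32-bit rather than the 64-bit form of the operation. */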
2245
2246
2247 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2248 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2249 {
2250 if (am->tag == Aam_IR) {
2251 UChar W = 1; /* we want 64-bit mode */
2252 UChar R = (gregEnc3210 >> 3) & 1;
2253 UChar X = 0; /* not relevant */
2254 UChar B = iregEnc3(am->Aam.IR.reg);
2255 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2256 }
2257 if (am->tag == Aam_IRRS) {
2258 UChar W = 1; /* we want 64-bit mode */
2259 UChar R = (gregEnc3210 >> 3) & 1;
2260 UChar X = iregEnc3(am->Aam.IRRS.index);
2261 UChar B = iregEnc3(am->Aam.IRRS.base);
2262 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2263 }
2264 vassert(0);
2265 return 0; /*NOTREACHED*/
2266 }
2267
2268 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2269 {
2270 return rexAMode_M__wrk(iregEnc3210(greg), am);
2271 }
2272
2273 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2274 {
2275 vassert(gregEnc3210 < 16);
2276 return rexAMode_M__wrk(gregEnc3210, am);
2277 }
2278
2279
2280 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2281 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2282 {
2283 UChar W = 1; /* we want 64-bit mode */
2284 UChar R = (gregEnc3210 >> 3) & 1;
2285 UChar X = 0; /* not relevant */
2286 UChar B = (eregEnc3210 >> 3) & 1;
2287 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2288 }
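/* Worked example (illustrative): with greg = %r9 (encoding 9) and
   ereg = %rdx (encoding 2) this gives W=1, R=1, X=0, B=0, hence the
   prefix 0x4C; swapping the operands gives R=0, B=1 and hence 0x49. */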
2289
2290 static UChar rexAMode_R ( HReg greg, HReg ereg )
2291 {
2292 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2293 }
2294
2295 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2296 {
2297 vassert(gregEnc3210 < 16);
2298 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2299 }
2300
2301 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2302 {
2303 vassert(eregEnc3210 < 16);
2304 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2305 }
2306
2307 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2308 {
2309 vassert((gregEnc3210|eregEnc3210) < 16);
2310 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
2311 }
2312
2313
2314 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2315 //uu    been verified correct (I reckon).  Certainly it has been known to
2316 //uu produce correct VEX prefixes during testing. */
2317 //uu
2318 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2319 //uu    notVvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
2320 //uu in verbatim. There's no range checking on the bits. */
2321 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2322 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2323 //uu UInt L, UInt pp )
2324 //uu {
2325 //uu UChar byte0 = 0;
2326 //uu UChar byte1 = 0;
2327 //uu UChar byte2 = 0;
2328 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2329 //uu /* 2 byte encoding is possible. */
2330 //uu byte0 = 0xC5;
2331 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2332 //uu | (L << 2) | pp;
2333 //uu } else {
2334 //uu /* 3 byte encoding is needed. */
2335 //uu byte0 = 0xC4;
2336 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2337 //uu | ((rexB ^ 1) << 5) | mmmmm;
2338 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2339 //uu }
2340 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2341 //uu }
2342 //uu
2343 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2344 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2345 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2346 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2347 //uu vvvv=1111 (unused 3rd reg). */
2348 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2349 //uu {
2350 //uu UChar L = 1; /* size = 256 */
2351 //uu UChar pp = 0; /* no SIMD prefix */
2352 //uu UChar mmmmm = 1; /* 0F */
2353 //uu UChar notVvvv = 0; /* unused */
2354 //uu UChar rexW = 0;
2355 //uu UChar rexR = 0;
2356 //uu UChar rexX = 0;
2357 //uu UChar rexB = 0;
2358 //uu /* Same logic as in rexAMode_M. */
2359 //uu if (am->tag == Aam_IR) {
2360 //uu rexR = iregEnc3(greg);
2361 //uu rexX = 0; /* not relevant */
2362 //uu rexB = iregEnc3(am->Aam.IR.reg);
2363 //uu }
2364 //uu else if (am->tag == Aam_IRRS) {
2365 //uu rexR = iregEnc3(greg);
2366 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2367 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2368 //uu } else {
2369 //uu vassert(0);
2370 //uu }
2371 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2372 //uu }
2373 //uu
2374 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2375 //uu {
2376 //uu switch (vex & 0xFF) {
2377 //uu case 0xC5:
2378 //uu *p++ = 0xC5;
2379 //uu *p++ = (vex >> 8) & 0xFF;
2380 //uu vassert(0 == (vex >> 16));
2381 //uu break;
2382 //uu case 0xC4:
2383 //uu *p++ = 0xC4;
2384 //uu *p++ = (vex >> 8) & 0xFF;
2385 //uu *p++ = (vex >> 16) & 0xFF;
2386 //uu vassert(0 == (vex >> 24));
2387 //uu break;
2388 //uu default:
2389 //uu vassert(0);
2390 //uu }
2391 //uu return p;
2392 //uu }
2393
2394
2395 /* Emit ffree %st(N) */
2396 static UChar* do_ffree_st ( UChar* p, Int n )
2397 {
2398 vassert(n >= 0 && n <= 7);
2399 *p++ = 0xDD;
2400 *p++ = toUChar(0xC0 + n);
2401 return p;
2402 }
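/* For instance, do_ffree_st(p, 3) emits the bytes DD C3, i.e.
   "ffree %st(3)". */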
2403
2404 /* Emit an instruction into buf and return the number of bytes used.
2405 Note that buf is not the insn's final place, and therefore it is
2406 imperative to emit position-independent code. If the emitted
2407 instruction was a profiler inc, set *is_profInc to True, else
2408 leave it unchanged. */
2409
2410 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2411 UChar* buf, Int nbuf, const AMD64Instr* i,
2412 Bool mode64, VexEndness endness_host,
2413 const void* disp_cp_chain_me_to_slowEP,
2414 const void* disp_cp_chain_me_to_fastEP,
2415 const void* disp_cp_xindir,
2416 const void* disp_cp_xassisted )
2417 {
2418 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2419 UInt xtra;
2420 UInt reg;
2421 UChar rex;
2422 UChar* p = &buf[0];
2423 UChar* ptmp;
2424 Int j;
2425 vassert(nbuf >= 64);
2426 vassert(mode64 == True);
2427
2428 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2429
2430 switch (i->tag) {
2431
2432 case Ain_Imm64:
2433 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2434 /* Use the short form (load into 32 bit reg, + default
2435 widening rule) for constants under 1 million. We could
2436 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2437 limit it to a smaller range for verifiability purposes. */
2438 if (1 & iregEnc3(i->Ain.Imm64.dst))
2439 *p++ = 0x41;
2440 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2441 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2442 } else {
2443 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2444 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2445 p = emit64(p, i->Ain.Imm64.imm64);
2446 }
2447 goto done;
2448
2449 case Ain_Alu64R:
2450 /* Deal specially with MOV */
2451 if (i->Ain.Alu64R.op == Aalu_MOV) {
2452 switch (i->Ain.Alu64R.src->tag) {
2453 case Armi_Imm:
2454 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2455 /* Actually we could use this form for constants in
2456 the range 0 through 0x7FFFFFFF inclusive, but
2457 limit it to a small range for verifiability
2458 purposes. */
2459 /* Generate "movl $imm32, 32-bit-register" and let
2460 the default zero-extend rule cause the upper half
2461 of the dst to be zeroed out too. This saves 1
2462 and sometimes 2 bytes compared to the more
2463 obvious encoding in the 'else' branch. */
2464 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2465 *p++ = 0x41;
2466 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2467 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2468 } else {
2469 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2470 *p++ = 0xC7;
2471 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2472 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2473 }
2474 goto done;
2475 case Armi_Reg:
2476 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2477 i->Ain.Alu64R.dst );
2478 *p++ = 0x89;
2479 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2480 i->Ain.Alu64R.dst);
2481 goto done;
2482 case Armi_Mem:
2483 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2484 i->Ain.Alu64R.src->Armi.Mem.am);
2485 *p++ = 0x8B;
2486 p = doAMode_M(p, i->Ain.Alu64R.dst,
2487 i->Ain.Alu64R.src->Armi.Mem.am);
2488 goto done;
2489 default:
2490 goto bad;
2491 }
2492 }
2493 /* MUL */
2494 if (i->Ain.Alu64R.op == Aalu_MUL) {
2495 switch (i->Ain.Alu64R.src->tag) {
2496 case Armi_Reg:
2497 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2498 i->Ain.Alu64R.src->Armi.Reg.reg);
2499 *p++ = 0x0F;
2500 *p++ = 0xAF;
2501 p = doAMode_R(p, i->Ain.Alu64R.dst,
2502 i->Ain.Alu64R.src->Armi.Reg.reg);
2503 goto done;
2504 case Armi_Mem:
2505 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2506 i->Ain.Alu64R.src->Armi.Mem.am);
2507 *p++ = 0x0F;
2508 *p++ = 0xAF;
2509 p = doAMode_M(p, i->Ain.Alu64R.dst,
2510 i->Ain.Alu64R.src->Armi.Mem.am);
2511 goto done;
2512 case Armi_Imm:
2513 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2514 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2515 *p++ = 0x6B;
2516 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2517 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2518 } else {
2519 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2520 *p++ = 0x69;
2521 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2522 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2523 }
2524 goto done;
2525 default:
2526 goto bad;
2527 }
2528 }
2529 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2530 opc = opc_rr = subopc_imm = opc_imma = 0;
2531 switch (i->Ain.Alu64R.op) {
2532 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2533 subopc_imm = 2; opc_imma = 0x15; break;
2534 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2535 subopc_imm = 0; opc_imma = 0x05; break;
2536 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2537 subopc_imm = 5; opc_imma = 0x2D; break;
2538 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2539 subopc_imm = 3; opc_imma = 0x1D; break;
2540 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2541 subopc_imm = 4; opc_imma = 0x25; break;
2542 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2543 subopc_imm = 6; opc_imma = 0x35; break;
2544 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2545 subopc_imm = 1; opc_imma = 0x0D; break;
2546 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2547 subopc_imm = 7; opc_imma = 0x3D; break;
2548 default: goto bad;
2549 }
2550 switch (i->Ain.Alu64R.src->tag) {
2551 case Armi_Imm:
2552 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2553 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2554 goto bad; /* FIXME: awaiting test case */
2555 *p++ = toUChar(opc_imma);
2556 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2557 } else
2558 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2559 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2560 *p++ = 0x83;
2561 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2562 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2563 } else {
2564 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2565 *p++ = 0x81;
2566 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2567 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2568 }
2569 goto done;
2570 case Armi_Reg:
2571 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2572 i->Ain.Alu64R.dst);
2573 *p++ = toUChar(opc_rr);
2574 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2575 i->Ain.Alu64R.dst);
2576 goto done;
2577 case Armi_Mem:
2578 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2579 i->Ain.Alu64R.src->Armi.Mem.am);
2580 *p++ = toUChar(opc);
2581 p = doAMode_M(p, i->Ain.Alu64R.dst,
2582 i->Ain.Alu64R.src->Armi.Mem.am);
2583 goto done;
2584 default:
2585 goto bad;
2586 }
2587 break;
2588
2589 case Ain_Alu64M:
2590 /* Deal specially with MOV */
2591 if (i->Ain.Alu64M.op == Aalu_MOV) {
2592 switch (i->Ain.Alu64M.src->tag) {
2593 case Ari_Reg:
2594 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2595 i->Ain.Alu64M.dst);
2596 *p++ = 0x89;
2597 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2598 i->Ain.Alu64M.dst);
2599 goto done;
2600 case Ari_Imm:
2601 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2602 *p++ = 0xC7;
2603 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2604 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2605 goto done;
2606 default:
2607 goto bad;
2608 }
2609 }
2610 break;
2611
2612 case Ain_Sh64:
2613 opc_cl = opc_imm = subopc = 0;
2614 switch (i->Ain.Sh64.op) {
2615 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2616 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2617 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2618 default: goto bad;
2619 }
2620 if (i->Ain.Sh64.src == 0) {
2621 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2622 *p++ = toUChar(opc_cl);
2623 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2624 goto done;
2625 } else {
2626 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2627 *p++ = toUChar(opc_imm);
2628 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2629 *p++ = (UChar)(i->Ain.Sh64.src);
2630 goto done;
2631 }
2632 break;
2633
2634 case Ain_Test64:
2635 /* testq sign-extend($imm32), %reg */
2636 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2637 *p++ = 0xF7;
2638 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2639 p = emit32(p, i->Ain.Test64.imm32);
2640 goto done;
2641
2642 case Ain_Unary64:
2643 if (i->Ain.Unary64.op == Aun_NOT) {
2644 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2645 *p++ = 0xF7;
2646 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2647 goto done;
2648 }
2649 if (i->Ain.Unary64.op == Aun_NEG) {
2650 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2651 *p++ = 0xF7;
2652 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2653 goto done;
2654 }
2655 break;
2656
2657 case Ain_Lea64:
2658 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2659 *p++ = 0x8D;
2660 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2661 goto done;
2662
2663 case Ain_Alu32R:
2664 /* ADD/SUB/AND/OR/XOR/CMP */
2665 opc = opc_rr = subopc_imm = opc_imma = 0;
2666 switch (i->Ain.Alu32R.op) {
2667 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2668 subopc_imm = 0; opc_imma = 0x05; break;
2669 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2670 subopc_imm = 5; opc_imma = 0x2D; break;
2671 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2672 subopc_imm = 4; opc_imma = 0x25; break;
2673 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2674 subopc_imm = 6; opc_imma = 0x35; break;
2675 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2676 subopc_imm = 1; opc_imma = 0x0D; break;
2677 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2678 subopc_imm = 7; opc_imma = 0x3D; break;
2679 default: goto bad;
2680 }
2681 switch (i->Ain.Alu32R.src->tag) {
2682 case Armi_Imm:
2683 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2684 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2685 goto bad; /* FIXME: awaiting test case */
2686 *p++ = toUChar(opc_imma);
2687 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2688 } else
2689 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2690 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2691 if (rex != 0x40) *p++ = rex;
2692 *p++ = 0x83;
2693 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2694 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2695 } else {
2696 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2697 if (rex != 0x40) *p++ = rex;
2698 *p++ = 0x81;
2699 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2700 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2701 }
2702 goto done;
2703 case Armi_Reg:
2704 rex = clearWBit(
2705 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2706 i->Ain.Alu32R.dst) );
2707 if (rex != 0x40) *p++ = rex;
2708 *p++ = toUChar(opc_rr);
2709 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2710 i->Ain.Alu32R.dst);
2711 goto done;
2712 case Armi_Mem:
2713 rex = clearWBit(
2714 rexAMode_M( i->Ain.Alu32R.dst,
2715 i->Ain.Alu32R.src->Armi.Mem.am) );
2716 if (rex != 0x40) *p++ = rex;
2717 *p++ = toUChar(opc);
2718 p = doAMode_M(p, i->Ain.Alu32R.dst,
2719 i->Ain.Alu32R.src->Armi.Mem.am);
2720 goto done;
2721 default:
2722 goto bad;
2723 }
2724 break;
2725
2726 case Ain_MulL:
2727 subopc = i->Ain.MulL.syned ? 5 : 4;
2728 switch (i->Ain.MulL.src->tag) {
2729 case Arm_Mem:
2730 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2731 *p++ = 0xF7;
2732 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2733 goto done;
2734 case Arm_Reg:
2735 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2736 *p++ = 0xF7;
2737 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2738 goto done;
2739 default:
2740 goto bad;
2741 }
2742 break;
2743
2744 case Ain_Div:
2745 subopc = i->Ain.Div.syned ? 7 : 6;
2746 if (i->Ain.Div.sz == 4) {
2747 switch (i->Ain.Div.src->tag) {
2748 case Arm_Mem:
2749 goto bad;
2750 /*FIXME*/
2751 *p++ = 0xF7;
2752 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2753 goto done;
2754 case Arm_Reg:
2755 *p++ = clearWBit(
2756 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2757 *p++ = 0xF7;
2758 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2759 goto done;
2760 default:
2761 goto bad;
2762 }
2763 }
2764 if (i->Ain.Div.sz == 8) {
2765 switch (i->Ain.Div.src->tag) {
2766 case Arm_Mem:
2767 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
2768 *p++ = 0xF7;
2769 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2770 goto done;
2771 case Arm_Reg:
2772 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
2773 *p++ = 0xF7;
2774 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2775 goto done;
2776 default:
2777 goto bad;
2778 }
2779 }
2780 break;
2781
2782 case Ain_Push:
2783 switch (i->Ain.Push.src->tag) {
2784 case Armi_Mem:
2785 *p++ = clearWBit(
2786 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
2787 *p++ = 0xFF;
2788 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
2789 goto done;
2790 case Armi_Imm:
2791 *p++ = 0x68;
2792 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2793 goto done;
2794 case Armi_Reg:
2795 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2796 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
2797 goto done;
2798 default:
2799 goto bad;
2800 }
2801
2802 case Ain_Call: {
2803 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2804 above, %r11 is used as an address temporary. */
2805 /* If we don't need to do any fixup actions in the case that the
2806 call doesn't happen, just do the simple thing and emit
2807 straight-line code. This is usually the case. */
2808 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2809 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2810 /* jump over the following two insns if the condition does
2811 not hold */
2812 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2813 if (i->Ain.Call.cond != Acc_ALWAYS) {
2814 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2815 *p++ = shortImm ? 10 : 13;
2816 /* 10 or 13 bytes in the next two insns */
2817 }
2818 if (shortImm) {
2819 /* 7 bytes: movl sign-extend(imm32), %r11 */
2820 *p++ = 0x49;
2821 *p++ = 0xC7;
2822 *p++ = 0xC3;
2823 p = emit32(p, (UInt)i->Ain.Call.target);
2824 } else {
2825 /* 10 bytes: movabsq $target, %r11 */
2826 *p++ = 0x49;
2827 *p++ = 0xBB;
2828 p = emit64(p, i->Ain.Call.target);
2829 }
2830 /* 3 bytes: call *%r11 */
2831 *p++ = 0x41;
2832 *p++ = 0xFF;
2833 *p++ = 0xD3;
2834 } else {
2835 Int delta;
2836 /* Complex case. We have to generate an if-then-else diamond. */
2837 // before:
2838 // j{!cond} else:
2839 // movabsq $target, %r11
2840 // call* %r11
2841 // preElse:
2842 // jmp after:
2843 // else:
2844 // movabsq $0x5555555555555555, %rax // possibly
2845 // movq %rax, %rdx // possibly
2846 // after:
2847
2848 // before:
2849 UChar* pBefore = p;
2850
2851 // j{!cond} else:
2852 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2853 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2854
2855 // movabsq $target, %r11
2856 *p++ = 0x49;
2857 *p++ = 0xBB;
2858 p = emit64(p, i->Ain.Call.target);
2859
2860 // call* %r11
2861 *p++ = 0x41;
2862 *p++ = 0xFF;
2863 *p++ = 0xD3;
2864
2865 // preElse:
2866 UChar* pPreElse = p;
2867
2868 // jmp after:
2869 *p++ = 0xEB;
2870 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2871
2872 // else:
2873 UChar* pElse = p;
2874
2875 /* Do the 'else' actions */
2876 switch (i->Ain.Call.rloc.pri) {
2877 case RLPri_Int:
2878 // movabsq $0x5555555555555555, %rax
2879 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2880 break;
2881 case RLPri_2Int:
2882 goto bad; //ATC
2883 // movabsq $0x5555555555555555, %rax
2884 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2885 // movq %rax, %rdx
2886 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
2887 break;
2888 case RLPri_V128SpRel:
2889 if (i->Ain.Call.rloc.spOff == 0) {
2890 // We could accept any |spOff| here, but that's more
2891 // hassle and the only value we're ever going to get
2892 // is zero (I believe.) Hence take the easy path :)
2893 // We need a scag register -- r11 can be it.
2894 // movabsq $0x5555555555555555, %r11
2895 *p++ = 0x49; *p++ = 0xBB;
2896 p = emit64(p, 0x5555555555555555ULL);
2897 // movq %r11, 0(%rsp)
2898 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
2899 // movq %r11, 8(%rsp)
2900 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
2901 *p++ = 0x08;
2902 break;
2903 }
2904 goto bad; //ATC for all other spOff values
2905 case RLPri_V256SpRel:
2906 goto bad; //ATC
2907 case RLPri_None: case RLPri_INVALID: default:
2908 vassert(0); // should never get here
2909 }
2910
2911 // after:
2912 UChar* pAfter = p;
2913
2914 // Fix up the branch offsets. The +2s in the offset
2915 // calculations are there because x86 requires conditional
2916 // branches to have their offset stated relative to the
2917 // instruction immediately following the branch insn. And in
2918 // both cases the branch insns are 2 bytes long.
2919
2920 // First, the "j{!cond} else:" at pBefore.
2921 delta = (Int)(Long)(pElse - (pBefore + 2));
2922 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2923 *(pBefore+1) = (UChar)delta;
2924
2925 // And secondly, the "jmp after:" at pPreElse.
2926 delta = (Int)(Long)(pAfter - (pPreElse + 2));
2927 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2928 *(pPreElse+1) = (UChar)delta;
2929 }
2930 goto done;
2931 }
2932
2933 case Ain_XDirect: {
2934 /* NB: what goes on here has to be very closely coordinated with the
2935 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
2936 /* We're generating chain-me requests here, so we need to be
2937 sure this is actually allowed -- no-redir translations can't
2938 use chain-me's. Hence: */
2939 vassert(disp_cp_chain_me_to_slowEP != NULL);
2940 vassert(disp_cp_chain_me_to_fastEP != NULL);
2941
2942 HReg r11 = hregAMD64_R11();
2943
2944 /* Use ptmp for backpatching conditional jumps. */
2945 ptmp = NULL;
2946
2947 /* First off, if this is conditional, create a conditional
2948 jump over the rest of it. */
2949 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2950 /* jmp fwds if !condition */
2951 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
2952 ptmp = p; /* fill in this bit later */
2953 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2954 }
2955
2956 /* Update the guest RIP. */
2957 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
2958 /* use a shorter encoding */
2959 /* movl sign-extend(dstGA), %r11 */
2960 *p++ = 0x49;
2961 *p++ = 0xC7;
2962 *p++ = 0xC3;
2963 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
2964 } else {
2965 /* movabsq $dstGA, %r11 */
2966 *p++ = 0x49;
2967 *p++ = 0xBB;
2968 p = emit64(p, i->Ain.XDirect.dstGA);
2969 }
2970
2971 /* movq %r11, amRIP */
2972 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
2973 *p++ = 0x89;
2974 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
2975
2976 /* --- FIRST PATCHABLE BYTE follows --- */
2977 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2978 to) backs up the return address, so as to find the address of
2979 the first patchable byte. So: don't change the length of the
2980 two instructions below. */
2981 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
2982 *p++ = 0x49;
2983 *p++ = 0xBB;
2984 const void* disp_cp_chain_me
2985 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2986 : disp_cp_chain_me_to_slowEP;
2987 p = emit64(p, (Addr)disp_cp_chain_me);
2988 /* call *%r11 */
2989 *p++ = 0x41;
2990 *p++ = 0xFF;
2991 *p++ = 0xD3;
2992 /* --- END of PATCHABLE BYTES --- */
2993
2994 /* Fix up the conditional jump, if there was one. */
2995 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2996 Int delta = p - ptmp;
2997 vassert(delta > 0 && delta < 40);
2998 *ptmp = toUChar(delta-1);
2999 }
3000 goto done;
3001 }
3002
3003 case Ain_XIndir: {
3004 /* We're generating transfers that could lead indirectly to a
3005 chain-me, so we need to be sure this is actually allowed --
3006 no-redir translations are not allowed to reach normal
3007 translations without going through the scheduler. That means
3008 no XDirects or XIndirs out from no-redir translations.
3009 Hence: */
3010 vassert(disp_cp_xindir != NULL);
3011
3012 /* Use ptmp for backpatching conditional jumps. */
3013 ptmp = NULL;
3014
3015 /* First off, if this is conditional, create a conditional
3016 jump over the rest of it. */
3017 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3018 /* jmp fwds if !condition */
3019 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3020 ptmp = p; /* fill in this bit later */
3021 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3022 }
3023
3024 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3025 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3026 *p++ = 0x89;
3027 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3028
3029 /* get $disp_cp_xindir into %r11 */
3030 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
3031 /* use a shorter encoding */
3032 /* movl sign-extend(disp_cp_xindir), %r11 */
3033 *p++ = 0x49;
3034 *p++ = 0xC7;
3035 *p++ = 0xC3;
3036 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
3037 } else {
3038 /* movabsq $disp_cp_xindir, %r11 */
3039 *p++ = 0x49;
3040 *p++ = 0xBB;
3041 p = emit64(p, (Addr)disp_cp_xindir);
3042 }
3043
3044 /* jmp *%r11 */
3045 *p++ = 0x41;
3046 *p++ = 0xFF;
3047 *p++ = 0xE3;
3048
3049 /* Fix up the conditional jump, if there was one. */
3050 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3051 Int delta = p - ptmp;
3052 vassert(delta > 0 && delta < 40);
3053 *ptmp = toUChar(delta-1);
3054 }
3055 goto done;
3056 }
3057
3058 case Ain_XAssisted: {
3059 /* Use ptmp for backpatching conditional jumps. */
3060 ptmp = NULL;
3061
3062 /* First off, if this is conditional, create a conditional
3063 jump over the rest of it. */
3064 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3065 /* jmp fwds if !condition */
3066 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3067 ptmp = p; /* fill in this bit later */
3068 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3069 }
3070
3071 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3072 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3073 *p++ = 0x89;
3074 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3075 /* movl $magic_number, %ebp. Since these numbers are all small positive
3076 integers, we can get away with "movl $N, %ebp" rather than
3077 the longer "movq $N, %rbp". */
3078 UInt trcval = 0;
3079 switch (i->Ain.XAssisted.jk) {
3080 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3081 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3082 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3083 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
3084 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3085 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3086 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3087 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3088 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3089 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3090 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3091 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3092 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3093 /* We don't expect to see the following being assisted. */
3094 case Ijk_Ret:
3095 case Ijk_Call:
3096 /* fallthrough */
3097 default:
3098 ppIRJumpKind(i->Ain.XAssisted.jk);
3099 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3100 }
3101 vassert(trcval != 0);
3102 *p++ = 0xBD;
3103 p = emit32(p, trcval);
3104 /* movabsq $disp_assisted, %r11 */
3105 *p++ = 0x49;
3106 *p++ = 0xBB;
3107 p = emit64(p, (Addr)disp_cp_xassisted);
3108 /* jmp *%r11 */
3109 *p++ = 0x41;
3110 *p++ = 0xFF;
3111 *p++ = 0xE3;
3112
3113 /* Fix up the conditional jump, if there was one. */
3114 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3115 Int delta = p - ptmp;
3116 vassert(delta > 0 && delta < 40);
3117 *ptmp = toUChar(delta-1);
3118 }
3119 goto done;
3120 }
3121
3122 case Ain_CMov64:
3123 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3124 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3125 *p++ = 0x0F;
3126 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3127 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3128 goto done;
3129
3130 case Ain_CLoad: {
3131 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3132
3133 /* Only 32- or 64-bit variants are allowed. */
3134 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3135
3136 /* Use ptmp for backpatching conditional jumps. */
3137 ptmp = NULL;
3138
3139 /* jmp fwds if !condition */
3140 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3141 ptmp = p; /* fill in this bit later */
3142 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3143
3144 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3145 load, which, by the default zero-extension rule, zeroes out
3146 the upper half of the destination, as required. */
3147 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3148 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3149 *p++ = 0x8B;
3150 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3151
3152 /* Fix up the conditional branch */
3153 Int delta = p - ptmp;
3154 vassert(delta > 0 && delta < 40);
3155 *ptmp = toUChar(delta-1);
3156 goto done;
3157 }
3158
3159 case Ain_CStore: {
3160 /* AFAICS this is identical to Ain_CLoad except that the opcode
3161 is 0x89 instead of 0x8B. */
3162 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3163
3164 /* Only 32- or 64-bit variants are allowed. */
3165 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3166
3167 /* Use ptmp for backpatching conditional jumps. */
3168 ptmp = NULL;
3169
3170 /* jmp fwds if !condition */
3171 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3172 ptmp = p; /* fill in this bit later */
3173 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3174
3175 /* Now the store. */
3176 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3177 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3178 *p++ = 0x89;
3179 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3180
3181 /* Fix up the conditional branch */
3182 Int delta = p - ptmp;
3183 vassert(delta > 0 && delta < 40);
3184 *ptmp = toUChar(delta-1);
3185 goto done;
3186 }
3187
3188 case Ain_MovxLQ:
3189 /* No, _don't_ ask me why the sense of the args has to be
3190 different in the S vs Z case. I don't know. */
3191 if (i->Ain.MovxLQ.syned) {
3192 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3193 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3194 *p++ = 0x63;
3195 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3196 } else {
3197 /* Produce a 32-bit reg-reg move, since the implicit
3198 zero-extend does what we want. */
3199 *p++ = clearWBit (
3200 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3201 *p++ = 0x89;
3202 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3203 }
3204 goto done;
3205
3206 case Ain_LoadEX:
3207 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3208 /* movzbq */
3209 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3210 *p++ = 0x0F;
3211 *p++ = 0xB6;
3212 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3213 goto done;
3214 }
3215 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3216 /* movzwq */
3217 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3218 *p++ = 0x0F;
3219 *p++ = 0xB7;
3220 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3221 goto done;
3222 }
3223 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3224 /* movzlq */
3225 /* This isn't really an existing AMD64 instruction per se.
3226 Rather, we have to do a 32-bit load. Because a 32-bit
3227 write implicitly clears the upper 32 bits of the target
3228 register, we get what we want. */
3229 *p++ = clearWBit(
3230 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3231 *p++ = 0x8B;
3232 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3233 goto done;
3234 }
3235 break;
3236
3237 case Ain_Set64:
3238 /* Make the destination register be 1 or 0, depending on whether
3239 the relevant condition holds. Complication: the top 56 bits
3240 of the destination should be forced to zero, but doing 'xorq
3241 %r,%r' kills the flag(s) we are about to read. Sigh. So
3242          start off by moving $0 into the dest. */
3243 reg = iregEnc3210(i->Ain.Set64.dst);
3244 vassert(reg < 16);
3245
3246 /* movq $0, %dst */
3247 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3248 *p++ = 0xC7;
3249 *p++ = toUChar(0xC0 + (reg & 7));
3250 p = emit32(p, 0);
3251
3252 /* setb lo8(%dst) */
3253 /* note, 8-bit register rex trickyness. Be careful here. */
3254 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3255 *p++ = 0x0F;
3256 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3257 *p++ = toUChar(0xC0 + (reg & 7));
3258 goto done;
3259
3260 case Ain_Bsfr64:
3261 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3262 *p++ = 0x0F;
3263 if (i->Ain.Bsfr64.isFwds) {
3264 *p++ = 0xBC;
3265 } else {
3266 *p++ = 0xBD;
3267 }
3268 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3269 goto done;
3270
3271 case Ain_MFence:
3272 /* mfence */
3273 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3274 goto done;
3275
3276 case Ain_ACAS:
3277 /* lock */
3278 *p++ = 0xF0;
3279 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3280 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3281 in %rbx. The new-value register is hardwired to be %rbx
3282 since dealing with byte integer registers is too much hassle,
3283 so we force the register operand to %rbx (could equally be
3284 %rcx or %rdx). */
3285 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3286 if (i->Ain.ACAS.sz != 8)
3287 rex = clearWBit(rex);
3288
3289 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3290 *p++ = 0x0F;
3291 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3292 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3293 goto done;
3294
3295 case Ain_DACAS:
3296 /* lock */
3297 *p++ = 0xF0;
3298 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3299 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3300 aren't encoded in the insn. */
3301          rex = rexAMode_M_enc(1, i->Ain.DACAS.addr );
3302          if (i->Ain.DACAS.sz != 8)
3303 rex = clearWBit(rex);
3304 *p++ = rex;
3305 *p++ = 0x0F;
3306 *p++ = 0xC7;
3307 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3308 goto done;
3309
3310 case Ain_A87Free:
3311 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3312 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3313 p = do_ffree_st(p, 7-j);
3314 }
3315 goto done;
3316
3317 case Ain_A87PushPop:
3318 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3319 if (i->Ain.A87PushPop.isPush) {
3320 /* Load from memory into %st(0): flds/fldl amode */
3321 *p++ = clearWBit(
3322 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3323 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3324 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3325 } else {
3326 /* Dump %st(0) to memory: fstps/fstpl amode */
3327 *p++ = clearWBit(
3328 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3329 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3330 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3331 goto done;
3332 }
3333 goto done;
3334
3335 case Ain_A87FpOp:
3336 switch (i->Ain.A87FpOp.op) {
3337 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3338 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3339 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3340 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3341 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3342 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3343 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3344 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3345 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3346 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3347 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3348 case Afp_TAN:
3349 /* fptan pushes 1.0 on the FP stack, except when the
3350 argument is out of range. Hence we have to do the
3351 instruction, then inspect C2 to see if there is an out
3352 of range condition. If there is, we skip the fincstp
3353 that is used by the in-range case to get rid of this
3354 extra 1.0 value. */
3355 *p++ = 0xD9; *p++ = 0xF2; // fptan
3356 *p++ = 0x50; // pushq %rax
3357 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3358 *p++ = 0x66; *p++ = 0xA9;
3359 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3360 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3361 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3362 *p++ = 0x58; // after_fincstp: popq %rax
3363 break;
3364 default:
3365 goto bad;
3366 }
3367 goto done;
3368
3369 case Ain_A87LdCW:
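         /* fldcw amode: D9 /5 */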
3370 *p++ = clearWBit(
3371 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3372 *p++ = 0xD9;
3373 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3374 goto done;
3375
3376 case Ain_A87StSW:
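         /* fnstsw amode: DD /7 */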
3377 *p++ = clearWBit(
3378 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3379 *p++ = 0xDD;
3380 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3381 goto done;
3382
3383 case Ain_Store:
3384 if (i->Ain.Store.sz == 2) {
3385 /* This just goes to show the craziness of the instruction
3386 set encoding. We have to emit two prefix bytes, and must
3387 ensure REX.W = 0 so that the 0x66 prefix, not REX.W,
3388 determines the operand size. */
3389 *p++ = 0x66; /* override to 16-bits */
3390 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3391 *p++ = 0x89;
3392 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3393 goto done;
3394 }
3395 if (i->Ain.Store.sz == 4) {
3396 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3397 *p++ = 0x89;
3398 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3399 goto done;
3400 }
3401 if (i->Ain.Store.sz == 1) {
3402 /* This is one place where it would be wrong to skip emitting
3403 a rex byte of 0x40, since the mere presence of rex changes
3404 the meaning of the byte register access. Be careful. */
3405 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3406 *p++ = 0x88;
3407 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3408 goto done;
3409 }
3410 break;
3411
3412 case Ain_LdMXCSR:
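         /* ldmxcsr amode: 0F AE /2 */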
3413 *p++ = clearWBit(rexAMode_M_enc(2, i->Ain.LdMXCSR.addr));
3414 *p++ = 0x0F;
3415 *p++ = 0xAE;
3416 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3417 goto done;
3418
3419 case Ain_SseUComIS:
3420 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3421 /* ucomi[sd] %srcL, %srcR */
3422 if (i->Ain.SseUComIS.sz == 8) {
3423 *p++ = 0x66;
3424 } else {
3425 vassert(i->Ain.SseUComIS.sz == 4);
3426 goto bad; /* only the 8-byte (ucomisd) form is handled here */
3427 }
3428 *p++ = clearWBit (
3429 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3430 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3431 *p++ = 0x0F;
3432 *p++ = 0x2E;
3433 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3434 vregEnc3210(i->Ain.SseUComIS.srcR) );
3435 /* pushfq */
3436 *p++ = 0x9C;
3437 /* popq %dst */
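         /* 0x58+reg is the one-byte popq encoding; the REX byte (0x40 + REX.B)
            is emitted unconditionally in case %dst is r8..r15. */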
3438 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3439 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
3440 goto done;
3441
3442 case Ain_SseSI2SF:
3443 /* cvtsi2s[sd] %src, %dst */
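         /* F3 selects a 32-bit FP destination (cvtsi2ss), F2 a 64-bit one
            (cvtsi2sd); REX.W selects a 64-bit integer source. */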
3444 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3445 i->Ain.SseSI2SF.src );
3446 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3447 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3448 *p++ = 0x0F;
3449 *p++ = 0x2A;
3450 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3451 i->Ain.SseSI2SF.src );
3452 goto done;
3453
3454 case Ain_SseSF2SI:
3455 /* cvts[sd]2si %src, %dst */
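         /* F3 selects a 32-bit FP source (cvtss2si), F2 a 64-bit one
            (cvtsd2si); REX.W selects a 64-bit integer destination. */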
3456 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3457 vregEnc3210(i->Ain.SseSF2SI.src) );
3458 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3459 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3460 *p++ = 0x0F;
3461 *p++ = 0x2D;
3462 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3463 vregEnc3210(i->Ain.SseSF2SI.src) );
3464 goto done;
3465
3466 case Ain_SseSDSS:
3467 /* cvtsd2ss/cvtss2sd %src, %dst */
3468 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3469 *p++ = clearWBit(
3470 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3471 vregEnc3210(i->Ain.SseSDSS.src) ));
3472 *p++ = 0x0F;
3473 *p++ = 0x5A;
3474 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3475 vregEnc3210(i->Ain.SseSDSS.src) );
3476 goto done;
3477
3478 case Ain_SseLdSt:
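         /* sz==4: movss (F3 0F 10/11), sz==8: movsd (F2 0F 10/11),
            sz==16: movups (0F 10/11); 0x10 = load, 0x11 = store. */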
3479 if (i->Ain.SseLdSt.sz == 8) {
3480 *p++ = 0xF2;
3481 } else
3482 if (i->Ain.SseLdSt.sz == 4) {
3483 *p++ = 0xF3;
3484 } else
3485 if (i->Ain.SseLdSt.sz != 16) {
3486 vassert(0);
3487 }
3488 *p++ = clearWBit(
3489 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3490 i->Ain.SseLdSt.addr));
3491 *p++ = 0x0F;
3492 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3493 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3494 i->Ain.SseLdSt.addr);
3495 goto done;
3496
3497 case Ain_SseCStore: {
3498 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3499
3500 /* Use ptmp for backpatching conditional jumps. */
3501 ptmp = NULL;
3502
3503 /* jmp fwds if !condition */
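         /* 0x70+cc is Jcc rel8; XORing the condition code with 1 negates it. */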
3504 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3505 ptmp = p; /* fill in this bit later */
3506 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3507
3508 /* Now the store. */
3509 *p++ = clearWBit(
3510 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3511 i->Ain.SseCStore.addr));
3512 *p++ = 0x0F;
3513 *p++ = toUChar(0x11);
3514 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3515 i->Ain.SseCStore.addr);
3516
3517 /* Fix up the conditional branch */
3518 Int delta = p - ptmp;
3519 vassert(delta > 0 && delta < 40);
3520 *ptmp = toUChar(delta-1);
3521 goto done;
3522 }
3523
3524 case Ain_SseCLoad: {
3525 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3526
3527 /* Use ptmp for backpatching conditional jumps. */
3528 ptmp = NULL;
3529
3530 /* jmp fwds if !condition */
3531 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3532 ptmp = p; /* fill in this bit later */
3533 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3534
3535 /* Now the load. */
3536 *p++ = clearWBit(
3537 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3538 i->Ain.SseCLoad.addr));
3539 *p++ = 0x0F;
3540 *p++ = toUChar(0x10);
3541 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3542 i->Ain.SseCLoad.addr);
3543
3544 /* Fix up the conditional branch */
3545 Int delta = p - ptmp;
3546 vassert(delta > 0 && delta < 40);
3547 *ptmp = toUChar(delta-1);
3548 goto done;
3549 }
3550
3551 case Ain_SseLdzLO:
3552 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3553 /* movs[sd] amode, %xmm-dst */
3554 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3555 *p++ = clearWBit(
3556 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3557 i->Ain.SseLdzLO.addr));
3558 *p++ = 0x0F;
3559 *p++ = 0x10;
3560 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3561 i->Ain.SseLdzLO.addr);
3562 goto done;
3563
3564 case Ain_Sse32Fx4:
3565 xtra = 0;
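         /* If an op below sets bit 8 of xtra, its low byte is emitted afterwards
            as a trailing imm8: the compare predicate (0=EQ, 1=LT, 2=LE, 3=UNORD). */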
3566 *p++ = clearWBit(
3567 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
3568 vregEnc3210(i->Ain.Sse32Fx4.src) ));
3569 *p++ = 0x0F;
3570 switch (i->Ain.Sse32Fx4.op) {
3571 case Asse_ADDF: *p++ = 0x58; break;
3572 case Asse_DIVF: *p++ = 0x5E; break;
3573 case Asse_MAXF: *p++ = 0x5F; break;
3574 case Asse_MINF: *p++ = 0x5D; break;
3575 case Asse_MULF: *p++ = 0x59; break;
3576 case Asse_RCPF: *p++ = 0x53; break;
3577 case Asse_RSQRTF: *p++ = 0x52; break;
3578 case Asse_SQRTF: *p++ = 0x51; break;
3579 case Asse_SUBF: *p++ = 0x5C; break;
3580 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3581 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3582 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3583 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3584 default: goto bad;
3585 }
3586 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
3587 vregEnc3210(i->Ain.Sse32Fx4.src) );
3588 if (xtra & 0x100)
3589 *p++ = toUChar(xtra & 0xFF);
3590 goto done;
3591
3592 case Ain_Sse64Fx2:
3593 xtra = 0;
3594 *p++ = 0x66;
3595 *p++ = clearWBit(
3596 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3597 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3598 *p++ = 0x0F;
3599 switch (i->Ain.Sse64Fx2.op) {
3600 case Asse_ADDF: *p++ = 0x58; break;
3601 case Asse_DIVF: *p++ = 0x5E; break;
3602 case Asse_MAXF: *p++ = 0x5F; break;
3603 case Asse_MINF: *p++ = 0x5D; break;
3604 case Asse_MULF: *p++ = 0x59; break;
3605 case Asse_SQRTF: *p++ = 0x51; break;
3606 case Asse_SUBF: *p++ = 0x5C; break;
3607 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3608 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3609 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3610 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3611 default: goto bad;
3612 }
3613 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3614 vregEnc3210(i->Ain.Sse64Fx2.src) );
3615 if (xtra & 0x100)
3616 *p++ = toUChar(xtra & 0xFF);
3617 goto done;
3618
3619 case Ain_Sse32FLo:
3620 xtra = 0;
3621 *p++ = 0xF3;
3622 *p++ = clearWBit(
3623 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3624 vregEnc3210(i->Ain.Sse32FLo.src) ));
3625 *p++ = 0x0F;
3626 switch (i->Ain.Sse32FLo.op) {
3627 case Asse_ADDF: *p++ = 0x58; break;
3628 case Asse_DIVF: *p++ = 0x5E; break;
3629 case Asse_MAXF: *p++ = 0x5F; break;
3630 case Asse_MINF: *p++ = 0x5D; break;
3631 case Asse_MULF: *p++ = 0x59; break;
3632 case Asse_RCPF: *p++ = 0x53; break;
3633 case Asse_RSQRTF: *p++ = 0x52; break;
3634 case Asse_SQRTF: *p++ = 0x51; break;
3635 case Asse_SUBF: *p++ = 0x5C; break;
3636 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3637 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3638 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3639 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3640 default: goto bad;
3641 }
3642 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3643 vregEnc3210(i->Ain.Sse32FLo.src) );
3644 if (xtra & 0x100)
3645 *p++ = toUChar(xtra & 0xFF);
3646 goto done;
3647
3648 case Ain_Sse64FLo:
3649 xtra = 0;
3650 *p++ = 0xF2;
3651 *p++ = clearWBit(
3652 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3653 vregEnc3210(i->Ain.Sse64FLo.src) ));
3654 *p++ = 0x0F;
3655 switch (i->Ain.Sse64FLo.op) {
3656 case Asse_ADDF: *p++ = 0x58; break;
3657 case Asse_DIVF: *p++ = 0x5E; break;
3658 case Asse_MAXF: *p++ = 0x5F; break;
3659 case Asse_MINF: *p++ = 0x5D; break;
3660 case Asse_MULF: *p++ = 0x59; break;
3661 case Asse_SQRTF: *p++ = 0x51; break;
3662 case Asse_SUBF: *p++ = 0x5C; break;
3663 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3664 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3665 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3666 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3667 default: goto bad;
3668 }
3669 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3670 vregEnc3210(i->Ain.Sse64FLo.src) );
3671 if (xtra & 0x100)
3672 *p++ = toUChar(xtra & 0xFF);
3673 goto done;
3674
3675 case Ain_SseReRg:
3676 # define XX(_n) *p++ = (_n)
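      /* Each table entry below spells out the complete prefix/escape/opcode
         sequence for the reg-reg form; XX is just byte-emit shorthand. */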
3677
3678 rex = clearWBit(
3679 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3680 vregEnc3210(i->Ain.SseReRg.src) ));
3681
3682 switch (i->Ain.SseReRg.op) {
3683 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3684 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3685 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3686 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3687 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3688 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3689 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3690 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3691 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3692 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3693 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3694 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3695 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3696 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3697 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3698 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3699 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3700 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3701 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3702 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3703 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3704 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3705 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3706 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3707 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3708 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3709 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3710 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3711 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3712 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3713 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3714 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3715 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3716 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3717 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3718 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3719 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3720 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3721 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3722 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3723 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3724 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3725 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3726 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3727 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3728 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3729 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3730 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3731 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3732 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3733 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3734 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3735 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3736 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3737 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3738 default: goto bad;
3739 }
3740 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3741 vregEnc3210(i->Ain.SseReRg.src) );
3742 # undef XX
3743 goto done;
3744
3745 case Ain_SseCMov:
3746 /* jmp fwds if !condition */
3747 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3748 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3749 ptmp = p;
3750
3751 /* movaps %src, %dst */
3752 *p++ = clearWBit(
3753 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3754 vregEnc3210(i->Ain.SseCMov.src) ));
3755 *p++ = 0x0F;
3756 *p++ = 0x28;
3757 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3758 vregEnc3210(i->Ain.SseCMov.src) );
3759
3760 /* Fill in the jump offset. */
3761 *(ptmp-1) = toUChar(p - ptmp);
3762 goto done;
3763
3764 case Ain_SseShuf:
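         /* pshufd $order, %src, %dst: 66 0F 70 /r ib */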
3765 *p++ = 0x66;
3766 *p++ = clearWBit(
3767 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3768 vregEnc3210(i->Ain.SseShuf.src) ));
3769 *p++ = 0x0F;
3770 *p++ = 0x70;
3771 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3772 vregEnc3210(i->Ain.SseShuf.src) );
3773 *p++ = (UChar)(i->Ain.SseShuf.order);
3774 goto done;
3775
3776 //uu case Ain_AvxLdSt: {
3777 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3778 //uu i->Ain.AvxLdSt.addr );
3779 //uu p = emitVexPrefix(p, vex);
3780 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3781 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3782 //uu goto done;
3783 //uu }
3784
3785 case Ain_EvCheck: {
3786 /* We generate:
3787 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3788 (2 bytes) jns nofail expected taken
3789 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3790 nofail:
3791 */
3792 /* This is heavily asserted re instruction lengths. It needs to
3793 be. If we are given unexpected forms of .amCounter or
3794 .amFailAddr -- basically, anything that's not of the form
3795 uimm7(%rbp) -- the assertions below are likely to fail. */
3796 /* Note also that after the decl we must be very careful not to
3797 read the carry flag, else we get a partial flags stall.
3798 js/jns avoids that, though. */
3799 UChar* p0 = p;
3800 /* --- decl 8(%rbp) --- */
3801 /* Need to compute the REX byte for the decl in order to prove
3802 that we don't need it, since this is a 32-bit dec and all
3803 registers involved in the amode are < r8. "1" because
3804 there's no register in this encoding; instead the register
3805 field is used as a sub opcode. The encoding for "decl r/m32"
3806 is FF /1, hence the "1". */
3807 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
3808 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3809 *p++ = 0xFF;
3810 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
3811 vassert(p - p0 == 3);
3812 /* --- jns nofail --- */
3813 *p++ = 0x79;
3814 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3815 vassert(p - p0 == 5);
3816 /* --- jmp* 0(%rbp) --- */
3817 /* Once again, verify we don't need REX. The encoding is FF /4.
3818 We don't need REX.W since by default FF /4 in 64-bit mode
3819 implies a 64 bit load. */
3820 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
3821 if (rex != 0x40) goto bad;
3822 *p++ = 0xFF;
3823 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
3824 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3825 /* And crosscheck .. */
3826 vassert(evCheckSzB_AMD64() == 8);
3827 goto done;
3828 }
3829
3830 case Ain_ProfInc: {
3831 /* We generate movabsq $0, %r11
3832 incq (%r11)
3833 in the expectation that a later call to patchProfInc_AMD64 (below)
3834 will be used to fill in the immediate field once the right
3835 value is known.
3836 49 BB 00 00 00 00 00 00 00 00
3837 49 FF 03
3838 */
3839 *p++ = 0x49; *p++ = 0xBB;
3840 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3841 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3842 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
3843 /* Tell the caller .. */
3844 vassert(!(*is_profInc));
3845 *is_profInc = True;
3846 goto done;
3847 }
3848
3849 default:
3850 goto bad;
3851 }
3852
3853 bad:
3854 ppAMD64Instr(i, mode64);
3855 vpanic("emit_AMD64Instr");
3856 /*NOTREACHED*/
3857
3858 done:
3859 vassert(p - &buf[0] <= 64);
3860 return p - &buf[0];
3861 }
3862
3863
3864 /* How big is an event check? See case for Ain_EvCheck in
3865 emit_AMD64Instr just above. That crosschecks what this returns, so
3866 we can tell if we're inconsistent. */
3867 Int evCheckSzB_AMD64 (void)
3868 {
3869 return 8;
3870 }
3871
3872
3873 /* NB: what goes on here has to be very closely coordinated with the
3874 emitInstr case for XDirect, above. */
3875 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
3876 void* place_to_chain,
3877 const void* disp_cp_chain_me_EXPECTED,
3878 const void* place_to_jump_to )
3879 {
3880 vassert(endness_host == VexEndnessLE);
3881
3882 /* What we're expecting to see is:
3883 movabsq $disp_cp_chain_me_EXPECTED, %r11
3884 call *%r11
3885 viz
3886 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
3887 41 FF D3
3888 */
3889 UChar* p = (UChar*)place_to_chain;
3890 vassert(p[0] == 0x49);
3891 vassert(p[1] == 0xBB);
3892 vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
3893 vassert(p[10] == 0x41);
3894 vassert(p[11] == 0xFF);
3895 vassert(p[12] == 0xD3);
3896 /* And what we want to change it to is either:
3897 (general case):
3898 movabsq $place_to_jump_to, %r11
3899 jmpq *%r11
3900 viz
3901 49 BB <8 bytes value == place_to_jump_to>
3902 41 FF E3
3903 So it's the same length (convenient, huh) and we don't
3904 need to change all the bits.
3905 ---OR---
3906 in the case where the displacement falls within 32 bits
3907 jmpq disp32 where disp32 is relative to the next insn
3908 ud2; ud2; ud2; ud2
3909 viz
3910 E9 <4 bytes == disp32>
3911 0F 0B 0F 0B 0F 0B 0F 0B
3912
3913 In both cases the replacement has the same length as the original.
3914 To remain sane & verifiable,
3915 (1) limit the displacement for the short form to
3916 (say) +/- one billion, so as to avoid wraparound
3917 off-by-ones
3918 (2) even if the short form is applicable, once every (say)
3919 1024 times use the long form anyway, so as to maintain
3920 verifiability
3921 */
3922 /* This is the delta we need to put into a JMP d32 insn. It's
3923 relative to the start of the next insn, hence the -5. */
3924 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
3925 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
3926
3927 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
3928 if (shortOK) {
3929 shortCTR++; // thread safety bleh
3930 if (0 == (shortCTR & 0x3FF)) {
3931 shortOK = False;
3932 if (0)
3933 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
3934 "using long jmp\n", shortCTR);
3935 }
3936 }
3937
3938 /* And make the modifications. */
3939 if (shortOK) {
3940 p[0] = 0xE9;
3941 write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
3942 p[5] = 0x0F; p[6] = 0x0B;
3943 p[7] = 0x0F; p[8] = 0x0B;
3944 p[9] = 0x0F; p[10] = 0x0B;
3945 p[11] = 0x0F; p[12] = 0x0B;
3946 /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
3947 delta >>= 32;
3948 vassert(delta == 0LL || delta == -1LL);
3949 } else {
3950 /* Minimal modifications from the starting sequence. */
3951 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
3952 p[12] = 0xE3;
3953 }
3954 VexInvalRange vir = { (HWord)place_to_chain, 13 };
3955 return vir;
3956 }
3957
3958
3959 /* NB: what goes on here has to be very closely coordinated with the
3960 emitInstr case for XDirect, above. */
3961 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
3962 void* place_to_unchain,
3963 const void* place_to_jump_to_EXPECTED,
3964 const void* disp_cp_chain_me )
3965 {
3966 vassert(endness_host == VexEndnessLE);
3967
3968 /* What we're expecting to see is either:
3969 (general case)
3970 movabsq $place_to_jump_to_EXPECTED, %r11
3971 jmpq *%r11
3972 viz
3973 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
3974 41 FF E3
3975 ---OR---
3976 in the case where the displacement falls within 32 bits
3977 jmpq d32
3978 ud2; ud2; ud2; ud2
3979 viz
3980 E9 <4 bytes == disp32>
3981 0F 0B 0F 0B 0F 0B 0F 0B
3982 */
3983 UChar* p = (UChar*)place_to_unchain;
3984 Bool valid = False;
3985 if (p[0] == 0x49 && p[1] == 0xBB
3986 && read_misaligned_ULong_LE(&p[2])
3987 == (ULong)(Addr)place_to_jump_to_EXPECTED
3988 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
3989 /* it's the long form */
3990 valid = True;
3991 }
3992 else
3993 if (p[0] == 0xE9
3994 && p[5] == 0x0F && p[6] == 0x0B
3995 && p[7] == 0x0F && p[8] == 0x0B
3996 && p[9] == 0x0F && p[10] == 0x0B
3997 && p[11] == 0x0F && p[12] == 0x0B) {
3998 /* It's the short form. Check the offset is right. */
3999 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
4000 Long s64 = (Long)s32;
4001 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
4002 valid = True;
4003 if (0)
4004 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4005 }
4006 }
4007 vassert(valid);
4008 /* And what we want to change it to is:
4009 movabsq $disp_cp_chain_me, %r11
4010 call *%r11
4011 viz
4012 49 BB <8 bytes value == disp_cp_chain_me>
4013 41 FF D3
4014 So it's the same length (convenient, huh).
4015 */
4016 p[0] = 0x49;
4017 p[1] = 0xBB;
4018 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
4019 p[10] = 0x41;
4020 p[11] = 0xFF;
4021 p[12] = 0xD3;
4022 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
4023 return vir;
4024 }
4025
4026
4027 /* Patch the counter address into a profile inc point, as previously
4028 created by the Ain_ProfInc case for emit_AMD64Instr. */
4029 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4030 void* place_to_patch,
4031 const ULong* location_of_counter )
4032 {
4033 vassert(endness_host == VexEndnessLE);
4034 vassert(sizeof(ULong*) == 8);
4035 UChar* p = (UChar*)place_to_patch;
4036 vassert(p[0] == 0x49);
4037 vassert(p[1] == 0xBB);
4038 vassert(p[2] == 0x00);
4039 vassert(p[3] == 0x00);
4040 vassert(p[4] == 0x00);
4041 vassert(p[5] == 0x00);
4042 vassert(p[6] == 0x00);
4043 vassert(p[7] == 0x00);
4044 vassert(p[8] == 0x00);
4045 vassert(p[9] == 0x00);
4046 vassert(p[10] == 0x49);
4047 vassert(p[11] == 0xFF);
4048 vassert(p[12] == 0x03);
4049 ULong imm64 = (ULong)(Addr)location_of_counter;
4050 p[2] = imm64 & 0xFF; imm64 >>= 8;
4051 p[3] = imm64 & 0xFF; imm64 >>= 8;
4052 p[4] = imm64 & 0xFF; imm64 >>= 8;
4053 p[5] = imm64 & 0xFF; imm64 >>= 8;
4054 p[6] = imm64 & 0xFF; imm64 >>= 8;
4055 p[7] = imm64 & 0xFF; imm64 >>= 8;
4056 p[8] = imm64 & 0xFF; imm64 >>= 8;
4057 p[9] = imm64 & 0xFF; imm64 >>= 8;
4058 VexInvalRange vir = { (HWord)place_to_patch, 13 };
4059 return vir;
4060 }
4061
4062
4063 /*---------------------------------------------------------------*/
4064 /*--- end host_amd64_defs.c ---*/
4065 /*---------------------------------------------------------------*/
4066