
/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2015 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm64_defs.h"


/* --------- Registers. --------- */

/* The usual HReg abstraction.  We use the following classes only:
     X regs (64 bit int)
     D regs (64 bit float, also used for 32 bit float)
     Q regs (128 bit vector)
*/

const RRegUniverse* getRRegUniverse_ARM64 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_ARM64;
   static Bool         rRegUniverse_ARM64_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_ARM64;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_ARM64_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */

   ru->regs[ru->size++] = hregARM64_X22();
   ru->regs[ru->size++] = hregARM64_X23();
   ru->regs[ru->size++] = hregARM64_X24();
   ru->regs[ru->size++] = hregARM64_X25();
   ru->regs[ru->size++] = hregARM64_X26();
   ru->regs[ru->size++] = hregARM64_X27();
   ru->regs[ru->size++] = hregARM64_X28();

   ru->regs[ru->size++] = hregARM64_X0();
   ru->regs[ru->size++] = hregARM64_X1();
   ru->regs[ru->size++] = hregARM64_X2();
   ru->regs[ru->size++] = hregARM64_X3();
   ru->regs[ru->size++] = hregARM64_X4();
   ru->regs[ru->size++] = hregARM64_X5();
   ru->regs[ru->size++] = hregARM64_X6();
   ru->regs[ru->size++] = hregARM64_X7();
   // X8 is used as a ProfInc temporary, not available to regalloc.
   // X9 is a chaining/spill temporary, not available to regalloc.

   // Do we really need all these?
   //ru->regs[ru->size++] = hregARM64_X10();
   //ru->regs[ru->size++] = hregARM64_X11();
   //ru->regs[ru->size++] = hregARM64_X12();
   //ru->regs[ru->size++] = hregARM64_X13();
   //ru->regs[ru->size++] = hregARM64_X14();
   //ru->regs[ru->size++] = hregARM64_X15();
   // X21 is the guest state pointer, not available to regalloc.

   // vector regs.  Unfortunately not callee-saved.
   ru->regs[ru->size++] = hregARM64_Q16();
   ru->regs[ru->size++] = hregARM64_Q17();
   ru->regs[ru->size++] = hregARM64_Q18();
   ru->regs[ru->size++] = hregARM64_Q19();
   ru->regs[ru->size++] = hregARM64_Q20();

   // F64 regs, all of which are callee-saved
   ru->regs[ru->size++] = hregARM64_D8();
   ru->regs[ru->size++] = hregARM64_D9();
   ru->regs[ru->size++] = hregARM64_D10();
   ru->regs[ru->size++] = hregARM64_D11();
   ru->regs[ru->size++] = hregARM64_D12();
   ru->regs[ru->size++] = hregARM64_D13();

   ru->allocable = ru->size;
   /* And other regs, not available to the allocator. */

   // unavail: x21 as GSP
   // x8 is used as a ProfInc temporary
   // x9 is used as a spill/reload/chaining/call temporary
   // x30 as LR
   // x31 because dealing with the SP-vs-ZR overloading is too
   // confusing, and we don't need to do so, so let's just avoid
   // the problem
   //
   // Currently, we have 15 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
   //
   // Hence for the allocatable integer registers we have:
   //
   // callee-saved: 22 23 24 25 26 27 28
   // caller-saved: 0 1 2 3 4 5 6 7
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getRegUsage for ARM64Instr_Call too.

   ru->regs[ru->size++] = hregARM64_X8();
   ru->regs[ru->size++] = hregARM64_X9();
   ru->regs[ru->size++] = hregARM64_X21();

   rRegUniverse_ARM64_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}


void ppHRegARM64 ( HReg reg ) {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 31);
         vex_printf("x%d", r);
         return;
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM64");
   }
}

static void ppHRegARM64asSreg ( HReg reg ) {
   ppHRegARM64(reg);
   vex_printf("(S-reg)");
}

static void ppHRegARM64asHreg ( HReg reg ) {
   ppHRegARM64(reg);
   vex_printf("(H-reg)");
}


/* --------- Condition codes, ARM64 encoding. --------- */

static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
   switch (cond) {
      case ARM64cc_EQ: return "eq";
      case ARM64cc_NE: return "ne";
      case ARM64cc_CS: return "cs";
      case ARM64cc_CC: return "cc";
      case ARM64cc_MI: return "mi";
      case ARM64cc_PL: return "pl";
      case ARM64cc_VS: return "vs";
      case ARM64cc_VC: return "vc";
      case ARM64cc_HI: return "hi";
      case ARM64cc_LS: return "ls";
      case ARM64cc_GE: return "ge";
      case ARM64cc_LT: return "lt";
      case ARM64cc_GT: return "gt";
      case ARM64cc_LE: return "le";
      case ARM64cc_AL: return "al"; // default
      case ARM64cc_NV: return "nv";
      default: vpanic("showARM64CondCode");
   }
}


/* --------- Memory address expressions (amodes). --------- */

ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RI9;
   am->ARM64am.RI9.reg = reg;
   am->ARM64am.RI9.simm9 = simm9;
   vassert(-256 <= simm9 && simm9 <= 255);
   return am;
}

ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RI12;
   am->ARM64am.RI12.reg = reg;
   am->ARM64am.RI12.uimm12 = uimm12;
   am->ARM64am.RI12.szB = szB;
   vassert(uimm12 >= 0 && uimm12 <= 4095);
   switch (szB) {
      case 1: case 2: case 4: case 8: break;
      default: vassert(0);
   }
   return am;
}

ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RR;
   am->ARM64am.RR.base = base;
   am->ARM64am.RR.index = index;
   return am;
}
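
/* A brief note on the three amode forms (commentary only): RI9 carries
   a signed 9-bit byte offset (-256 .. 255); RI12 carries an unsigned
   12-bit offset that is implicitly scaled by the access size szB, so
   for example an RI12 with uimm12 == 2 and szB == 8 denotes a byte
   offset of 16, as the printer below makes explicit; RR is a plain
   base-plus-index form with no scaling field. */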

static void ppARM64AMode ( ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         vex_printf("%d(", am->ARM64am.RI9.simm9);
         ppHRegARM64(am->ARM64am.RI9.reg);
         vex_printf(")");
         break;
      case ARM64am_RI12:
         vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
                           * (UInt)am->ARM64am.RI12.uimm12);
         ppHRegARM64(am->ARM64am.RI12.reg);
         vex_printf(")");
         break;
      case ARM64am_RR:
         vex_printf("(");
         ppHRegARM64(am->ARM64am.RR.base);
         vex_printf(",");
         ppHRegARM64(am->ARM64am.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         addHRegUse(u, HRmRead, am->ARM64am.RR.base);
         addHRegUse(u, HRmRead, am->ARM64am.RR.index);
         return;
      default:
         vpanic("addRegUsage_ARM64Amode");
   }
}

static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
         am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
         return;
      default:
         vpanic("mapRegs_ARM64Amode");
   }
}


/* --------- Reg or uimm12<<{0,12} operands --------- */

ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
   ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
   riA->tag = ARM64riA_I12;
   riA->ARM64riA.I12.imm12 = imm12;
   riA->ARM64riA.I12.shift = shift;
   vassert(imm12 < 4096);
   vassert(shift == 0 || shift == 12);
   return riA;
}
ARM64RIA* ARM64RIA_R ( HReg reg ) {
   ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
   riA->tag = ARM64riA_R;
   riA->ARM64riA.R.reg = reg;
   return riA;
}
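
/* Worked example: the constant 0x1000 is representable as
   ARM64RIA_I12(1, 12) -- an imm12 of 1 shifted left by 12 -- whereas
   0x1001 is not expressible in either shift position and would have
   to be supplied via the register form instead. */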

static void ppARM64RIA ( ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
                                 << riA->ARM64riA.I12.shift));
         break;
      case ARM64riA_R:
         ppHRegARM64(riA->ARM64riA.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIA");
   }
}

static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIA");
   }
}


/* --------- Reg or "bitfield" (logic immediate) operands --------- */

ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
   ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
   riL->tag = ARM64riL_I13;
   riL->ARM64riL.I13.bitN = bitN;
   riL->ARM64riL.I13.immR = immR;
   riL->ARM64riL.I13.immS = immS;
   vassert(bitN < 2);
   vassert(immR < 64);
   vassert(immS < 64);
   return riL;
}
ARM64RIL* ARM64RIL_R ( HReg reg ) {
   ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
   riL->tag = ARM64riL_R;
   riL->ARM64riL.R.reg = reg;
   return riL;
}
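
/* The (bitN, immR, immS) triple is the standard AArch64 "logical
   immediate" encoding -- the N:immr:imms fields of AND/ORR/EOR with an
   immediate.  The printer below simply shows the raw fields rather
   than decoding the repeated bit pattern they describe. */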

static void ppARM64RIL ( ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         vex_printf("#nrs(%u,%u,%u)",
                    (UInt)riL->ARM64riL.I13.bitN,
                    (UInt)riL->ARM64riL.I13.immR,
                    (UInt)riL->ARM64riL.I13.immS);
         break;
      case ARM64riL_R:
         ppHRegARM64(riL->ARM64riL.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIL");
   }
}

static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIL");
   }
}


/* --------------- Reg or uimm6 operands --------------- */

ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
   ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_I6;
   ri6->ARM64ri6.I6.imm6 = imm6;
   vassert(imm6 > 0 && imm6 < 64);
   return ri6;
}
ARM64RI6* ARM64RI6_R ( HReg reg ) {
   ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_R;
   ri6->ARM64ri6.R.reg = reg;
   return ri6;
}
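
/* The immediate form carries a constant shift amount in the range
   1 .. 63 (note the constructor rejects zero), and the register form
   is used for shift-by-register; both feed ARM64Instr_Shift below. */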

static void ppARM64RI6 ( ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
         break;
      case ARM64ri6_R:
         ppHRegARM64(ri6->ARM64ri6.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RI6");
   }
}

static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RI6");
   }
}


/* --------- Instructions. --------- */

static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
   switch (op) {
      case ARM64lo_AND: return "and";
      case ARM64lo_OR:  return "orr";
      case ARM64lo_XOR: return "eor";
      default: vpanic("showARM64LogicOp");
   }
}

static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
   switch (op) {
      case ARM64sh_SHL: return "lsl";
      case ARM64sh_SHR: return "lsr";
      case ARM64sh_SAR: return "asr";
      default: vpanic("showARM64ShiftOp");
   }
}

static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
   switch (op) {
      case ARM64un_NEG: return "neg";
      case ARM64un_NOT: return "not";
      case ARM64un_CLZ: return "clz";
      default: vpanic("showARM64UnaryOp");
   }
}

static const HChar* showARM64MulOp ( ARM64MulOp op ) {
   switch (op) {
      case ARM64mul_PLAIN: return "mul ";
      case ARM64mul_ZX:    return "umulh";
      case ARM64mul_SX:    return "smulh";
      default: vpanic("showARM64MulOp");
   }
}

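/* Describe an int<->FP conversion op: *syn gets 's' or 'u' (signed or
   unsigned integer flavour), *fszB the FP operand size in bytes (4 or
   8), and *iszB the integer operand size in bytes (4 or 8). */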
static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
                                     /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
                                     ARM64CvtOp op ) {
   switch (op) {
      case ARM64cvt_F32_I32S:
         *syn = 's'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32S:
         *syn = 's'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64S:
         *syn = 's'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64S:
         *syn = 's'; *fszB = 8; *iszB = 8; break;
      case ARM64cvt_F32_I32U:
         *syn = 'u'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32U:
         *syn = 'u'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64U:
         *syn = 'u'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64U:
         *syn = 'u'; *fszB = 8; *iszB = 8; break;
      default:
         vpanic("characteriseARM64CvtOp");
   }
}

static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
   switch (op) {
      case ARM64fpb_ADD: return "add";
      case ARM64fpb_SUB: return "sub";
      case ARM64fpb_MUL: return "mul";
      case ARM64fpb_DIV: return "div";
      default: vpanic("showARM64FpBinOp");
   }
}

static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
   switch (op) {
      case ARM64fpu_NEG:   return "neg ";
      case ARM64fpu_ABS:   return "abs ";
      case ARM64fpu_SQRT:  return "sqrt ";
      case ARM64fpu_RINT:  return "rinti";
      case ARM64fpu_RECPX: return "recpx";
      default: vpanic("showARM64FpUnaryOp");
   }
}

static void showARM64VecBinOp(/*OUT*/const HChar** nm,
                              /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
   switch (op) {
      case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
      case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
      case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
      case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
      case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
      case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
      case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
      case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
      case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
      case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
      case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
      case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
      case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
      case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
      case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
      case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
      case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
      case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
      case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
      case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
      case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
      case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
      case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
      case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
      case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
      case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
      case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
      case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
      case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
      case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
      case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
      case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
      case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
      case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
      case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
      case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
      case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
      case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
      case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
      case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
      case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
      case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
      case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
      case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
      case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
      case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
      case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
      case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
      case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
      case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
      case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
      case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
      case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
      case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
      case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
      case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
      case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
      case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
      case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
      case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
      case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
      case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
      case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
      case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
      case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
      case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
      case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
      case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
      case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
      case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
      case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
      case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
      case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
      case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
      case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
      case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
      case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
      case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
      case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
      case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
      case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
      case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
      case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
      case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
      case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
      case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
      case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
      case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
      case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
      case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
      case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
      case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
      case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
      case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
      case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
      case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
      case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
      case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
      case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
      case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
      case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
      case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
      case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
      case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
      case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
      case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
      case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
      case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
      case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
      case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
      case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
      case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
      case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
      case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
      case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
      case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
      case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
      case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
      case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
      case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
      case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
      case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
      case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
      case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
      case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
      case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
      case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
      case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
      case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
      case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
      case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
      case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
      case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
      case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
      case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
      case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
      case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
      default: vpanic("showARM64VecBinOp");
   }
}

static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar,
                                 ARM64VecModifyOp op ) {
   switch (op) {
      case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
      case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
      case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
      case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
      case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
      case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
      case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
      case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
      default: vpanic("showARM64VecModifyOp");
   }
}

static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
{
   switch (op) {
      case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
      case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
      case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
      case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
      case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
      case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
      case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
      case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
      case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
      case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
      case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
      case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
      case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
      case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
      case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
      case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
      case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
      case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
      case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
      case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
      case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
      case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
      case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
      case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
      case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
      case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
      case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
      case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
      case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return;
      case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return;
      default: vpanic("showARM64VecUnaryOp");
   }
}

static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
                                   /*OUT*/const HChar** ar,
                                   ARM64VecShiftImmOp op )
{
   switch (op) {
      case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
      case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
      case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
      case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
      case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
      case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
      case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
      case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
      case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
      case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
      case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
      case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
      case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
      case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
      case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
      case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
      case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
      case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
      case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
      case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
      case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
      case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
      case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
      case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
      case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
      case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
      case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
      case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
      case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
      case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
      case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
      case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
      case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
      default: vpanic("showARM64VecShiftImmOp");
   }
}

static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
   switch (op) {
      case ARM64vecna_XTN: return "xtn ";
      case ARM64vecna_SQXTN: return "sqxtn ";
      case ARM64vecna_UQXTN: return "uqxtn ";
      case ARM64vecna_SQXTUN: return "sqxtun";
      default: vpanic("showARM64VecNarrowOp");
   }
}

ARM64Instr* ARM64Instr_Arith ( HReg dst,
                               HReg argL, ARM64RIA* argR, Bool isAdd ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Arith;
   i->ARM64in.Arith.dst = dst;
   i->ARM64in.Arith.argL = argL;
   i->ARM64in.Arith.argR = argR;
   i->ARM64in.Arith.isAdd = isAdd;
   return i;
}
ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Cmp;
   i->ARM64in.Cmp.argL = argL;
   i->ARM64in.Cmp.argR = argR;
   i->ARM64in.Cmp.is64 = is64;
   return i;
}
ARM64Instr* ARM64Instr_Logic ( HReg dst,
                               HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Logic;
   i->ARM64in.Logic.dst = dst;
   i->ARM64in.Logic.argL = argL;
   i->ARM64in.Logic.argR = argR;
   i->ARM64in.Logic.op = op;
   return i;
}
ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Test;
   i->ARM64in.Test.argL = argL;
   i->ARM64in.Test.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_Shift ( HReg dst,
                               HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Shift;
   i->ARM64in.Shift.dst = dst;
   i->ARM64in.Shift.argL = argL;
   i->ARM64in.Shift.argR = argR;
   i->ARM64in.Shift.op = op;
   return i;
}
ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Unary;
   i->ARM64in.Unary.dst = dst;
   i->ARM64in.Unary.src = src;
   i->ARM64in.Unary.op = op;
   return i;
}
ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_MovI;
   i->ARM64in.MovI.dst = dst;
   i->ARM64in.MovI.src = src;
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return i;
}
ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Imm64;
   i->ARM64in.Imm64.dst = dst;
   i->ARM64in.Imm64.imm64 = imm64;
   return i;
}
ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt64;
   i->ARM64in.LdSt64.isLoad = isLoad;
   i->ARM64in.LdSt64.rD = rD;
   i->ARM64in.LdSt64.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt32;
   i->ARM64in.LdSt32.isLoad = isLoad;
   i->ARM64in.LdSt32.rD = rD;
   i->ARM64in.LdSt32.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt16;
   i->ARM64in.LdSt16.isLoad = isLoad;
   i->ARM64in.LdSt16.rD = rD;
   i->ARM64in.LdSt16.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt8;
   i->ARM64in.LdSt8.isLoad = isLoad;
   i->ARM64in.LdSt8.rD = rD;
   i->ARM64in.LdSt8.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
                                 ARM64CondCode cond, Bool toFastEP ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XDirect;
   i->ARM64in.XDirect.dstGA = dstGA;
   i->ARM64in.XDirect.amPC = amPC;
   i->ARM64in.XDirect.cond = cond;
   i->ARM64in.XDirect.toFastEP = toFastEP;
   return i;
}
ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
                                ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XIndir;
   i->ARM64in.XIndir.dstGA = dstGA;
   i->ARM64in.XIndir.amPC = amPC;
   i->ARM64in.XIndir.cond = cond;
   return i;
}
ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
                                   ARM64CondCode cond, IRJumpKind jk ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XAssisted;
   i->ARM64in.XAssisted.dstGA = dstGA;
   i->ARM64in.XAssisted.amPC = amPC;
   i->ARM64in.XAssisted.cond = cond;
   i->ARM64in.XAssisted.jk = jk;
   return i;
}
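
/* How the three control-flow transfers above differ (the
   pretty-printer further down suggests the code each one stands for):
   XDirect is a conditional jump to a statically known guest address
   and is the only variant that can be chained/patched afterwards;
   XIndir jumps to a guest address held in a register; XAssisted also
   jumps to a guest address in a register, but in addition hands the
   run-time an IRJumpKind describing why the jump needs assistance. */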
ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
                              ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_CSel;
   i->ARM64in.CSel.dst = dst;
   i->ARM64in.CSel.argL = argL;
   i->ARM64in.CSel.argR = argR;
   i->ARM64in.CSel.cond = cond;
   return i;
}
ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
                              RetLoc rloc ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Call;
   i->ARM64in.Call.cond = cond;
   i->ARM64in.Call.target = target;
   i->ARM64in.Call.nArgRegs = nArgRegs;
   i->ARM64in.Call.rloc = rloc;
   vassert(is_sane_RetLoc(rloc));
   return i;
}
extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_AddToSP;
   i->ARM64in.AddToSP.simm = simm;
   vassert(-4096 < simm && simm < 4096);
   vassert(0 == (simm & 0xF));
   return i;
}
extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FromSP;
   i->ARM64in.FromSP.dst = dst;
   return i;
}
ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
                             ARM64MulOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Mul;
   i->ARM64in.Mul.dst = dst;
   i->ARM64in.Mul.argL = argL;
   i->ARM64in.Mul.argR = argR;
   i->ARM64in.Mul.op = op;
   return i;
}
ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdrEX;
   i->ARM64in.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_StrEX;
   i->ARM64in.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARM64Instr* ARM64Instr_MFence ( void ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_MFence;
   return i;
}
ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStH;
   i->ARM64in.VLdStH.isLoad = isLoad;
   i->ARM64in.VLdStH.hD = sD;
   i->ARM64in.VLdStH.rN = rN;
   i->ARM64in.VLdStH.uimm12 = uimm12;
   vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   return i;
}
ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStS;
   i->ARM64in.VLdStS.isLoad = isLoad;
   i->ARM64in.VLdStS.sD = sD;
   i->ARM64in.VLdStS.rN = rN;
   i->ARM64in.VLdStS.uimm12 = uimm12;
   vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   return i;
}
ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStD;
   i->ARM64in.VLdStD.isLoad = isLoad;
   i->ARM64in.VLdStD.dD = dD;
   i->ARM64in.VLdStD.rN = rN;
   i->ARM64in.VLdStD.uimm12 = uimm12;
   vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   return i;
}
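
/* For VLdStH/S/D the uimm12 field is a byte offset and, as the asserts
   above require, it must be naturally aligned for the access size (a
   multiple of 2, 4 or 8 respectively) and small enough to fit the
   scaled 12-bit offset field of the eventual ldr/str encoding. */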
ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStQ;
   i->ARM64in.VLdStQ.isLoad = isLoad;
   i->ARM64in.VLdStQ.rQ = rQ;
   i->ARM64in.VLdStQ.rN = rN;
   return i;
}
ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtI2F;
   i->ARM64in.VCvtI2F.how = how;
   i->ARM64in.VCvtI2F.rD = rD;
   i->ARM64in.VCvtI2F.rS = rS;
   return i;
}
ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
                                 UChar armRM ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtF2I;
   i->ARM64in.VCvtF2I.how = how;
   i->ARM64in.VCvtF2I.rD = rD;
   i->ARM64in.VCvtF2I.rS = rS;
   i->ARM64in.VCvtF2I.armRM = armRM;
   vassert(armRM <= 3);
   return i;
}
ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtSD;
   i->ARM64in.VCvtSD.sToD = sToD;
   i->ARM64in.VCvtSD.dst = dst;
   i->ARM64in.VCvtSD.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtHS;
   i->ARM64in.VCvtHS.hToS = hToS;
   i->ARM64in.VCvtHS.dst = dst;
   i->ARM64in.VCvtHS.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtHD;
   i->ARM64in.VCvtHD.hToD = hToD;
   i->ARM64in.VCvtHD.dst = dst;
   i->ARM64in.VCvtHD.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryD;
   i->ARM64in.VUnaryD.op = op;
   i->ARM64in.VUnaryD.dst = dst;
   i->ARM64in.VUnaryD.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryS;
   i->ARM64in.VUnaryS.op = op;
   i->ARM64in.VUnaryS.dst = dst;
   i->ARM64in.VUnaryS.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinD;
   i->ARM64in.VBinD.op = op;
   i->ARM64in.VBinD.dst = dst;
   i->ARM64in.VBinD.argL = argL;
   i->ARM64in.VBinD.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinS;
   i->ARM64in.VBinS.op = op;
   i->ARM64in.VBinS.dst = dst;
   i->ARM64in.VBinS.argL = argL;
   i->ARM64in.VBinS.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpD;
   i->ARM64in.VCmpD.argL = argL;
   i->ARM64in.VCmpD.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpS;
   i->ARM64in.VCmpS.argL = argL;
   i->ARM64in.VCmpS.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
                                ARM64CondCode cond, Bool isD ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VFCSel;
   i->ARM64in.VFCSel.dst = dst;
   i->ARM64in.VFCSel.argL = argL;
   i->ARM64in.VFCSel.argR = argR;
   i->ARM64in.VFCSel.cond = cond;
   i->ARM64in.VFCSel.isD = isD;
   return i;
}
ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FPCR;
   i->ARM64in.FPCR.toFPCR = toFPCR;
   i->ARM64in.FPCR.iReg = iReg;
   return i;
}
ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FPSR;
   i->ARM64in.FPSR.toFPSR = toFPSR;
   i->ARM64in.FPSR.iReg = iReg;
   return i;
}
ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinV;
   i->ARM64in.VBinV.op = op;
   i->ARM64in.VBinV.dst = dst;
   i->ARM64in.VBinV.argL = argL;
   i->ARM64in.VBinV.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VModifyV;
   i->ARM64in.VModifyV.op = op;
   i->ARM64in.VModifyV.mod = mod;
   i->ARM64in.VModifyV.arg = arg;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryV;
   i->ARM64in.VUnaryV.op = op;
   i->ARM64in.VUnaryV.dst = dst;
   i->ARM64in.VUnaryV.arg = arg;
   return i;
}
ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
                                  UInt dszBlg2, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VNarrowV;
   i->ARM64in.VNarrowV.op = op;
   i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   i->ARM64in.VNarrowV.dst = dst;
   i->ARM64in.VNarrowV.src = src;
   vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   return i;
}
ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
                                    HReg dst, HReg src, UInt amt ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VShiftImmV;
   i->ARM64in.VShiftImmV.op = op;
   i->ARM64in.VShiftImmV.dst = dst;
   i->ARM64in.VShiftImmV.src = src;
   i->ARM64in.VShiftImmV.amt = amt;
   UInt minSh = 0;
   UInt maxSh = 0;
   switch (op) {
      /* For right shifts, the allowed shift amounts are 1 .. lane_size.
         For left shifts, the allowed shift amounts are 0 .. lane_size-1.
      */
      case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
      case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
      case ARM64vecshi_SQSHRUN2SD:
      case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
      case ARM64vecshi_SQRSHRUN2SD:
         minSh = 1; maxSh = 64; break;
      case ARM64vecshi_SHL64x2:
      case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
      case ARM64vecshi_SQSHLU64x2:
         minSh = 0; maxSh = 63; break;
      case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
      case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
      case ARM64vecshi_SQSHRUN4HS:
      case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
      case ARM64vecshi_SQRSHRUN4HS:
         minSh = 1; maxSh = 32; break;
      case ARM64vecshi_SHL32x4:
      case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
      case ARM64vecshi_SQSHLU32x4:
         minSh = 0; maxSh = 31; break;
      case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
      case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
      case ARM64vecshi_SQSHRUN8BH:
      case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
      case ARM64vecshi_SQRSHRUN8BH:
         minSh = 1; maxSh = 16; break;
      case ARM64vecshi_SHL16x8:
      case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
      case ARM64vecshi_SQSHLU16x8:
         minSh = 0; maxSh = 15; break;
      case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
         minSh = 1; maxSh = 8; break;
      case ARM64vecshi_SHL8x16:
      case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
      case ARM64vecshi_SQSHLU8x16:
         minSh = 0; maxSh = 7; break;
      default:
         vassert(0);
   }
   vassert(maxSh > 0);
   vassert(amt >= minSh && amt <= maxSh);
   return i;
}
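
/* Example of the rule above: a ushr of 4s lanes may shift by 1 .. 32
   (shifting right by the full lane width is meaningful), so
   ARM64Instr_VShiftImmV(ARM64vecshi_USHR32x4, dst, src, 32) is
   accepted, while the corresponding left shift (shl 4s) only allows
   amounts 0 .. 31. */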
ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VExtV;
   i->ARM64in.VExtV.dst = dst;
   i->ARM64in.VExtV.srcLo = srcLo;
   i->ARM64in.VExtV.srcHi = srcHi;
   i->ARM64in.VExtV.amtB = amtB;
   vassert(amtB >= 1 && amtB <= 15);
   return i;
}
ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VImmQ;
   i->ARM64in.VImmQ.rQ = rQ;
   i->ARM64in.VImmQ.imm = imm;
   /* Check that this is something that can actually be emitted. */
   switch (imm) {
      case 0x0000: case 0x0001: case 0x0003:
      case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
         break;
      default:
         vassert(0);
   }
   return i;
}
ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VDfromX;
   i->ARM64in.VDfromX.rD = rD;
   i->ARM64in.VDfromX.rX = rX;
   return i;
}
ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromX;
   i->ARM64in.VQfromX.rQ = rQ;
   i->ARM64in.VQfromX.rXlo = rXlo;
   return i;
}
ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromXX;
   i->ARM64in.VQfromXX.rQ = rQ;
   i->ARM64in.VQfromXX.rXhi = rXhi;
   i->ARM64in.VQfromXX.rXlo = rXlo;
   return i;
}
ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromQ;
   i->ARM64in.VXfromQ.rX = rX;
   i->ARM64in.VXfromQ.rQ = rQ;
   i->ARM64in.VXfromQ.laneNo = laneNo;
   vassert(laneNo <= 1);
   return i;
}
ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromDorS;
   i->ARM64in.VXfromDorS.rX = rX;
   i->ARM64in.VXfromDorS.rDorS = rDorS;
   i->ARM64in.VXfromDorS.fromD = fromD;
   return i;
}
ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VMov;
   i->ARM64in.VMov.szB = szB;
   i->ARM64in.VMov.dst = dst;
   i->ARM64in.VMov.src = src;
   switch (szB) {
      case 16:
         vassert(hregClass(src) == HRcVec128);
         vassert(hregClass(dst) == HRcVec128);
         break;
      case 8:
         vassert(hregClass(src) == HRcFlt64);
         vassert(hregClass(dst) == HRcFlt64);
         break;
      default:
         vpanic("ARM64Instr_VMov");
   }
   return i;
}
ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
                                 ARM64AMode* amFailAddr ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_EvCheck;
   i->ARM64in.EvCheck.amCounter = amCounter;
   i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   return i;
}
ARM64Instr* ARM64Instr_ProfInc ( void ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_ProfInc;
   return i;
}
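
/* ARM64in_ProfInc carries no operands at all: it increments a 64-bit
   profiling counter and, in the usual VEX scheme, the counter's
   address is patched into the generated code only when the
   translation is installed.  That is why X8 is reserved above as a
   ProfInc temporary rather than being handed to the allocator. */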

/* ... */

void ppARM64Instr ( const ARM64Instr* i ) {
   switch (i->tag) {
      case ARM64in_Arith:
         vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
         ppHRegARM64(i->ARM64in.Arith.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Arith.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Arith.argR);
         return;
      case ARM64in_Cmp:
         vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
         ppHRegARM64(i->ARM64in.Cmp.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Cmp.argR);
         return;
      case ARM64in_Logic:
         vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
         ppHRegARM64(i->ARM64in.Logic.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Logic.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Logic.argR);
         return;
      case ARM64in_Test:
         vex_printf("tst ");
         ppHRegARM64(i->ARM64in.Test.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
         vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
         ppHRegARM64(i->ARM64in.Shift.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Shift.argL);
         vex_printf(", ");
         ppARM64RI6(i->ARM64in.Shift.argR);
         return;
      case ARM64in_Unary:
         vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
         ppHRegARM64(i->ARM64in.Unary.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Unary.src);
         return;
      case ARM64in_MovI:
         vex_printf("mov ");
         ppHRegARM64(i->ARM64in.MovI.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.MovI.src);
         return;
      case ARM64in_Imm64:
         vex_printf("imm64 ");
         ppHRegARM64(i->ARM64in.Imm64.dst);
         vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
         return;
      case ARM64in_LdSt64:
         if (i->ARM64in.LdSt64.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
         } else {
            vex_printf("str ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
         }
         return;
      case ARM64in_LdSt32:
         if (i->ARM64in.LdSt32.isLoad) {
            vex_printf("ldruw ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
         } else {
            vex_printf("strw ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
         }
         return;
      case ARM64in_LdSt16:
         if (i->ARM64in.LdSt16.isLoad) {
            vex_printf("ldruh ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
         } else {
            vex_printf("strh ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
         }
         return;
      case ARM64in_LdSt8:
         if (i->ARM64in.LdSt8.isLoad) {
            vex_printf("ldrub ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
         } else {
            vex_printf("strb ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
         }
         return;
      case ARM64in_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XDirect.cond));
         vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
         vex_printf("str x9,");
         ppARM64AMode(i->ARM64in.XDirect.amPC);
         vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
                    i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("blr x9 }");
         return;
      case ARM64in_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XIndir.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XIndir.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XIndir.amPC);
         vex_printf("; imm64 x9,$disp_cp_xindir; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XAssisted.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XAssisted.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XAssisted.amPC);
         vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
                    (Int)i->ARM64in.XAssisted.jk);
         vex_printf("imm64 x9,$disp_cp_xassisted; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_CSel:
         vex_printf("csel ");
         ppHRegARM64(i->ARM64in.CSel.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argR);
         vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
         return;
      case ARM64in_Call:
         vex_printf("call%s ",
                    i->ARM64in.Call.cond==ARM64cc_AL
                       ? " " : showARM64CondCode(i->ARM64in.Call.cond));
         vex_printf("0x%llx [nArgRegs=%d, ",
                    i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
         ppRetLoc(i->ARM64in.Call.rloc);
         vex_printf("]");
         return;
      case ARM64in_AddToSP: {
         Int simm = i->ARM64in.AddToSP.simm;
1525 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1526 simm < 0 ? -simm : simm);
1527 return;
1528 }
1529 case ARM64in_FromSP:
1530 vex_printf("mov ");
1531 ppHRegARM64(i->ARM64in.FromSP.dst);
1532 vex_printf(", xsp");
1533 return;
1534 case ARM64in_Mul:
1535 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
1536 ppHRegARM64(i->ARM64in.Mul.dst);
1537 vex_printf(", ");
1538 ppHRegARM64(i->ARM64in.Mul.argL);
1539 vex_printf(", ");
1540 ppHRegARM64(i->ARM64in.Mul.argR);
1541 return;
1542
1543 case ARM64in_LdrEX: {
1544 const HChar* sz = " ";
1545 switch (i->ARM64in.LdrEX.szB) {
1546 case 1: sz = "b"; break;
1547 case 2: sz = "h"; break;
1548 case 4: case 8: break;
1549 default: vassert(0);
1550 }
1551 vex_printf("ldxr%s %c2, [x4]",
1552 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1553 return;
1554 }
1555 case ARM64in_StrEX: {
1556 const HChar* sz = " ";
1557 switch (i->ARM64in.StrEX.szB) {
1558 case 1: sz = "b"; break;
1559 case 2: sz = "h"; break;
1560 case 4: case 8: break;
1561 default: vassert(0);
1562 }
1563 vex_printf("stxr%s w0, %c2, [x4]",
1564 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1565 return;
1566 }
1567 case ARM64in_MFence:
1568 vex_printf("(mfence) dsb sy; dmb sy; isb");
1569 return;
1570 case ARM64in_VLdStH:
1571 if (i->ARM64in.VLdStH.isLoad) {
1572 vex_printf("ldr ");
1573 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1574 vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1575 ppHRegARM64(i->ARM64in.VLdStH.rN);
1576 vex_printf(")");
1577 } else {
1578 vex_printf("str ");
1579 vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1580 ppHRegARM64(i->ARM64in.VLdStH.rN);
1581 vex_printf("), ");
1582 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1583 }
1584 return;
1585 case ARM64in_VLdStS:
1586 if (i->ARM64in.VLdStS.isLoad) {
1587 vex_printf("ldr ");
1588 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1589 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1590 ppHRegARM64(i->ARM64in.VLdStS.rN);
1591 vex_printf(")");
1592 } else {
1593 vex_printf("str ");
1594 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1595 ppHRegARM64(i->ARM64in.VLdStS.rN);
1596 vex_printf("), ");
1597 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1598 }
1599 return;
1600 case ARM64in_VLdStD:
1601 if (i->ARM64in.VLdStD.isLoad) {
1602 vex_printf("ldr ");
1603 ppHRegARM64(i->ARM64in.VLdStD.dD);
1604 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1605 ppHRegARM64(i->ARM64in.VLdStD.rN);
1606 vex_printf(")");
1607 } else {
1608 vex_printf("str ");
1609 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1610 ppHRegARM64(i->ARM64in.VLdStD.rN);
1611 vex_printf("), ");
1612 ppHRegARM64(i->ARM64in.VLdStD.dD);
1613 }
1614 return;
1615 case ARM64in_VLdStQ:
1616 if (i->ARM64in.VLdStQ.isLoad)
1617 vex_printf("ld1.2d {");
1618 else
1619 vex_printf("st1.2d {");
1620 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1621 vex_printf("}, [");
1622 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1623 vex_printf("]");
1624 return;
1625 case ARM64in_VCvtI2F: {
1626 HChar syn = '?';
1627 UInt fszB = 0;
1628 UInt iszB = 0;
1629 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1630 vex_printf("%ccvtf ", syn);
1631 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1632 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1633 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1634 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1635 return;
1636 }
1637 case ARM64in_VCvtF2I: {
1638 HChar syn = '?';
1639 UInt fszB = 0;
1640 UInt iszB = 0;
1641 HChar rmo = '?';
1642 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1643 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1644 if (armRM < 4) rmo = "npmz"[armRM];
1645 vex_printf("fcvt%c%c ", rmo, syn);
1646 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1647 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1648 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1649 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1650 return;
1651 }
1652 case ARM64in_VCvtSD:
1653 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1654 if (i->ARM64in.VCvtSD.sToD) {
1655 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1656 vex_printf(", ");
1657 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1658 } else {
1659 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1660 vex_printf(", ");
1661 ppHRegARM64(i->ARM64in.VCvtSD.src);
1662 }
1663 return;
1664 case ARM64in_VCvtHS:
1665 vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1666 if (i->ARM64in.VCvtHS.hToS) {
1667 ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1668 vex_printf(", ");
1669 ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1670 } else {
1671 ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1672 vex_printf(", ");
1673 ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1674 }
1675 return;
1676 case ARM64in_VCvtHD:
1677 vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1678 if (i->ARM64in.VCvtHD.hToD) {
1679 ppHRegARM64(i->ARM64in.VCvtHD.dst);
1680 vex_printf(", ");
1681 ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1682 } else {
1683 ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1684 vex_printf(", ");
1685 ppHRegARM64(i->ARM64in.VCvtHD.src);
1686 }
1687 return;
1688 case ARM64in_VUnaryD:
1689 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1690 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1691 vex_printf(", ");
1692 ppHRegARM64(i->ARM64in.VUnaryD.src);
1693 return;
1694 case ARM64in_VUnaryS:
1695 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1696 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1697 vex_printf(", ");
1698 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1699 return;
1700 case ARM64in_VBinD:
1701 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1702 ppHRegARM64(i->ARM64in.VBinD.dst);
1703 vex_printf(", ");
1704 ppHRegARM64(i->ARM64in.VBinD.argL);
1705 vex_printf(", ");
1706 ppHRegARM64(i->ARM64in.VBinD.argR);
1707 return;
1708 case ARM64in_VBinS:
1709 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1710 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1711 vex_printf(", ");
1712 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1713 vex_printf(", ");
1714 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1715 return;
1716 case ARM64in_VCmpD:
1717 vex_printf("fcmp ");
1718 ppHRegARM64(i->ARM64in.VCmpD.argL);
1719 vex_printf(", ");
1720 ppHRegARM64(i->ARM64in.VCmpD.argR);
1721 return;
1722 case ARM64in_VCmpS:
1723 vex_printf("fcmp ");
1724 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1725 vex_printf(", ");
1726 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1727 return;
1728 case ARM64in_VFCSel: {
1729 void (*ppHRegARM64fp)(HReg)
1730 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1731 vex_printf("fcsel ");
1732 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1733 vex_printf(", ");
1734 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1735 vex_printf(", ");
1736 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1737 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1738 return;
1739 }
1740 case ARM64in_FPCR:
1741 if (i->ARM64in.FPCR.toFPCR) {
1742 vex_printf("msr fpcr, ");
1743 ppHRegARM64(i->ARM64in.FPCR.iReg);
1744 } else {
1745 vex_printf("mrs ");
1746 ppHRegARM64(i->ARM64in.FPCR.iReg);
1747 vex_printf(", fpcr");
1748 }
1749 return;
1750 case ARM64in_FPSR:
1751 if (i->ARM64in.FPSR.toFPSR) {
1752 vex_printf("msr fpsr, ");
1753 ppHRegARM64(i->ARM64in.FPSR.iReg);
1754 } else {
1755 vex_printf("mrs ");
1756 ppHRegARM64(i->ARM64in.FPSR.iReg);
1757 vex_printf(", fpsr");
1758 }
1759 return;
1760 case ARM64in_VBinV: {
1761 const HChar* nm = "??";
1762 const HChar* ar = "??";
1763 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1764 vex_printf("%s ", nm);
1765 ppHRegARM64(i->ARM64in.VBinV.dst);
1766 vex_printf(".%s, ", ar);
1767 ppHRegARM64(i->ARM64in.VBinV.argL);
1768 vex_printf(".%s, ", ar);
1769 ppHRegARM64(i->ARM64in.VBinV.argR);
1770 vex_printf(".%s", ar);
1771 return;
1772 }
1773 case ARM64in_VModifyV: {
1774 const HChar* nm = "??";
1775 const HChar* ar = "??";
1776 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1777 vex_printf("%s ", nm);
1778 ppHRegARM64(i->ARM64in.VModifyV.mod);
1779 vex_printf(".%s, ", ar);
1780 ppHRegARM64(i->ARM64in.VModifyV.arg);
1781 vex_printf(".%s", ar);
1782 return;
1783 }
1784 case ARM64in_VUnaryV: {
1785 const HChar* nm = "??";
1786 const HChar* ar = "??";
1787 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1788 vex_printf("%s ", nm);
1789 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1790 vex_printf(".%s, ", ar);
1791 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1792 vex_printf(".%s", ar);
1793 return;
1794 }
1795 case ARM64in_VNarrowV: {
1796 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1797 const HChar* darr[3] = { "8b", "4h", "2s" };
1798 const HChar* sarr[3] = { "8h", "4s", "2d" };
1799 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1800 vex_printf("%s ", nm);
1801 ppHRegARM64(i->ARM64in.VNarrowV.dst);
1802 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1803 ppHRegARM64(i->ARM64in.VNarrowV.src);
1804 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1805 return;
1806 }
1807 case ARM64in_VShiftImmV: {
1808 const HChar* nm = "??";
1809 const HChar* ar = "??";
1810 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
1811 vex_printf("%s ", nm);
1812 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1813 vex_printf(".%s, ", ar);
1814 ppHRegARM64(i->ARM64in.VShiftImmV.src);
1815 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1816 return;
1817 }
1818 case ARM64in_VExtV: {
1819 vex_printf("ext ");
1820 ppHRegARM64(i->ARM64in.VExtV.dst);
1821 vex_printf(".16b, ");
1822 ppHRegARM64(i->ARM64in.VExtV.srcLo);
1823 vex_printf(".16b, ");
1824 ppHRegARM64(i->ARM64in.VExtV.srcHi);
1825 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1826 return;
1827 }
1828 case ARM64in_VImmQ:
1829 vex_printf("qimm ");
1830 ppHRegARM64(i->ARM64in.VImmQ.rQ);
1831 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1832 return;
1833 case ARM64in_VDfromX:
1834 vex_printf("fmov ");
1835 ppHRegARM64(i->ARM64in.VDfromX.rD);
1836 vex_printf(", ");
1837 ppHRegARM64(i->ARM64in.VDfromX.rX);
1838 return;
1839 case ARM64in_VQfromX:
1840 vex_printf("fmov ");
1841 ppHRegARM64(i->ARM64in.VQfromX.rQ);
1842 vex_printf(".d[0], ");
1843 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1844 return;
1845 case ARM64in_VQfromXX:
1846 vex_printf("qFromXX ");
1847 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1848 vex_printf(", ");
1849 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1850 vex_printf(", ");
1851 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1852 return;
1853 case ARM64in_VXfromQ:
1854 vex_printf("fmov ");
1855 ppHRegARM64(i->ARM64in.VXfromQ.rX);
1856 vex_printf(", ");
1857 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1858 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1859 return;
1860 case ARM64in_VXfromDorS:
1861 vex_printf("fmov ");
1862 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1863 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1864 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1865 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1866 return;
1867 case ARM64in_VMov: {
1868 UChar aux = '?';
1869 switch (i->ARM64in.VMov.szB) {
1870 case 16: aux = 'q'; break;
1871 case 8: aux = 'd'; break;
1872 case 4: aux = 's'; break;
1873 default: break;
1874 }
1875 vex_printf("mov(%c) ", aux);
1876 ppHRegARM64(i->ARM64in.VMov.dst);
1877 vex_printf(", ");
1878 ppHRegARM64(i->ARM64in.VMov.src);
1879 return;
1880 }
1881 case ARM64in_EvCheck:
1882 vex_printf("(evCheck) ldr w9,");
1883 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1884 vex_printf("; subs w9,w9,$1; str w9,");
1885 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1886 vex_printf("; bpl nofail; ldr x9,");
1887 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1888 vex_printf("; br x9; nofail:");
1889 return;
1890 case ARM64in_ProfInc:
1891 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1892                  "ldr x8,[x9]; add x8,x8,#1; str x8,[x9]");
1893 return;
1894 default:
1895 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
1896 vpanic("ppARM64Instr(1)");
1897 return;
1898 }
1899 }
1900
1901
1902 /* --------- Helpers for register allocation. --------- */
1903
1904 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
1905 {
1906 vassert(mode64 == True);
1907 initHRegUsage(u);
1908 switch (i->tag) {
1909 case ARM64in_Arith:
1910 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
1911 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
1912 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
1913 return;
1914 case ARM64in_Cmp:
1915 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
1916 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
1917 return;
1918 case ARM64in_Logic:
1919 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
1920 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
1921 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
1922 return;
1923 case ARM64in_Test:
1924 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
1925 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
1926 return;
1927 case ARM64in_Shift:
1928 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
1929 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
1930 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
1931 return;
1932 case ARM64in_Unary:
1933 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
1934 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
1935 return;
1936 case ARM64in_MovI:
1937 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
1938 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
1939 return;
1940 case ARM64in_Imm64:
1941 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
1942 return;
1943 case ARM64in_LdSt64:
1944 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
1945 if (i->ARM64in.LdSt64.isLoad) {
1946 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
1947 } else {
1948 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
1949 }
1950 return;
1951 case ARM64in_LdSt32:
1952 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
1953 if (i->ARM64in.LdSt32.isLoad) {
1954 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
1955 } else {
1956 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
1957 }
1958 return;
1959 case ARM64in_LdSt16:
1960 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
1961 if (i->ARM64in.LdSt16.isLoad) {
1962 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
1963 } else {
1964 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
1965 }
1966 return;
1967 case ARM64in_LdSt8:
1968 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
1969 if (i->ARM64in.LdSt8.isLoad) {
1970 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
1971 } else {
1972 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
1973 }
1974 return;
1975 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1976 conditionally exit the block. Hence we only need to list (1)
1977 the registers that they read, and (2) the registers that they
1978 write in the case where the block is not exited. (2) is
1979 empty, hence only (1) is relevant here. */
1980 case ARM64in_XDirect:
1981 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
1982 return;
1983 case ARM64in_XIndir:
1984 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
1985 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
1986 return;
1987 case ARM64in_XAssisted:
1988 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
1989 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
1990 return;
1991 case ARM64in_CSel:
1992 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
1993 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
1994 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
1995 return;
1996 case ARM64in_Call:
1997 /* logic and comments copied/modified from x86 back end */
1998 /* This is a bit subtle. */
1999 /* First off, claim it trashes all the caller-saved regs
2000 which fall within the register allocator's jurisdiction.
2001 These I believe to be x0 to x7 and the 128-bit vector
2002 registers in use, q16 .. q20. */
2003 addHRegUse(u, HRmWrite, hregARM64_X0());
2004 addHRegUse(u, HRmWrite, hregARM64_X1());
2005 addHRegUse(u, HRmWrite, hregARM64_X2());
2006 addHRegUse(u, HRmWrite, hregARM64_X3());
2007 addHRegUse(u, HRmWrite, hregARM64_X4());
2008 addHRegUse(u, HRmWrite, hregARM64_X5());
2009 addHRegUse(u, HRmWrite, hregARM64_X6());
2010 addHRegUse(u, HRmWrite, hregARM64_X7());
2011 addHRegUse(u, HRmWrite, hregARM64_Q16());
2012 addHRegUse(u, HRmWrite, hregARM64_Q17());
2013 addHRegUse(u, HRmWrite, hregARM64_Q18());
2014 addHRegUse(u, HRmWrite, hregARM64_Q19());
2015 addHRegUse(u, HRmWrite, hregARM64_Q20());
2016 /* Now we have to state any parameter-carrying registers
2017 which might be read. This depends on nArgRegs. */
2018 switch (i->ARM64in.Call.nArgRegs) {
2019 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2020 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2021 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2022 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2023 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2024 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2025 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2026 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2027 case 0: break;
2028 default: vpanic("getRegUsage_ARM64:Call:regparms");
2029 }
2030 /* Finally, there is the issue that the insn trashes a
2031 register because the literal target address has to be
2032 loaded into a register. However, we reserve x9 for that
2033 purpose so there's no further complexity here. Stating x9
2034 as trashed is pointless since it's not under the control
2035 of the allocator, but what the hell. */
2036 addHRegUse(u, HRmWrite, hregARM64_X9());
2037 return;
2038 case ARM64in_AddToSP:
2039 /* Only changes SP, but regalloc doesn't control that, hence
2040 we don't care. */
2041 return;
2042 case ARM64in_FromSP:
2043 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2044 return;
2045 case ARM64in_Mul:
2046 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2047 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2048 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2049 return;
2050 case ARM64in_LdrEX:
2051 addHRegUse(u, HRmRead, hregARM64_X4());
2052 addHRegUse(u, HRmWrite, hregARM64_X2());
2053 return;
2054 case ARM64in_StrEX:
2055 addHRegUse(u, HRmRead, hregARM64_X4());
2056 addHRegUse(u, HRmWrite, hregARM64_X0());
2057 addHRegUse(u, HRmRead, hregARM64_X2());
2058 return;
2059 case ARM64in_MFence:
2060 return;
2061 case ARM64in_VLdStH:
2062 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2063 if (i->ARM64in.VLdStH.isLoad) {
2064 addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2065 } else {
2066 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2067 }
2068 return;
2069 case ARM64in_VLdStS:
2070 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2071 if (i->ARM64in.VLdStS.isLoad) {
2072 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2073 } else {
2074 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2075 }
2076 return;
2077 case ARM64in_VLdStD:
2078 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2079 if (i->ARM64in.VLdStD.isLoad) {
2080 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2081 } else {
2082 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2083 }
2084 return;
2085 case ARM64in_VLdStQ:
2086 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2087 if (i->ARM64in.VLdStQ.isLoad)
2088 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2089 else
2090 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2091 return;
2092 case ARM64in_VCvtI2F:
2093 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2094 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2095 return;
2096 case ARM64in_VCvtF2I:
2097 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2098 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2099 return;
2100 case ARM64in_VCvtSD:
2101 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2102 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2103 return;
2104 case ARM64in_VCvtHS:
2105 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2106 addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
2107 return;
2108 case ARM64in_VCvtHD:
2109 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2110 addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
2111 return;
2112 case ARM64in_VUnaryD:
2113 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2114 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2115 return;
2116 case ARM64in_VUnaryS:
2117 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2118 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2119 return;
2120 case ARM64in_VBinD:
2121 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2122 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2123 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2124 return;
2125 case ARM64in_VBinS:
2126 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2127 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2128 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2129 return;
2130 case ARM64in_VCmpD:
2131 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2132 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2133 return;
2134 case ARM64in_VCmpS:
2135 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2136 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2137 return;
2138 case ARM64in_VFCSel:
2139 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2140 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2141 addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2142 return;
2143 case ARM64in_FPCR:
2144 if (i->ARM64in.FPCR.toFPCR)
2145 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2146 else
2147 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2148 return;
2149 case ARM64in_FPSR:
2150 if (i->ARM64in.FPSR.toFPSR)
2151 addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2152 else
2153 addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2154 return;
2155 case ARM64in_VBinV:
2156 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2157 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2158 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2159 return;
2160 case ARM64in_VModifyV:
2161 addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2162 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2163 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2164 return;
2165 case ARM64in_VUnaryV:
2166 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2167 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2168 return;
2169 case ARM64in_VNarrowV:
2170 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2171 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2172 return;
2173 case ARM64in_VShiftImmV:
2174 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2175 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2176 return;
2177 case ARM64in_VExtV:
2178 addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2179 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2180 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2181 return;
2182 case ARM64in_VImmQ:
2183 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2184 return;
2185 case ARM64in_VDfromX:
2186 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2187 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2188 return;
2189 case ARM64in_VQfromX:
2190 addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2191 addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
2192 return;
2193 case ARM64in_VQfromXX:
2194 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2195 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2196 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2197 return;
2198 case ARM64in_VXfromQ:
2199 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2200 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2201 return;
2202 case ARM64in_VXfromDorS:
2203 addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2204 addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
2205 return;
2206 case ARM64in_VMov:
2207 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2208 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2209 return;
2210 case ARM64in_EvCheck:
2211 /* We expect both amodes only to mention x21, so this is in
2212 fact pointless, since x21 isn't allocatable, but
2213 anyway.. */
2214 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2215 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2216 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2217 return;
2218 case ARM64in_ProfInc:
2219 /* Again, pointless to actually state these since neither
2220 is available to RA. */
2221 addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2222 addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2223 return;
2224 default:
2225 ppARM64Instr(i);
2226 vpanic("getRegUsage_ARM64Instr");
2227 }
2228 }
2229
2230
2231 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2232 {
2233 vassert(mode64 == True);
2234 switch (i->tag) {
2235 case ARM64in_Arith:
2236 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2237 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2238 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2239 return;
2240 case ARM64in_Cmp:
2241 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2242 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2243 return;
2244 case ARM64in_Logic:
2245 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2246 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2247 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2248 return;
2249 case ARM64in_Test:
2250 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2251          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2252 return;
2253 case ARM64in_Shift:
2254 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2255 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2256 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2257 return;
2258 case ARM64in_Unary:
2259 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2260 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2261 return;
2262 case ARM64in_MovI:
2263 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2264 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2265 return;
2266 case ARM64in_Imm64:
2267 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2268 return;
2269 case ARM64in_LdSt64:
2270 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2271 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2272 return;
2273 case ARM64in_LdSt32:
2274 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2275 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2276 return;
2277 case ARM64in_LdSt16:
2278 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2279 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2280 return;
2281 case ARM64in_LdSt8:
2282 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2283 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2284 return;
2285 case ARM64in_XDirect:
2286 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2287 return;
2288 case ARM64in_XIndir:
2289 i->ARM64in.XIndir.dstGA
2290 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2291 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2292 return;
2293 case ARM64in_XAssisted:
2294 i->ARM64in.XAssisted.dstGA
2295 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2296 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2297 return;
2298 case ARM64in_CSel:
2299 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2300 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2301 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2302 return;
2303 case ARM64in_Call:
2304 return;
2305 case ARM64in_AddToSP:
2306 return;
2307 case ARM64in_FromSP:
2308 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2309 return;
2310 case ARM64in_Mul:
2311 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2312 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2313 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2314          return;
2315 case ARM64in_LdrEX:
2316 return;
2317 case ARM64in_StrEX:
2318 return;
2319 case ARM64in_MFence:
2320 return;
2321 case ARM64in_VLdStH:
2322 i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2323 i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2324 return;
2325 case ARM64in_VLdStS:
2326 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2327 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2328 return;
2329 case ARM64in_VLdStD:
2330 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2331 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2332 return;
2333 case ARM64in_VLdStQ:
2334 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2335 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2336 return;
2337 case ARM64in_VCvtI2F:
2338 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2339 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2340 return;
2341 case ARM64in_VCvtF2I:
2342 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2343 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2344 return;
2345 case ARM64in_VCvtSD:
2346 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2347 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2348 return;
2349 case ARM64in_VCvtHS:
2350 i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2351 i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2352 return;
2353 case ARM64in_VCvtHD:
2354 i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2355 i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2356 return;
2357 case ARM64in_VUnaryD:
2358 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2359 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2360 return;
2361 case ARM64in_VUnaryS:
2362 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2363 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2364 return;
2365 case ARM64in_VBinD:
2366 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2367 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2368 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2369 return;
2370 case ARM64in_VBinS:
2371 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2372 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2373 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2374 return;
2375 case ARM64in_VCmpD:
2376 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2377 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2378 return;
2379 case ARM64in_VCmpS:
2380 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2381 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2382 return;
2383 case ARM64in_VFCSel:
2384 i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2385 i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2386 i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2387 return;
2388 case ARM64in_FPCR:
2389 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2390 return;
2391 case ARM64in_FPSR:
2392 i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2393 return;
2394 case ARM64in_VBinV:
2395 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2396 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2397 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2398 return;
2399 case ARM64in_VModifyV:
2400 i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2401 i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2402 return;
2403 case ARM64in_VUnaryV:
2404 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2405 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2406 return;
2407 case ARM64in_VNarrowV:
2408 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2409 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2410 return;
2411 case ARM64in_VShiftImmV:
2412 i->ARM64in.VShiftImmV.dst
2413 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2414 i->ARM64in.VShiftImmV.src
2415 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2416 return;
2417 case ARM64in_VExtV:
2418 i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2419 i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2420 i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2421 return;
2422 case ARM64in_VImmQ:
2423 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2424 return;
2425 case ARM64in_VDfromX:
2426 i->ARM64in.VDfromX.rD
2427 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2428 i->ARM64in.VDfromX.rX
2429 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2430 return;
2431 case ARM64in_VQfromX:
2432 i->ARM64in.VQfromX.rQ
2433 = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2434 i->ARM64in.VQfromX.rXlo
2435 = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2436 return;
2437 case ARM64in_VQfromXX:
2438 i->ARM64in.VQfromXX.rQ
2439 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2440 i->ARM64in.VQfromXX.rXhi
2441 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2442 i->ARM64in.VQfromXX.rXlo
2443 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2444 return;
2445 case ARM64in_VXfromQ:
2446 i->ARM64in.VXfromQ.rX
2447 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2448 i->ARM64in.VXfromQ.rQ
2449 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2450 return;
2451 case ARM64in_VXfromDorS:
2452 i->ARM64in.VXfromDorS.rX
2453 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2454 i->ARM64in.VXfromDorS.rDorS
2455 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2456 return;
2457 case ARM64in_VMov:
2458 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2459 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2460 return;
2461 case ARM64in_EvCheck:
2462 /* We expect both amodes only to mention x21, so this is in
2463 fact pointless, since x21 isn't allocatable, but
2464 anyway.. */
2465 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2466 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2467 return;
2468 case ARM64in_ProfInc:
2469 /* hardwires x8 and x9 -- nothing to modify. */
2470 return;
2471 default:
2472 ppARM64Instr(i);
2473 vpanic("mapRegs_ARM64Instr");
2474 }
2475 }
2476
2477 /* Figure out if i represents a reg-reg move, and if so assign the
2478 source and destination to *src and *dst. If in doubt say No. Used
2479 by the register allocator to do move coalescing.
2480 */
2481 Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
2482 {
2483 switch (i->tag) {
2484 case ARM64in_MovI:
2485 *src = i->ARM64in.MovI.src;
2486 *dst = i->ARM64in.MovI.dst;
2487 return True;
2488 case ARM64in_VMov:
2489 *src = i->ARM64in.VMov.src;
2490 *dst = i->ARM64in.VMov.dst;
2491 return True;
2492 default:
2493 break;
2494 }
2495
2496 return False;
2497 }
2498
2499
2500 /* Generate arm spill/reload instructions under the direction of the
2501 register allocator. Note it's critical these don't write the
2502 condition codes. */
2503
2504 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2505 HReg rreg, Int offsetB, Bool mode64 )
2506 {
2507 HRegClass rclass;
2508 vassert(offsetB >= 0);
2509 vassert(!hregIsVirtual(rreg));
2510 vassert(mode64 == True);
2511 *i1 = *i2 = NULL;
2512 rclass = hregClass(rreg);
2513 switch (rclass) {
2514 case HRcInt64:
2515 vassert(0 == (offsetB & 7));
2516 offsetB >>= 3;
2517 vassert(offsetB < 4096);
2518 *i1 = ARM64Instr_LdSt64(
2519 False/*!isLoad*/,
2520 rreg,
2521 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2522 );
2523 return;
2524 case HRcFlt64:
2525 vassert(0 == (offsetB & 7));
2526 vassert(offsetB >= 0 && offsetB < 32768);
2527 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2528 rreg, hregARM64_X21(), offsetB);
2529 return;
2530 case HRcVec128: {
2531 HReg x21 = hregARM64_X21(); // baseblock
2532 HReg x9 = hregARM64_X9(); // spill temporary
2533 vassert(0 == (offsetB & 15)); // check sane alignment
2534 vassert(offsetB < 4096);
2535 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2536 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2537 return;
2538 }
2539 default:
2540 ppHRegClass(rclass);
2541          vpanic("genSpill_ARM64: unimplemented regclass");
2542 }
2543 }
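/* For example (illustrative): spilling an HRcInt64 rreg at
   offsetB == 24 yields a single "str rreg, [x21, #24]", encoded via a
   scaled RI12 amode (uimm12 == 3, szB == 8).  Spilling an HRcVec128
   rreg takes two instructions, because the Q-register store used here
   only accepts a plain register address: first "add x9, x21, #offsetB",
   then "st1.2d {rreg}, [x9]". */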
2544
2545 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2546 HReg rreg, Int offsetB, Bool mode64 )
2547 {
2548 HRegClass rclass;
2549 vassert(offsetB >= 0);
2550 vassert(!hregIsVirtual(rreg));
2551 vassert(mode64 == True);
2552 *i1 = *i2 = NULL;
2553 rclass = hregClass(rreg);
2554 switch (rclass) {
2555 case HRcInt64:
2556 vassert(0 == (offsetB & 7));
2557 offsetB >>= 3;
2558 vassert(offsetB < 4096);
2559 *i1 = ARM64Instr_LdSt64(
2560 True/*isLoad*/,
2561 rreg,
2562 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2563 );
2564 return;
2565 case HRcFlt64:
2566 vassert(0 == (offsetB & 7));
2567 vassert(offsetB >= 0 && offsetB < 32768);
2568 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2569 rreg, hregARM64_X21(), offsetB);
2570 return;
2571 case HRcVec128: {
2572 HReg x21 = hregARM64_X21(); // baseblock
2573 HReg x9 = hregARM64_X9(); // spill temporary
2574 vassert(0 == (offsetB & 15)); // check sane alignment
2575 vassert(offsetB < 4096);
2576 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2577 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2578 return;
2579 }
2580 default:
2581 ppHRegClass(rclass);
2582          vpanic("genReload_ARM64: unimplemented regclass");
2583 }
2584 }
2585
2586
2587 /* Emit an instruction into buf and return the number of bytes used.
2588 Note that buf is not the insn's final place, and therefore it is
2589 imperative to emit position-independent code. */
2590
2591 static inline UInt iregEnc ( HReg r )
2592 {
2593 UInt n;
2594 vassert(hregClass(r) == HRcInt64);
2595 vassert(!hregIsVirtual(r));
2596 n = hregEncoding(r);
2597 vassert(n <= 30);
2598 return n;
2599 }
2600
2601 static inline UInt dregEnc ( HReg r )
2602 {
2603 UInt n;
2604 vassert(hregClass(r) == HRcFlt64);
2605 vassert(!hregIsVirtual(r));
2606 n = hregEncoding(r);
2607 vassert(n <= 31);
2608 return n;
2609 }
2610
2611 static inline UInt qregEnc ( HReg r )
2612 {
2613 UInt n;
2614 vassert(hregClass(r) == HRcVec128);
2615 vassert(!hregIsVirtual(r));
2616 n = hregEncoding(r);
2617 vassert(n <= 31);
2618 return n;
2619 }
2620
2621 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2622 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2623
2624 #define X00 BITS4(0,0, 0,0)
2625 #define X01 BITS4(0,0, 0,1)
2626 #define X10 BITS4(0,0, 1,0)
2627 #define X11 BITS4(0,0, 1,1)
2628
2629 #define X000 BITS4(0, 0,0,0)
2630 #define X001 BITS4(0, 0,0,1)
2631 #define X010 BITS4(0, 0,1,0)
2632 #define X011 BITS4(0, 0,1,1)
2633 #define X100 BITS4(0, 1,0,0)
2634 #define X101 BITS4(0, 1,0,1)
2635 #define X110 BITS4(0, 1,1,0)
2636 #define X111 BITS4(0, 1,1,1)
2637
2638 #define X0000 BITS4(0,0,0,0)
2639 #define X0001 BITS4(0,0,0,1)
2640 #define X0010 BITS4(0,0,1,0)
2641 #define X0011 BITS4(0,0,1,1)
2642
2643 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2644 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
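/* These BITSn macros just build small named bit patterns; for
   example BITS8(0,1,0,1,1,0,0,0) is 0x58.  The X... constants are
   such patterns, named after their binary spelling, and are used to
   assemble instruction fields in the emitters further below. */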
2645
2646 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2647 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2648 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2649 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2650 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2651 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2652 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2653 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2654 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2655
2656 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2657 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2658 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2659 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2660 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2661 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2662 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2663 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2664 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2665 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2666 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2667 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2668 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2669 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2670 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
2671 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
2672 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
2673 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
2674 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
2675 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
2676 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
2677 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
2678 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
2679 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
2680 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
2681 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
2682 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
2683 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
2684 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
2685 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
2686 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
2687 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
2688 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
2689 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
2690 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
2691 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
2692 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
2693 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
2694 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
2695 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
2696 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
2697 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
2698 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
2699 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
2700 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
2701 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
2702 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
2703
2704 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
2705 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
2706 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
2707 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
2708
2709 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
2710 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
2711 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
2712 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
2713 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
2714 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
2715 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
2716 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
2717 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
2718 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
2719 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
2720 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
2721 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
2722 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
2723 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
2724 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
2725 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
2726 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
2727 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
2728 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
2729 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
2730 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
2731 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
2732 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
2733 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
2734 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
2735 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
2736 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
2737 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
2738 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
2739 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
2740 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
2741
2742
2743 /* --- 4 fields --- */
2744
2745 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
2746 vassert(8+19+1+4 == 32);
2747 vassert(f1 < (1<<8));
2748 vassert(f2 < (1<<19));
2749 vassert(f3 < (1<<1));
2750 vassert(f4 < (1<<4));
2751 UInt w = 0;
2752 w = (w << 8) | f1;
2753 w = (w << 19) | f2;
2754 w = (w << 1) | f3;
2755 w = (w << 4) | f4;
2756 return w;
2757 }
2758
2759 /* --- 5 fields --- */
2760
2761 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
2762 UInt f3, UInt f4, UInt f5 ) {
2763 vassert(3+6+2+16+5 == 32);
2764 vassert(f1 < (1<<3));
2765 vassert(f2 < (1<<6));
2766 vassert(f3 < (1<<2));
2767 vassert(f4 < (1<<16));
2768 vassert(f5 < (1<<5));
2769 UInt w = 0;
2770 w = (w << 3) | f1;
2771 w = (w << 6) | f2;
2772 w = (w << 2) | f3;
2773 w = (w << 16) | f4;
2774 w = (w << 5) | f5;
2775 return w;
2776 }
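/* Worked example (illustrative only): the MOVZ-immediate encoding
   used by imm64_to_ireg below has the layout
      110 100101 | hw(2) | imm16 | Rd(5)
   so X_3_6_2_16_5(X110, X100101, X00, 0x1234, 9) assembles
   "movz x9, #0x1234, lsl #0", i.e. the word 0xD2824689. */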
2777
2778 /* --- 6 fields --- */
2779
2780 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
2781 UInt f4, UInt f5, UInt f6 ) {
2782 vassert(2+6+2+12+5+5 == 32);
2783 vassert(f1 < (1<<2));
2784 vassert(f2 < (1<<6));
2785 vassert(f3 < (1<<2));
2786 vassert(f4 < (1<<12));
2787 vassert(f5 < (1<<5));
2788 vassert(f6 < (1<<5));
2789 UInt w = 0;
2790 w = (w << 2) | f1;
2791 w = (w << 6) | f2;
2792 w = (w << 2) | f3;
2793 w = (w << 12) | f4;
2794 w = (w << 5) | f5;
2795 w = (w << 5) | f6;
2796 return w;
2797 }
2798
2799 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
2800 UInt f4, UInt f5, UInt f6 ) {
2801 vassert(3+8+5+6+5+5 == 32);
2802 vassert(f1 < (1<<3));
2803 vassert(f2 < (1<<8));
2804 vassert(f3 < (1<<5));
2805 vassert(f4 < (1<<6));
2806 vassert(f5 < (1<<5));
2807 vassert(f6 < (1<<5));
2808 UInt w = 0;
2809 w = (w << 3) | f1;
2810 w = (w << 8) | f2;
2811 w = (w << 5) | f3;
2812 w = (w << 6) | f4;
2813 w = (w << 5) | f5;
2814 w = (w << 5) | f6;
2815 return w;
2816 }
2817
2818 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
2819 UInt f4, UInt f5, UInt f6 ) {
2820       vassert(3+5+8+6+5+5 == 32);
2821 vassert(f1 < (1<<3));
2822 vassert(f2 < (1<<5));
2823 vassert(f3 < (1<<8));
2824 vassert(f4 < (1<<6));
2825 vassert(f5 < (1<<5));
2826 vassert(f6 < (1<<5));
2827 UInt w = 0;
2828 w = (w << 3) | f1;
2829 w = (w << 5) | f2;
2830 w = (w << 8) | f3;
2831 w = (w << 6) | f4;
2832 w = (w << 5) | f5;
2833 w = (w << 5) | f6;
2834 return w;
2835 }
2836
2837 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
2838 UInt f4, UInt f5, UInt f6 ) {
2839 vassert(3+6+7+6+5+5 == 32);
2840 vassert(f1 < (1<<3));
2841 vassert(f2 < (1<<6));
2842 vassert(f3 < (1<<7));
2843 vassert(f4 < (1<<6));
2844 vassert(f5 < (1<<5));
2845 vassert(f6 < (1<<5));
2846 UInt w = 0;
2847 w = (w << 3) | f1;
2848 w = (w << 6) | f2;
2849 w = (w << 7) | f3;
2850 w = (w << 6) | f4;
2851 w = (w << 5) | f5;
2852 w = (w << 5) | f6;
2853 return w;
2854 }
2855
2856 /* --- 7 fields --- */
2857
2858 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
2859 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2860 vassert(2+6+3+9+2+5+5 == 32);
2861 vassert(f1 < (1<<2));
2862 vassert(f2 < (1<<6));
2863 vassert(f3 < (1<<3));
2864 vassert(f4 < (1<<9));
2865 vassert(f5 < (1<<2));
2866 vassert(f6 < (1<<5));
2867 vassert(f7 < (1<<5));
2868 UInt w = 0;
2869 w = (w << 2) | f1;
2870 w = (w << 6) | f2;
2871 w = (w << 3) | f3;
2872 w = (w << 9) | f4;
2873 w = (w << 2) | f5;
2874 w = (w << 5) | f6;
2875 w = (w << 5) | f7;
2876 return w;
2877 }
2878
2879 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
2880 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2881 vassert(3+6+1+6+6+5+5 == 32);
2882 vassert(f1 < (1<<3));
2883 vassert(f2 < (1<<6));
2884 vassert(f3 < (1<<1));
2885 vassert(f4 < (1<<6));
2886 vassert(f5 < (1<<6));
2887 vassert(f6 < (1<<5));
2888 vassert(f7 < (1<<5));
2889 UInt w = 0;
2890 w = (w << 3) | f1;
2891 w = (w << 6) | f2;
2892 w = (w << 1) | f3;
2893 w = (w << 6) | f4;
2894 w = (w << 6) | f5;
2895 w = (w << 5) | f6;
2896 w = (w << 5) | f7;
2897 return w;
2898 }
2899
2900
2901 //ZZ #define X0000 BITS4(0,0,0,0)
2902 //ZZ #define X0001 BITS4(0,0,0,1)
2903 //ZZ #define X0010 BITS4(0,0,1,0)
2904 //ZZ #define X0011 BITS4(0,0,1,1)
2905 //ZZ #define X0100 BITS4(0,1,0,0)
2906 //ZZ #define X0101 BITS4(0,1,0,1)
2907 //ZZ #define X0110 BITS4(0,1,1,0)
2908 //ZZ #define X0111 BITS4(0,1,1,1)
2909 //ZZ #define X1000 BITS4(1,0,0,0)
2910 //ZZ #define X1001 BITS4(1,0,0,1)
2911 //ZZ #define X1010 BITS4(1,0,1,0)
2912 //ZZ #define X1011 BITS4(1,0,1,1)
2913 //ZZ #define X1100 BITS4(1,1,0,0)
2914 //ZZ #define X1101 BITS4(1,1,0,1)
2915 //ZZ #define X1110 BITS4(1,1,1,0)
2916 //ZZ #define X1111 BITS4(1,1,1,1)
2917 /*
2918 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2919 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2920 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2921 (((zzx3) & 0xF) << 12))
2922
2923 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2924 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2925 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2926 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2927
2928 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2929 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2930 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2931 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2932
2933 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2934 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2935 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2936 (((zzx0) & 0xF) << 0))
2937
2938 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2939 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2940 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2941 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2942 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2943
2944 #define XX______(zzx7,zzx6) \
2945 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2946 */
2947
2948
2949 /* Get an immediate into a register, using only that register. */
2950 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
2951 {
2952 if (imm64 == 0) {
2953 // This has to be special-cased, since the logic below
2954 // will leave the register unchanged in this case.
2955 // MOVZ xD, #0, LSL #0
2956 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
2957 return p;
2958 }
2959
2960 // There must be at least one non-zero halfword. Find the
2961 // lowest nonzero such, and use MOVZ to install it and zero
2962 // out the rest of the register.
2963 UShort h[4];
2964 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
2965 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
2966 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
2967 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
2968
2969 UInt i;
2970 for (i = 0; i < 4; i++) {
2971 if (h[i] != 0)
2972 break;
2973 }
2974 vassert(i < 4);
2975
2976 // MOVZ xD, h[i], LSL (16*i)
2977 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
2978
2979 // Work on upwards through h[i], using MOVK to stuff in any
2980 // remaining nonzero elements.
2981 i++;
2982 for (; i < 4; i++) {
2983 if (h[i] == 0)
2984 continue;
2985 // MOVK xD, h[i], LSL (16*i)
2986 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
2987 }
2988
2989 return p;
2990 }
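/* Illustrative example: for imm64 == 0x0000000100004000 the halfwords
   are h[0]=0x4000, h[1]=0, h[2]=0x0001, h[3]=0.  The lowest nonzero
   halfword is h[0], so the emitted sequence is
      movz xD, #0x4000, lsl #0
      movk xD, #0x0001, lsl #32
   and the remaining all-zero halfwords are skipped entirely. */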
2991
2992 /* Get an immediate into a register, using only that register, and
2993 generating exactly 4 instructions, regardless of the value of the
2994 immediate. This is used when generating sections of code that need
2995 to be patched later, so as to guarantee a specific size. */
2996 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
2997 {
2998 UShort h[4];
2999 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3000 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3001 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3002 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3003 // Work on upwards through h[i], using MOVK to stuff in the
3004 // remaining elements.
3005 UInt i;
3006 for (i = 0; i < 4; i++) {
3007 if (i == 0) {
3008 // MOVZ xD, h[0], LSL (16*0)
3009 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3010 } else {
3011 // MOVK xD, h[i], LSL (16*i)
3012 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3013 }
3014 }
3015 return p;
3016 }
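/* Note, for illustration: unlike imm64_to_ireg, this always emits one
   MOVZ followed by three MOVKs, even for zero halfwords -- imm64 == 0
   becomes "movz xD,#0; movk xD,#0,lsl#16; movk xD,#0,lsl#32;
   movk xD,#0,lsl#48".  That fixed shape is what lets later patching
   overwrite the constant in place, and is exactly what
   is_imm64_to_ireg_EXACTLY4 below checks for. */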
3017
3018 /* Check whether p points at a 4-insn sequence cooked up by
3019 imm64_to_ireg_EXACTLY4(). */
3020 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3021 {
3022 UShort h[4];
3023 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3024 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3025 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3026 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3027 // Work on upwards through h[i], using MOVK to stuff in the
3028 // remaining elements.
3029 UInt i;
3030 for (i = 0; i < 4; i++) {
3031 UInt expected;
3032 if (i == 0) {
3033 // MOVZ xD, h[0], LSL (16*0)
3034 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3035 } else {
3036 // MOVK xD, h[i], LSL (16*i)
3037 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3038 }
3039 if (p[i] != expected)
3040 return False;
3041 }
3042 return True;
3043 }
3044
3045
3046 /* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
3047 rD, using the given amode for the address. */
3048 static UInt* do_load_or_store8 ( UInt* p,
3049 Bool isLoad, UInt wD, ARM64AMode* am )
3050 {
3051 vassert(wD <= 30);
3052 if (am->tag == ARM64am_RI9) {
3053 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3054 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3055 */
3056 Int simm9 = am->ARM64am.RI9.simm9;
3057 vassert(-256 <= simm9 && simm9 <= 255);
3058 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3059 simm9 & 0x1FF, X00,
3060 iregEnc(am->ARM64am.RI9.reg), wD);
3061 *p++ = instr;
3062 return p;
3063 }
3064 if (am->tag == ARM64am_RI12) {
3065 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3066 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3067 */
3068 UInt uimm12 = am->ARM64am.RI12.uimm12;
3069 UInt scale = am->ARM64am.RI12.szB;
3070 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3071 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3072 vassert(xN <= 30);
3073 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3074 uimm12, xN, wD);
3075 *p++ = instr;
3076 return p;
3077 }
3078 if (am->tag == ARM64am_RR) {
3079       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3080          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3081 */
3082 UInt xN = iregEnc(am->ARM64am.RR.base);
3083 UInt xM = iregEnc(am->ARM64am.RR.index);
3084 vassert(xN <= 30);
3085 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3086 xM, X011010, xN, wD);
3087 *p++ = instr;
3088 return p;
3089 }
3090 vpanic("do_load_or_store8");
3091 vassert(0);
3092 }
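/* Note on the three address modes handled above (the 16-, 32- and
   64-bit variants below follow the same scheme):
      ARM64am_RI9  -> unscaled, signed 9-bit byte offset   (STURB/LDURB)
      ARM64am_RI12 -> scaled, unsigned 12-bit offset       (STRB/LDRB [Xn, #imm])
      ARM64am_RR   -> base register plus index register    (STRB/LDRB [Xn, Xm])
   The szB field of an RI12 amode must equal the access size, which is
   what the 'scale' asserts check. */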
3093
3094
3095 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3096 rD, using the given amode for the address. */
3097 static UInt* do_load_or_store16 ( UInt* p,
3098 Bool isLoad, UInt wD, ARM64AMode* am )
3099 {
3100 vassert(wD <= 30);
3101 if (am->tag == ARM64am_RI9) {
3102 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3103 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3104 */
3105 Int simm9 = am->ARM64am.RI9.simm9;
3106 vassert(-256 <= simm9 && simm9 <= 255);
3107 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3108 simm9 & 0x1FF, X00,
3109 iregEnc(am->ARM64am.RI9.reg), wD);
3110 *p++ = instr;
3111 return p;
3112 }
3113 if (am->tag == ARM64am_RI12) {
3114 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3115 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3116 */
3117 UInt uimm12 = am->ARM64am.RI12.uimm12;
3118 UInt scale = am->ARM64am.RI12.szB;
3119 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3120 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3121 vassert(xN <= 30);
3122 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3123 uimm12, xN, wD);
3124 *p++ = instr;
3125 return p;
3126 }
3127 if (am->tag == ARM64am_RR) {
3128       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3129          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3130 */
3131 UInt xN = iregEnc(am->ARM64am.RR.base);
3132 UInt xM = iregEnc(am->ARM64am.RR.index);
3133 vassert(xN <= 30);
3134 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3135 xM, X011010, xN, wD);
3136 *p++ = instr;
3137 return p;
3138 }
3139 vpanic("do_load_or_store16");
3140 vassert(0);
3141 }
3142
3143
3144 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3145 rD, using the given amode for the address. */
3146 static UInt* do_load_or_store32 ( UInt* p,
3147 Bool isLoad, UInt wD, ARM64AMode* am )
3148 {
3149 vassert(wD <= 30);
3150 if (am->tag == ARM64am_RI9) {
3151 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3152 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3153 */
3154 Int simm9 = am->ARM64am.RI9.simm9;
3155 vassert(-256 <= simm9 && simm9 <= 255);
3156 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3157 simm9 & 0x1FF, X00,
3158 iregEnc(am->ARM64am.RI9.reg), wD);
3159 *p++ = instr;
3160 return p;
3161 }
3162 if (am->tag == ARM64am_RI12) {
3163 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3164 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3165 */
3166 UInt uimm12 = am->ARM64am.RI12.uimm12;
3167 UInt scale = am->ARM64am.RI12.szB;
3168 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3169 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3170 vassert(xN <= 30);
3171 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3172 uimm12, xN, wD);
3173 *p++ = instr;
3174 return p;
3175 }
3176 if (am->tag == ARM64am_RR) {
3177 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3178 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3179 */
3180 UInt xN = iregEnc(am->ARM64am.RR.base);
3181 UInt xM = iregEnc(am->ARM64am.RR.index);
3182 vassert(xN <= 30);
3183 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3184 xM, X011010, xN, wD);
3185 *p++ = instr;
3186 return p;
3187 }
3188 vpanic("do_load_or_store32");
3189 vassert(0);
3190 }
3191
3192
3193 /* Generate a 64 bit load or store to/from xD, using the given amode
3194 for the address. */
3195 static UInt* do_load_or_store64 ( UInt* p,
3196 Bool isLoad, UInt xD, ARM64AMode* am )
3197 {
3198 /* In all these cases, Rn can't be 31 since that means SP. */
3199 vassert(xD <= 30);
3200 if (am->tag == ARM64am_RI9) {
3201 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3202 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3203 */
3204 Int simm9 = am->ARM64am.RI9.simm9;
3205 vassert(-256 <= simm9 && simm9 <= 255);
3206 UInt xN = iregEnc(am->ARM64am.RI9.reg);
3207 vassert(xN <= 30);
3208 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3209 simm9 & 0x1FF, X00, xN, xD);
3210 *p++ = instr;
3211 return p;
3212 }
3213 if (am->tag == ARM64am_RI12) {
3214 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3215 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3216 */
3217 UInt uimm12 = am->ARM64am.RI12.uimm12;
3218 UInt scale = am->ARM64am.RI12.szB;
3219 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3220 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3221 vassert(xN <= 30);
3222 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3223 uimm12, xN, xD);
3224 *p++ = instr;
3225 return p;
3226 }
3227 if (am->tag == ARM64am_RR) {
3228 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3229 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3230 */
3231 UInt xN = iregEnc(am->ARM64am.RR.base);
3232 UInt xM = iregEnc(am->ARM64am.RR.index);
3233 vassert(xN <= 30);
3234 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3235 xM, X011010, xN, xD);
3236 *p++ = instr;
3237 return p;
3238 }
3239 vpanic("do_load_or_store64");
3240 vassert(0);
3241 }
3242
3243
3244 /* Emit an instruction into buf and return the number of bytes used.
3245 Note that buf is not the insn's final place, and therefore it is
3246 imperative to emit position-independent code. If the emitted
3247 instruction was a profiler inc, set *is_profInc to True, else
3248 leave it unchanged. */
3249
3250 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3251 UChar* buf, Int nbuf, const ARM64Instr* i,
3252 Bool mode64, VexEndness endness_host,
3253 const void* disp_cp_chain_me_to_slowEP,
3254 const void* disp_cp_chain_me_to_fastEP,
3255 const void* disp_cp_xindir,
3256 const void* disp_cp_xassisted )
3257 {
3258 UInt* p = (UInt*)buf;
3259 vassert(nbuf >= 32);
3260 vassert(mode64 == True);
3261 vassert(0 == (((HWord)buf) & 3));
3262
3263 switch (i->tag) {
3264 case ARM64in_Arith: {
3265 UInt rD = iregEnc(i->ARM64in.Arith.dst);
3266 UInt rN = iregEnc(i->ARM64in.Arith.argL);
3267 ARM64RIA* argR = i->ARM64in.Arith.argR;
3268 switch (argR->tag) {
3269 case ARM64riA_I12:
3270 *p++ = X_2_6_2_12_5_5(
3271 i->ARM64in.Arith.isAdd ? X10 : X11,
3272 X010001,
3273 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3274 argR->ARM64riA.I12.imm12, rN, rD
3275 );
3276 break;
3277 case ARM64riA_R: {
3278 UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3279 *p++ = X_3_8_5_6_5_5(
3280 i->ARM64in.Arith.isAdd ? X100 : X110,
3281 X01011000, rM, X000000, rN, rD
3282 );
3283 break;
3284 }
3285 default:
3286 goto bad;
3287 }
3288 goto done;
3289 }
3290 case ARM64in_Cmp: {
3291 UInt rD = 31; /* XZR, we are going to dump the result */
3292 UInt rN = iregEnc(i->ARM64in.Cmp.argL);
3293 ARM64RIA* argR = i->ARM64in.Cmp.argR;
3294 Bool is64 = i->ARM64in.Cmp.is64;
3295 switch (argR->tag) {
3296 case ARM64riA_I12:
3297 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3298 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3299 *p++ = X_2_6_2_12_5_5(
3300 is64 ? X11 : X01, X110001,
3301 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3302 argR->ARM64riA.I12.imm12, rN, rD);
3303 break;
3304 case ARM64riA_R: {
3305 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3306 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3307 UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3308 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3309 X01011000, rM, X000000, rN, rD);
3310 break;
3311 }
3312 default:
3313 goto bad;
3314 }
3315 goto done;
3316 }
3317 case ARM64in_Logic: {
3318 UInt rD = iregEnc(i->ARM64in.Logic.dst);
3319 UInt rN = iregEnc(i->ARM64in.Logic.argL);
3320 ARM64RIL* argR = i->ARM64in.Logic.argR;
3321 UInt opc = 0; /* invalid */
3322 vassert(rD < 31);
3323 vassert(rN < 31);
3324 switch (i->ARM64in.Logic.op) {
3325 case ARM64lo_OR: opc = X101; break;
3326 case ARM64lo_AND: opc = X100; break;
3327 case ARM64lo_XOR: opc = X110; break;
3328 default: break;
3329 }
3330 vassert(opc != 0);
3331 switch (argR->tag) {
3332 case ARM64riL_I13: {
3333 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3334 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3335 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3336 *p++ = X_3_6_1_6_6_5_5(
3337 opc, X100100, argR->ARM64riL.I13.bitN,
3338 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3339 rN, rD
3340 );
3341 break;
3342 }
3343 case ARM64riL_R: {
3344 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3345 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3346 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3347 UInt rM = iregEnc(argR->ARM64riL.R.reg);
3348 vassert(rM < 31);
3349 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3350 break;
3351 }
3352 default:
3353 goto bad;
3354 }
3355 goto done;
3356 }
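      /* Note: the I13 form above just packs the caller-supplied
         N/immR/immS fields of ARM64's "bitmask immediate" encoding; it
         does not itself check that the value is a representable
         immediate -- that is the responsibility of whoever built the
         ARM64RIL. */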
3357 case ARM64in_Test: {
3358 UInt rD = 31; /* XZR, we are going to dump the result */
3359 UInt rN = iregEnc(i->ARM64in.Test.argL);
3360 ARM64RIL* argR = i->ARM64in.Test.argR;
3361 switch (argR->tag) {
3362 case ARM64riL_I13: {
3363 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3364 *p++ = X_3_6_1_6_6_5_5(
3365 X111, X100100, argR->ARM64riL.I13.bitN,
3366 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3367 rN, rD
3368 );
3369 break;
3370 }
3371 default:
3372 goto bad;
3373 }
3374 goto done;
3375 }
3376 case ARM64in_Shift: {
3377 UInt rD = iregEnc(i->ARM64in.Shift.dst);
3378 UInt rN = iregEnc(i->ARM64in.Shift.argL);
3379 ARM64RI6* argR = i->ARM64in.Shift.argR;
3380 vassert(rD < 31);
3381 vassert(rN < 31);
3382 switch (argR->tag) {
3383 case ARM64ri6_I6: {
3384             /* 110 1001101 (64-sh) (63-sh) nn dd LSL Xd, Xn, sh */
3385 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3386 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3387 UInt sh = argR->ARM64ri6.I6.imm6;
3388 vassert(sh > 0 && sh < 64);
3389 switch (i->ARM64in.Shift.op) {
3390 case ARM64sh_SHL:
3391 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3392 1, 64-sh, 63-sh, rN, rD);
3393 break;
3394 case ARM64sh_SHR:
3395 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3396 break;
3397 case ARM64sh_SAR:
3398 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3399 break;
3400 default:
3401 vassert(0);
3402 }
3403 break;
3404 }
3405 case ARM64ri6_R: {
3406 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3407 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3408 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3409 UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3410 vassert(rM < 31);
3411 UInt subOpc = 0;
3412 switch (i->ARM64in.Shift.op) {
3413 case ARM64sh_SHL: subOpc = X001000; break;
3414 case ARM64sh_SHR: subOpc = X001001; break;
3415 case ARM64sh_SAR: subOpc = X001010; break;
3416 default: vassert(0);
3417 }
3418 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3419 break;
3420 }
3421 default:
3422 vassert(0);
3423 }
3424 goto done;
3425 }
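      /* The immediate shifts above use the standard ARMv8 aliases:
         LSL Xd,Xn,#sh is UBFM Xd,Xn,#(64-sh),#(63-sh)  (for 0 < sh < 64),
         LSR Xd,Xn,#sh is UBFM Xd,Xn,#sh,#63, and
         ASR Xd,Xn,#sh is SBFM Xd,Xn,#sh,#63 -- hence the (immR,immS)
         pairs passed to the encoder. */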
3426 case ARM64in_Unary: {
3427 UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3428 UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3429 switch (i->ARM64in.Unary.op) {
3430 case ARM64un_CLZ:
3431 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3432 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3433 *p++ = X_3_8_5_6_5_5(X110,
3434 X11010110, X00000, X000100, rSrc, rDst);
3435 goto done;
3436 case ARM64un_NEG:
3437 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3438 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3439 *p++ = X_3_8_5_6_5_5(X110,
3440 X01011000, rSrc, X000000, X11111, rDst);
3441 goto done;
3442 case ARM64un_NOT: {
3443 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3444 *p++ = X_3_8_5_6_5_5(X101,
3445 X01010001, rSrc, X000000, X11111, rDst);
3446 goto done;
3447 }
3448 default:
3449 break;
3450 }
3451 goto bad;
3452 }
3453 case ARM64in_MovI: {
3454 /* We generate the "preferred form", ORR Xd, XZR, Xm
3455 101 01010 00 0 m 000000 11111 d
3456 */
3457 UInt instr = 0xAA0003E0;
3458 UInt d = iregEnc(i->ARM64in.MovI.dst);
3459 UInt m = iregEnc(i->ARM64in.MovI.src);
3460 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3461 goto done;
3462 }
3463 case ARM64in_Imm64: {
3464 p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3465 i->ARM64in.Imm64.imm64 );
3466 goto done;
3467 }
3468 case ARM64in_LdSt64: {
3469 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3470 iregEnc(i->ARM64in.LdSt64.rD),
3471 i->ARM64in.LdSt64.amode );
3472 goto done;
3473 }
3474 case ARM64in_LdSt32: {
3475 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3476 iregEnc(i->ARM64in.LdSt32.rD),
3477 i->ARM64in.LdSt32.amode );
3478 goto done;
3479 }
3480 case ARM64in_LdSt16: {
3481 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3482 iregEnc(i->ARM64in.LdSt16.rD),
3483 i->ARM64in.LdSt16.amode );
3484 goto done;
3485 }
3486 case ARM64in_LdSt8: {
3487 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3488 iregEnc(i->ARM64in.LdSt8.rD),
3489 i->ARM64in.LdSt8.amode );
3490 goto done;
3491 }
3492
3493 case ARM64in_XDirect: {
3494 /* NB: what goes on here has to be very closely coordinated
3495 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3496 /* We're generating chain-me requests here, so we need to be
3497 sure this is actually allowed -- no-redir translations
3498 can't use chain-me's. Hence: */
3499 vassert(disp_cp_chain_me_to_slowEP != NULL);
3500 vassert(disp_cp_chain_me_to_fastEP != NULL);
3501
3502 /* Use ptmp for backpatching conditional jumps. */
3503 UInt* ptmp = NULL;
3504
3505 /* First off, if this is conditional, create a conditional
3506 jump over the rest of it. Or at least, leave a space for
3507 it that we will shortly fill in. */
3508 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3509 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3510 ptmp = p;
3511 *p++ = 0;
3512 }
3513
3514 /* Update the guest PC. */
3515 /* imm64 x9, dstGA */
3516 /* str x9, amPC */
3517 p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3518 p = do_load_or_store64(p, False/*!isLoad*/,
3519 /*x*/9, i->ARM64in.XDirect.amPC);
3520
3521 /* --- FIRST PATCHABLE BYTE follows --- */
3522 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3523 calling to) backs up the return address, so as to find the
3524 address of the first patchable byte. So: don't change the
3525 number of instructions (5) below. */
3526 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3527          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
3528 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3529 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3530 /* blr x9 */
3531 const void* disp_cp_chain_me
3532 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3533 : disp_cp_chain_me_to_slowEP;
3534 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3535 *p++ = 0xD63F0120;
3536 /* --- END of PATCHABLE BYTES --- */
3537
3538 /* Fix up the conditional jump, if there was one. */
3539 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3540 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3541 vassert(delta > 0 && delta < 40);
3542 vassert((delta & 3) == 0);
3543 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3544 vassert(notCond <= 13); /* Neither AL nor NV */
3545 vassert(ptmp != NULL);
3546 delta = delta >> 2;
3547 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3548 }
3549 goto done;
3550 }
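      /* Re the backpatch just above: delta is the byte distance from the
         placeholder at ptmp to the first instruction after the BLR, and
         B.cond encodes a word offset relative to itself, hence the >> 2
         before packing it into the 19-bit immediate.  So when the
         inverted condition holds, the whole PC-update/chain-me sequence
         is skipped. */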
3551
3552 case ARM64in_XIndir: {
3553 // XIndir is more or less the same as XAssisted, except
3554 // we don't have a trc value to hand back, so there's no
3555       // write to x21
3556 /* Use ptmp for backpatching conditional jumps. */
3557 //UInt* ptmp = NULL;
3558
3559 /* First off, if this is conditional, create a conditional
3560 jump over the rest of it. Or at least, leave a space for
3561 it that we will shortly fill in. */
3562 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3563 vassert(0); //ATC
3564 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3565 //ZZ ptmp = p;
3566 //ZZ *p++ = 0;
3567 }
3568
3569 /* Update the guest PC. */
3570 /* str r-dstGA, amPC */
3571 p = do_load_or_store64(p, False/*!isLoad*/,
3572 iregEnc(i->ARM64in.XIndir.dstGA),
3573 i->ARM64in.XIndir.amPC);
3574
3575 /* imm64 x9, VG_(disp_cp_xindir) */
3576 /* br x9 */
3577 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3578 *p++ = 0xD61F0120; /* br x9 */
3579
3580 /* Fix up the conditional jump, if there was one. */
3581 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3582 vassert(0); //ATC
3583 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3584 //ZZ vassert(delta > 0 && delta < 40);
3585 //ZZ vassert((delta & 3) == 0);
3586 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3587 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
3588 //ZZ delta = (delta >> 2) - 2;
3589 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3590 }
3591 goto done;
3592 }
3593
3594 case ARM64in_XAssisted: {
3595 /* Use ptmp for backpatching conditional jumps. */
3596 UInt* ptmp = NULL;
3597
3598 /* First off, if this is conditional, create a conditional
3599 jump over the rest of it. Or at least, leave a space for
3600 it that we will shortly fill in. I think this can only
3601 ever happen when VEX is driven by the switchbacker. */
3602 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3603             vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
3604 ptmp = p;
3605 *p++ = 0;
3606 }
3607
3608 /* Update the guest PC. */
3609 /* str r-dstGA, amPC */
3610 p = do_load_or_store64(p, False/*!isLoad*/,
3611 iregEnc(i->ARM64in.XAssisted.dstGA),
3612 i->ARM64in.XAssisted.amPC);
3613
3614          /* imm64 x21, $magic_number */
3615 UInt trcval = 0;
3616 switch (i->ARM64in.XAssisted.jk) {
3617 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3618 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3619 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3620 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3621 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3622 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3623 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3624 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3625 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
3626 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3627 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3628 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3629 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3630 /* We don't expect to see the following being assisted. */
3631 //case Ijk_Ret:
3632 //case Ijk_Call:
3633 /* fallthrough */
3634 default:
3635 ppIRJumpKind(i->ARM64in.XAssisted.jk);
3636 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3637 "unexpected jump kind");
3638 }
3639 vassert(trcval != 0);
3640 p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
3641
3642 /* imm64 x9, VG_(disp_cp_xassisted) */
3643 /* br x9 */
3644 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
3645 *p++ = 0xD61F0120; /* br x9 */
3646
3647 /* Fix up the conditional jump, if there was one. */
3648 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3649 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3650 vassert(delta > 0 && delta < 40);
3651 vassert((delta & 3) == 0);
3652             UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
3653 vassert(notCond <= 13); /* Neither AL nor NV */
3654 vassert(ptmp != NULL);
3655 delta = delta >> 2;
3656 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3657 }
3658 goto done;
3659 }
3660
3661 case ARM64in_CSel: {
3662 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3663 UInt dd = iregEnc(i->ARM64in.CSel.dst);
3664 UInt nn = iregEnc(i->ARM64in.CSel.argL);
3665 UInt mm = iregEnc(i->ARM64in.CSel.argR);
3666 UInt cond = (UInt)i->ARM64in.CSel.cond;
3667 vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
3668 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
3669 goto done;
3670 }
3671
3672 case ARM64in_Call: {
3673 /* We'll use x9 as a scratch register to put the target
3674 address in. */
3675 if (i->ARM64in.Call.cond != ARM64cc_AL
3676 && i->ARM64in.Call.rloc.pri != RLPri_None) {
3677 /* The call might not happen (it isn't unconditional) and
3678 it returns a result. In this case we will need to
3679 generate a control flow diamond to put 0x555..555 in
3680 the return register(s) in the case where the call
3681 doesn't happen. If this ever becomes necessary, maybe
3682 copy code from the 32-bit ARM equivalent. Until that
3683 day, just give up. */
3684 goto bad;
3685 }
3686
3687 UInt* ptmp = NULL;
3688 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3689 /* Create a hole to put a conditional branch in. We'll
3690 patch it once we know the branch length. */
3691 ptmp = p;
3692 *p++ = 0;
3693 }
3694
3695 // x9 = &target
3696 p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
3697 // blr x9
3698 *p++ = 0xD63F0120;
3699
3700 // Patch the hole if necessary
3701 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3702 ULong dist = (ULong)(p - ptmp);
3703 /* imm64_to_ireg produces between 1 and 4 insns, and
3704 then there's the BLR itself. Hence: */
3705 vassert(dist >= 2 && dist <= 5);
3706 vassert(ptmp != NULL);
3707 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3708 *ptmp = X_8_19_1_4(X01010100, dist, 0,
3709 1 ^ (UInt)i->ARM64in.Call.cond);
3710 } else {
3711 vassert(ptmp == NULL);
3712 }
3713
3714 goto done;
3715 }
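      /* Note that, unlike in the XDirect case, dist above is obtained by
         subtracting UInt* pointers, so it is already a count of 32-bit
         instructions -- exactly the word offset B.cond needs -- and no
         further >> 2 is required. */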
3716
3717 case ARM64in_AddToSP: {
3718 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
3719 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
3720 */
3721 Int simm12 = i->ARM64in.AddToSP.simm;
3722 vassert(-4096 < simm12 && simm12 < 4096);
3723 vassert(0 == (simm12 & 0xF));
3724 if (simm12 >= 0) {
3725 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
3726 } else {
3727 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
3728 }
3729 goto done;
3730 }
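      /* The (simm12 & 0xF) == 0 assertion above keeps the adjustment a
         multiple of 16, since AArch64 (with stack alignment checking
         enabled, as is usual) faults if SP is used as the base of a
         memory access while not 16-byte aligned. */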
3731
3732 case ARM64in_FromSP: {
3733 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
3734 UInt dd = iregEnc(i->ARM64in.FromSP.dst);
3735 vassert(dd < 31);
3736 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
3737 goto done;
3738 }
3739
3740 case ARM64in_Mul: {
3741 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
3742 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
3743 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
3744 */
3745 UInt dd = iregEnc(i->ARM64in.Mul.dst);
3746 UInt nn = iregEnc(i->ARM64in.Mul.argL);
3747 UInt mm = iregEnc(i->ARM64in.Mul.argR);
3748 vassert(dd < 31 && nn < 31 && mm < 31);
3749 switch (i->ARM64in.Mul.op) {
3750 case ARM64mul_ZX:
3751 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
3752 goto done;
3753 case ARM64mul_SX:
3754 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
3755 goto done;
3756 case ARM64mul_PLAIN:
3757 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
3758 goto done;
3759 default:
3760 vassert(0);
3761 }
3762 goto bad;
3763 }
3764 case ARM64in_LdrEX: {
3765 /* 085F7C82 ldxrb w2, [x4]
3766 485F7C82 ldxrh w2, [x4]
3767 885F7C82 ldxr w2, [x4]
3768 C85F7C82 ldxr x2, [x4]
3769 */
3770 switch (i->ARM64in.LdrEX.szB) {
3771 case 1: *p++ = 0x085F7C82; goto done;
3772 case 2: *p++ = 0x485F7C82; goto done;
3773 case 4: *p++ = 0x885F7C82; goto done;
3774 case 8: *p++ = 0xC85F7C82; goto done;
3775 default: break;
3776 }
3777 goto bad;
3778 }
3779 case ARM64in_StrEX: {
3780 /* 08007C82 stxrb w0, w2, [x4]
3781 48007C82 stxrh w0, w2, [x4]
3782 88007C82 stxr w0, w2, [x4]
3783 C8007C82 stxr w0, x2, [x4]
3784 */
3785 switch (i->ARM64in.StrEX.szB) {
3786 case 1: *p++ = 0x08007C82; goto done;
3787 case 2: *p++ = 0x48007C82; goto done;
3788 case 4: *p++ = 0x88007C82; goto done;
3789 case 8: *p++ = 0xC8007C82; goto done;
3790 default: break;
3791 }
3792 goto bad;
3793 }
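      /* Both LdrEX and StrEX are emitted with hard-wired registers (x4
         as the address, x2/w2 as the data, and w0 as the StrEX status
         result), which is why fixed 32-bit constants are used here
         rather than the X_..._ bit-field packers. */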
3794 case ARM64in_MFence: {
3795 *p++ = 0xD5033F9F; /* DSB sy */
3796 *p++ = 0xD5033FBF; /* DMB sy */
3797 *p++ = 0xD5033FDF; /* ISB */
3798 goto done;
3799 }
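      /* The sequence above is a conservatively strong barrier: DSB sy
         waits for completion of outstanding memory accesses, DMB sy
         orders memory accesses on either side of it, and ISB flushes
         the pipeline. */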
3800 //case ARM64in_CLREX: {
3801 // //ATC, but believed to be correct
3802 // goto bad;
3803 // *p++ = 0xD5033F5F; /* clrex */
3804 // goto done;
3805 //}
3806 case ARM64in_VLdStH: {
3807 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
3808 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
3809 */
3810 UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
3811 UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
3812 UInt uimm12 = i->ARM64in.VLdStH.uimm12;
3813 Bool isLD = i->ARM64in.VLdStH.isLoad;
3814 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
3815 uimm12 >>= 1;
3816 vassert(uimm12 < (1<<12));
3817 vassert(hD < 32);
3818 vassert(rN < 31);
3819 *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
3820 uimm12, rN, hD);
3821 goto done;
3822 }
3823 case ARM64in_VLdStS: {
3824 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
3825 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
3826 */
3827 UInt sD = dregEnc(i->ARM64in.VLdStS.sD);
3828 UInt rN = iregEnc(i->ARM64in.VLdStS.rN);
3829 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
3830 Bool isLD = i->ARM64in.VLdStS.isLoad;
3831 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
3832 uimm12 >>= 2;
3833 vassert(uimm12 < (1<<12));
3834 vassert(sD < 32);
3835 vassert(rN < 31);
3836 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
3837 uimm12, rN, sD);
3838 goto done;
3839 }
3840 case ARM64in_VLdStD: {
3841 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
3842 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
3843 */
3844 UInt dD = dregEnc(i->ARM64in.VLdStD.dD);
3845 UInt rN = iregEnc(i->ARM64in.VLdStD.rN);
3846 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
3847 Bool isLD = i->ARM64in.VLdStD.isLoad;
3848 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
3849 uimm12 >>= 3;
3850 vassert(uimm12 < (1<<12));
3851 vassert(dD < 32);
3852 vassert(rN < 31);
3853 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
3854 uimm12, rN, dD);
3855 goto done;
3856 }
3857 case ARM64in_VLdStQ: {
3858 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
3859 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
3860 */
3861 UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
3862 UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
3863 vassert(rQ < 32);
3864 vassert(rN < 31);
3865 if (i->ARM64in.VLdStQ.isLoad) {
3866 *p++ = 0x4C407C00 | (rN << 5) | rQ;
3867 } else {
3868 *p++ = 0x4C007C00 | (rN << 5) | rQ;
3869 }
3870 goto done;
3871 }
3872 case ARM64in_VCvtI2F: {
3873 /* 31 28 23 21 20 18 15 9 4
3874 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
3875 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
3876 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
3877 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
3878 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
3879 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
3880 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
3881 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
3882 */
3883 UInt rN = iregEnc(i->ARM64in.VCvtI2F.rS);
3884 UInt rD = dregEnc(i->ARM64in.VCvtI2F.rD);
3885 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
3886 /* Just handle cases as they show up. */
3887 switch (how) {
3888 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
3889 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
3890 break;
3891 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
3892 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
3893 break;
3894 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
3895 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
3896 break;
3897 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
3898 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
3899 break;
3900 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
3901 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
3902 break;
3903 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
3904 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
3905 break;
3906 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
3907 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
3908 break;
3909 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
3910 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
3911 break;
3912 default:
3913 goto bad; //ATC
3914 }
3915 goto done;
3916 }
3917 case ARM64in_VCvtF2I: {
3918 /* 30 23 20 18 15 9 4
3919 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
3920 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
3921 ---------------- 01 -------------- FCVTP-------- (round to +inf)
3922 ---------------- 10 -------------- FCVTM-------- (round to -inf)
3923 ---------------- 11 -------------- FCVTZ-------- (round to zero)
3924
3925 Rd is Xd when sf==1, Wd when sf==0
3926 Fn is Dn when x==1, Sn when x==0
3927 20:19 carry the rounding mode, using the same encoding as FPCR
3928 */
3929 UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
3930 UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
3931 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
3932 UChar armRM = i->ARM64in.VCvtF2I.armRM;
3933 /* Just handle cases as they show up. */
3934 switch (how) {
3935 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
3936 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
3937 X000000, rN, rD);
3938 break;
3939 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
3940 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
3941 X000000, rN, rD);
3942 break;
3943 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
3944 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
3945 X000000, rN, rD);
3946 break;
3947 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
3948 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
3949 X000000, rN, rD);
3950 break;
3951 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
3952 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
3953 X000000, rN, rD);
3954 break;
3955 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
3956 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
3957 X000000, rN, rD);
3958 break;
3959 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
3960 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
3961 X000000, rN, rD);
3962 break;
3963 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
3964 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
3965 X000000, rN, rD);
3966 break;
3967 default:
3968 goto bad; //ATC
3969 }
3970 goto done;
3971 }
3972 case ARM64in_VCvtSD: {
3973 /* 31 23 21 16 14 9 4
3974 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
3975 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
3976 Rounding, when dst is smaller than src, is per the FPCR.
3977 */
3978 UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
3979 UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
3980 if (i->ARM64in.VCvtSD.sToD) {
3981 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
3982 } else {
3983 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
3984 }
3985 goto done;
3986 }
3987 case ARM64in_VCvtHS: {
3988 /* 31 23 21 16 14 9 4
3989 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
3990 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
3991 Rounding, when dst is smaller than src, is per the FPCR.
3992 */
3993 UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
3994 UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
3995 if (i->ARM64in.VCvtHS.hToS) {
3996 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
3997 } else {
3998 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
3999 }
4000 goto done;
4001 }
4002 case ARM64in_VCvtHD: {
4003 /* 31 23 21 16 14 9 4
4004 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4005 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4006 Rounding, when dst is smaller than src, is per the FPCR.
4007 */
4008 UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4009 UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4010 if (i->ARM64in.VCvtHD.hToD) {
4011 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4012 } else {
4013 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4014 }
4015 goto done;
4016 }
4017 case ARM64in_VUnaryD: {
4018 /* 31 23 21 16 14 9 4
4019 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4020 ------------------- 0,1 --------- FABS ------
4021 ------------------- 1,0 --------- FNEG ------
4022 ------------------- 1,1 --------- FSQRT -----
4023 */
4024 UInt dD = dregEnc(i->ARM64in.VUnaryD.dst);
4025 UInt dN = dregEnc(i->ARM64in.VUnaryD.src);
4026 UInt b16 = 2; /* impossible */
4027 UInt b15 = 2; /* impossible */
4028 switch (i->ARM64in.VUnaryD.op) {
4029 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4030 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4031 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4032 default: break;
4033 }
4034 if (b16 < 2 && b15 < 2) {
4035 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4036 (b15 << 5) | X10000, dN, dD);
4037 goto done;
4038 }
4039 /*
4040 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
4041 */
4042 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4043 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4044 goto done;
4045 }
4046 /*
4047 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dm
4048 */
4049 if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4050 *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4051 goto done;
4052 }
4053 goto bad;
4054 }
4055 case ARM64in_VUnaryS: {
4056 /* 31 23 21 16 14 9 4
4057 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4058 ------------------- 0,1 --------- FABS ------
4059 ------------------- 1,0 --------- FNEG ------
4060 ------------------- 1,1 --------- FSQRT -----
4061 */
4062 UInt sD = dregEnc(i->ARM64in.VUnaryS.dst);
4063 UInt sN = dregEnc(i->ARM64in.VUnaryS.src);
4064 UInt b16 = 2; /* impossible */
4065 UInt b15 = 2; /* impossible */
4066 switch (i->ARM64in.VUnaryS.op) {
4067 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4068 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4069 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4070 default: break;
4071 }
4072 if (b16 < 2 && b15 < 2) {
4073 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4074 (b15 << 5) | X10000, sN, sD);
4075 goto done;
4076 }
4077 /*
4078 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
4079 */
4080 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4081 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4082 goto done;
4083 }
4084 /*
4085 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sm
4086 */
4087 if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4088 *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4089 goto done;
4090 }
4091 goto bad;
4092 }
4093 case ARM64in_VBinD: {
4094 /* 31 23 20 15 11 9 4
4095 ---------------- 0000 ------ FMUL --------
4096 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4097 ---------------- 0010 ------ FADD --------
4098 ---------------- 0011 ------ FSUB --------
4099 */
4100 UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4101 UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4102 UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4103 UInt b1512 = 16; /* impossible */
4104 switch (i->ARM64in.VBinD.op) {
4105 case ARM64fpb_DIV: b1512 = X0001; break;
4106 case ARM64fpb_MUL: b1512 = X0000; break;
4107 case ARM64fpb_SUB: b1512 = X0011; break;
4108 case ARM64fpb_ADD: b1512 = X0010; break;
4109 default: goto bad;
4110 }
4111 vassert(b1512 < 16);
4112 *p++
4113 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4114 goto done;
4115 }
4116 case ARM64in_VBinS: {
4117 /* 31 23 20 15 11 9 4
4118 ---------------- 0000 ------ FMUL --------
4119             000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
4120 ---------------- 0010 ------ FADD --------
4121 ---------------- 0011 ------ FSUB --------
4122 */
4123 UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4124 UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4125 UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4126 UInt b1512 = 16; /* impossible */
4127 switch (i->ARM64in.VBinS.op) {
4128 case ARM64fpb_DIV: b1512 = X0001; break;
4129 case ARM64fpb_MUL: b1512 = X0000; break;
4130 case ARM64fpb_SUB: b1512 = X0011; break;
4131 case ARM64fpb_ADD: b1512 = X0010; break;
4132 default: goto bad;
4133 }
4134 vassert(b1512 < 16);
4135 *p++
4136 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4137 goto done;
4138 }
4139 case ARM64in_VCmpD: {
4140 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4141 UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4142 UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4143 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4144 goto done;
4145 }
4146 case ARM64in_VCmpS: {
4147 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4148 UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4149 UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4150 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4151 goto done;
4152 }
4153 case ARM64in_VFCSel: {
4154 /* 31 23 21 20 15 11 9 5
4155 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4156 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4157 */
4158 Bool isD = i->ARM64in.VFCSel.isD;
4159 UInt dd = dregEnc(i->ARM64in.VFCSel.dst);
4160 UInt nn = dregEnc(i->ARM64in.VFCSel.argL);
4161 UInt mm = dregEnc(i->ARM64in.VFCSel.argR);
4162 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4163 vassert(cond < 16);
4164 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4165 mm, (cond << 2) | X000011, nn, dd);
4166 goto done;
4167 }
4168 case ARM64in_FPCR: {
4169 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4170 UInt iReg = iregEnc(i->ARM64in.FPCR.iReg);
4171 if (toFPCR) {
4172 /* 0xD51B44 000 Rt MSR fpcr, rT */
4173 *p++ = 0xD51B4400 | (iReg & 0x1F);
4174 goto done;
4175 }
4176 goto bad; // FPCR -> iReg case currently ATC
4177 }
4178 case ARM64in_FPSR: {
4179 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4180 UInt iReg = iregEnc(i->ARM64in.FPSR.iReg);
4181 if (toFPSR) {
4182 /* 0xD51B44 001 Rt MSR fpsr, rT */
4183 *p++ = 0xD51B4420 | (iReg & 0x1F);
4184 } else {
4185 /* 0xD53B44 001 Rt MRS rT, fpsr */
4186 *p++ = 0xD53B4420 | (iReg & 0x1F);
4187 }
4188 goto done;
4189 }
4190 case ARM64in_VBinV: {
4191 /* 31 23 20 15 9 4
4192 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4193 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4194 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4195 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4196
4197 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4198 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4199 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4200 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4201
4202 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4203 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4204 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4205
4206 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4207 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4208 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4209 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4210
4211 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4212 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4213 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4214 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4215
4216 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4217 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4218 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4219 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4220
4221 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4222 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4223 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4224
4225 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4226 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4227 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4228
4229 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4230 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4231 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4232
4233 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4234 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4235 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4236
4237 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4238 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4239 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4240
4241 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4242 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4243 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4244 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4245
4246 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4247 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4248 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4249 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4250
4251 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4252 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4253 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4254 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4255
4256 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4257 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4258
4259 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4260 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4261
4262 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4263 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4264
4265 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4266
4267 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4268 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4269 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4270 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4271
4272 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4273 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4274 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4275 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4276
4277 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4278 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4279             010 01110 00 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4280
4281 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4282 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4283             010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4284
4285 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4286
4287 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4288
4289 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4290 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4291 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4292
4293 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4294 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4295 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4296
4297 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4298 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4299 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4300 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4301
4302 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4303 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4304 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4305 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4306
4307 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4308 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4309 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4310 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4311
4312 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4313 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4314 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4315 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4316
4317 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4318 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4319
4320 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4321 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4322 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4323             011 01110 01 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4324
4325 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4326 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4327 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4328             011 01110 sz 1 m 010111 n d UQRSHL@sz Vd, Vn, Vm
4329
4330 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4331 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4332 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4333 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4334
4335 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4336 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4337 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4338 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4339 */
4340 UInt vD = qregEnc(i->ARM64in.VBinV.dst);
4341 UInt vN = qregEnc(i->ARM64in.VBinV.argL);
4342 UInt vM = qregEnc(i->ARM64in.VBinV.argR);
4343 switch (i->ARM64in.VBinV.op) {
4344 case ARM64vecb_ADD64x2:
4345 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4346 break;
4347 case ARM64vecb_ADD32x4:
4348 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4349 break;
4350 case ARM64vecb_ADD16x8:
4351 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4352 break;
4353 case ARM64vecb_ADD8x16:
4354 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4355 break;
4356 case ARM64vecb_SUB64x2:
4357 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4358 break;
4359 case ARM64vecb_SUB32x4:
4360 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4361 break;
4362 case ARM64vecb_SUB16x8:
4363 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4364 break;
4365 case ARM64vecb_SUB8x16:
4366 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4367 break;
4368 case ARM64vecb_MUL32x4:
4369 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4370 break;
4371 case ARM64vecb_MUL16x8:
4372 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4373 break;
4374 case ARM64vecb_MUL8x16:
4375 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4376 break;
4377 case ARM64vecb_FADD64x2:
4378 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4379 break;
4380 case ARM64vecb_FADD32x4:
4381 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4382 break;
4383 case ARM64vecb_FSUB64x2:
4384 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4385 break;
4386 case ARM64vecb_FSUB32x4:
4387 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4388 break;
4389 case ARM64vecb_FMUL64x2:
4390 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4391 break;
4392 case ARM64vecb_FMUL32x4:
4393 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4394 break;
4395 case ARM64vecb_FDIV64x2:
4396 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4397 break;
4398 case ARM64vecb_FDIV32x4:
4399 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4400 break;
4401
4402 case ARM64vecb_FMAX64x2:
4403 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4404 break;
4405 case ARM64vecb_FMAX32x4:
4406 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4407 break;
4408 case ARM64vecb_FMIN64x2:
4409 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4410 break;
4411 case ARM64vecb_FMIN32x4:
4412 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4413 break;
4414
4415 case ARM64vecb_UMAX32x4:
4416 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4417 break;
4418 case ARM64vecb_UMAX16x8:
4419 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4420 break;
4421 case ARM64vecb_UMAX8x16:
4422 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4423 break;
4424
4425 case ARM64vecb_UMIN32x4:
4426 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4427 break;
4428 case ARM64vecb_UMIN16x8:
4429 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4430 break;
4431 case ARM64vecb_UMIN8x16:
4432 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4433 break;
4434
4435 case ARM64vecb_SMAX32x4:
4436 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4437 break;
4438 case ARM64vecb_SMAX16x8:
4439 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4440 break;
4441 case ARM64vecb_SMAX8x16:
4442 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4443 break;
4444
4445 case ARM64vecb_SMIN32x4:
4446 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4447 break;
4448 case ARM64vecb_SMIN16x8:
4449 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4450 break;
4451 case ARM64vecb_SMIN8x16:
4452 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4453 break;
4454
4455 case ARM64vecb_AND:
4456 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4457 break;
4458 case ARM64vecb_ORR:
4459 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4460 break;
4461 case ARM64vecb_XOR:
4462 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4463 break;
4464
4465 case ARM64vecb_CMEQ64x2:
4466 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4467 break;
4468 case ARM64vecb_CMEQ32x4:
4469 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4470 break;
4471 case ARM64vecb_CMEQ16x8:
4472 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4473 break;
4474 case ARM64vecb_CMEQ8x16:
4475 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4476 break;
4477
4478 case ARM64vecb_CMHI64x2:
4479 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
4480 break;
4481 case ARM64vecb_CMHI32x4:
4482 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
4483 break;
4484 case ARM64vecb_CMHI16x8:
4485 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
4486 break;
4487 case ARM64vecb_CMHI8x16:
4488 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
4489 break;
4490
4491 case ARM64vecb_CMGT64x2:
4492 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
4493 break;
4494 case ARM64vecb_CMGT32x4:
4495 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
4496 break;
4497 case ARM64vecb_CMGT16x8:
4498 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
4499 break;
4500 case ARM64vecb_CMGT8x16:
4501 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
4502 break;
4503
4504 case ARM64vecb_FCMEQ64x2:
4505 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4506 break;
4507 case ARM64vecb_FCMEQ32x4:
4508 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4509 break;
4510
4511 case ARM64vecb_FCMGE64x2:
4512 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4513 break;
4514 case ARM64vecb_FCMGE32x4:
4515 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4516 break;
4517
4518 case ARM64vecb_FCMGT64x2:
4519 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4520 break;
4521 case ARM64vecb_FCMGT32x4:
4522 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4523 break;
4524
4525 case ARM64vecb_TBL1:
4526 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4527 break;
4528
4529 case ARM64vecb_UZP164x2:
4530 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4531 break;
4532 case ARM64vecb_UZP132x4:
4533 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4534 break;
4535 case ARM64vecb_UZP116x8:
4536 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4537 break;
4538 case ARM64vecb_UZP18x16:
4539 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4540 break;
4541
4542 case ARM64vecb_UZP264x2:
4543 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4544 break;
4545 case ARM64vecb_UZP232x4:
4546 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4547 break;
4548 case ARM64vecb_UZP216x8:
4549 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4550 break;
4551 case ARM64vecb_UZP28x16:
4552 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4553 break;
4554
4555 case ARM64vecb_ZIP132x4:
4556 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4557 break;
4558 case ARM64vecb_ZIP116x8:
4559 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4560 break;
4561 case ARM64vecb_ZIP18x16:
4562 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4563 break;
4564
4565 case ARM64vecb_ZIP232x4:
4566 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4567 break;
4568 case ARM64vecb_ZIP216x8:
4569 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4570 break;
4571 case ARM64vecb_ZIP28x16:
4572 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
4573 break;
4574
4575 case ARM64vecb_PMUL8x16:
4576 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4577 break;
4578
4579 case ARM64vecb_PMULL8x8:
4580 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4581 break;
4582
4583 case ARM64vecb_UMULL2DSS:
4584 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4585 break;
4586 case ARM64vecb_UMULL4SHH:
4587 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4588 break;
4589 case ARM64vecb_UMULL8HBB:
4590 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4591 break;
4592
4593 case ARM64vecb_SMULL2DSS:
4594 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4595 break;
4596 case ARM64vecb_SMULL4SHH:
4597 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4598 break;
4599 case ARM64vecb_SMULL8HBB:
4600 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4601 break;
4602
4603 case ARM64vecb_SQADD64x2:
4604 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
4605 break;
4606 case ARM64vecb_SQADD32x4:
4607 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
4608 break;
4609 case ARM64vecb_SQADD16x8:
4610 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
4611 break;
4612 case ARM64vecb_SQADD8x16:
4613 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
4614 break;
4615
4616 case ARM64vecb_UQADD64x2:
4617 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
4618 break;
4619 case ARM64vecb_UQADD32x4:
4620 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
4621 break;
4622 case ARM64vecb_UQADD16x8:
4623 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
4624 break;
4625 case ARM64vecb_UQADD8x16:
4626 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
4627 break;
4628
4629 case ARM64vecb_SQSUB64x2:
4630 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
4631 break;
4632 case ARM64vecb_SQSUB32x4:
4633 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
4634 break;
4635 case ARM64vecb_SQSUB16x8:
4636 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
4637 break;
4638 case ARM64vecb_SQSUB8x16:
4639 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
4640 break;
4641
4642 case ARM64vecb_UQSUB64x2:
4643 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
4644 break;
4645 case ARM64vecb_UQSUB32x4:
4646 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
4647 break;
4648 case ARM64vecb_UQSUB16x8:
4649 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
4650 break;
4651 case ARM64vecb_UQSUB8x16:
4652 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
4653 break;
4654
4655 case ARM64vecb_SQDMULL2DSS:
4656 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
4657 break;
4658 case ARM64vecb_SQDMULL4SHH:
4659 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
4660 break;
4661
4662 case ARM64vecb_SQDMULH32x4:
4663 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
4664 break;
4665 case ARM64vecb_SQDMULH16x8:
4666 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
4667 break;
4668 case ARM64vecb_SQRDMULH32x4:
4669 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
4670 break;
4671 case ARM64vecb_SQRDMULH16x8:
4672 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
4673 break;
4674
4675 case ARM64vecb_SQSHL64x2:
4676 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
4677 break;
4678 case ARM64vecb_SQSHL32x4:
4679 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
4680 break;
4681 case ARM64vecb_SQSHL16x8:
4682 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
4683 break;
4684 case ARM64vecb_SQSHL8x16:
4685 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
4686 break;
4687
4688 case ARM64vecb_SQRSHL64x2:
4689 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
4690 break;
4691 case ARM64vecb_SQRSHL32x4:
4692 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
4693 break;
4694 case ARM64vecb_SQRSHL16x8:
4695 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
4696 break;
4697 case ARM64vecb_SQRSHL8x16:
4698 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
4699 break;
4700
4701 case ARM64vecb_UQSHL64x2:
4702 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
4703 break;
4704 case ARM64vecb_UQSHL32x4:
4705 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
4706 break;
4707 case ARM64vecb_UQSHL16x8:
4708 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
4709 break;
4710 case ARM64vecb_UQSHL8x16:
4711 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
4712 break;
4713
4714 case ARM64vecb_UQRSHL64x2:
4715 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
4716 break;
4717 case ARM64vecb_UQRSHL32x4:
4718 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
4719 break;
4720 case ARM64vecb_UQRSHL16x8:
4721 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
4722 break;
4723 case ARM64vecb_UQRSHL8x16:
4724 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
4725 break;
4726
4727 case ARM64vecb_SSHL64x2:
4728 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
4729 break;
4730 case ARM64vecb_SSHL32x4:
4731 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
4732 break;
4733 case ARM64vecb_SSHL16x8:
4734 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
4735 break;
4736 case ARM64vecb_SSHL8x16:
4737 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
4738 break;
4739
4740 case ARM64vecb_SRSHL64x2:
4741 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
4742 break;
4743 case ARM64vecb_SRSHL32x4:
4744 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
4745 break;
4746 case ARM64vecb_SRSHL16x8:
4747 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
4748 break;
4749 case ARM64vecb_SRSHL8x16:
4750 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
4751 break;
4752
4753 case ARM64vecb_USHL64x2:
4754 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
4755 break;
4756 case ARM64vecb_USHL32x4:
4757 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
4758 break;
4759 case ARM64vecb_USHL16x8:
4760 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
4761 break;
4762 case ARM64vecb_USHL8x16:
4763 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
4764 break;
4765
4766 case ARM64vecb_URSHL64x2:
4767 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
4768 break;
4769 case ARM64vecb_URSHL32x4:
4770 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
4771 break;
4772 case ARM64vecb_URSHL16x8:
4773 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
4774 break;
4775 case ARM64vecb_URSHL8x16:
4776 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
4777 break;
4778
4779 case ARM64vecb_FRECPS64x2:
4780 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
4781 break;
4782 case ARM64vecb_FRECPS32x4:
4783 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
4784 break;
4785 case ARM64vecb_FRSQRTS64x2:
4786 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
4787 break;
4788 case ARM64vecb_FRSQRTS32x4:
4789 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
4790 break;
4791
4792 default:
4793 goto bad;
4794 }
4795 goto done;
4796 }
4797 case ARM64in_VModifyV: {
4798 /* 31 23 20 15 9 4
4799 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
4800 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
4801 */
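      /* Field-packing note (worked example): X_3_8_5_6_5_5 concatenates
         fields of widths 3,8,5,6,5,5 starting at bit 31.  The 8-bit
         field here is 01110 sz 1, so X01110111 selects 64-bit lanes,
         X01110101 32-bit, X01110011 16-bit and X01110001 8-bit.  Hence
         the SUQADD 32x4 case below should produce the word
         0x4EA03800 | (vN << 5) | vD. */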
4802 UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
4803 UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
4804 switch (i->ARM64in.VModifyV.op) {
4805 case ARM64vecmo_SUQADD64x2:
4806 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
4807 break;
4808 case ARM64vecmo_SUQADD32x4:
4809 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
4810 break;
4811 case ARM64vecmo_SUQADD16x8:
4812 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
4813 break;
4814 case ARM64vecmo_SUQADD8x16:
4815 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
4816 break;
4817 case ARM64vecmo_USQADD64x2:
4818 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
4819 break;
4820 case ARM64vecmo_USQADD32x4:
4821 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
4822 break;
4823 case ARM64vecmo_USQADD16x8:
4824 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
4825 break;
4826 case ARM64vecmo_USQADD8x16:
4827 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
4828 break;
4829 default:
4830 goto bad;
4831 }
4832 goto done;
4833 }
4834 case ARM64in_VUnaryV: {
4835 /* 31 23 20 15 9 4
4836 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
4837 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
4838 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
4839 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
4840 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
4841
4842 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
4843 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
4844 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
4845 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
4846
4847 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
4848 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
4849 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
4850
4851 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
4852 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
4853 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
4854
4855 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
4856
4857 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
4858 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
4859 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
4860 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
4861
4862 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
4863 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
4864 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
4865
4866 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
4867 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
4868
4869 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
4870 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
4871
4872 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
4873 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
4874
4875 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
4876 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
4877 */
4878 UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
4879 UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
4880 switch (i->ARM64in.VUnaryV.op) {
4881 case ARM64vecu_FABS64x2:
4882 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
4883 break;
4884 case ARM64vecu_FABS32x4:
4885 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
4886 break;
4887 case ARM64vecu_FNEG64x2:
4888 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
4889 break;
4890 case ARM64vecu_FNEG32x4:
4891 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
4892 break;
4893 case ARM64vecu_NOT:
4894 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
4895 break;
4896 case ARM64vecu_ABS64x2:
4897 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
4898 break;
4899 case ARM64vecu_ABS32x4:
4900 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
4901 break;
4902 case ARM64vecu_ABS16x8:
4903 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
4904 break;
4905 case ARM64vecu_ABS8x16:
4906 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
4907 break;
4908 case ARM64vecu_CLS32x4:
4909 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
4910 break;
4911 case ARM64vecu_CLS16x8:
4912 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
4913 break;
4914 case ARM64vecu_CLS8x16:
4915 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
4916 break;
4917 case ARM64vecu_CLZ32x4:
4918 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
4919 break;
4920 case ARM64vecu_CLZ16x8:
4921 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
4922 break;
4923 case ARM64vecu_CLZ8x16:
4924 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
4925 break;
4926 case ARM64vecu_CNT8x16:
4927 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
4928 break;
4929 case ARM64vecu_RBIT:
4930 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
4931 break;
4932 case ARM64vecu_REV1616B:
4933 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
4934 break;
4935 case ARM64vecu_REV3216B:
4936 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
4937 break;
4938 case ARM64vecu_REV328H:
4939 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
4940 break;
4941 case ARM64vecu_REV6416B:
4942 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
4943 break;
4944 case ARM64vecu_REV648H:
4945 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
4946 break;
4947 case ARM64vecu_REV644S:
4948 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
4949 break;
4950 case ARM64vecu_URECPE32x4:
4951 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
4952 break;
4953 case ARM64vecu_URSQRTE32x4:
4954 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
4955 break;
4956 case ARM64vecu_FRECPE64x2:
4957 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
4958 break;
4959 case ARM64vecu_FRECPE32x4:
4960 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
4961 break;
4962 case ARM64vecu_FRSQRTE64x2:
4963 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
4964 break;
4965 case ARM64vecu_FRSQRTE32x4:
4966 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
4967 break;
4968 case ARM64vecu_FSQRT64x2:
4969 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
4970 break;
4971 case ARM64vecu_FSQRT32x4:
4972 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
4973 break;
4974 default:
4975 goto bad;
4976 }
4977 goto done;
4978 }
4979 case ARM64in_VNarrowV: {
4980 /* 31 23 21 15 9 4
4981 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
4982 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
4983 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
4984
4985 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
4986 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
4987 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
4988
4989 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
4990 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
4991 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
4992
4993 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
4994 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
4995 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
4996 */
4997 UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
4998 UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
4999 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5000 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
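      /* dszBlg2 gives the destination lane size: 0 -> Vd.8b (from
         Vn.8h), 1 -> Vd.4h (from Vn.4s), 2 -> Vd.2s (from Vn.2d), as in
         the table above.  The (dszBlg2 << 1) below ORs it into insn
         bits 23:22, i.e. the 'size' field. */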
5001 switch (i->ARM64in.VNarrowV.op) {
5002 case ARM64vecna_XTN:
5003 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5004 X00001, X001010, vN, vD);
5005 goto done;
5006 case ARM64vecna_SQXTUN:
5007 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5008 X00001, X001010, vN, vD);
5009 goto done;
5010 case ARM64vecna_SQXTN:
5011 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5012 X00001, X010010, vN, vD);
5013 goto done;
5014 case ARM64vecna_UQXTN:
5015 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5016 X00001, X010010, vN, vD);
5017 goto done;
5018 default:
5019 break;
5020 }
5021 goto bad;
5022 }
5023 case ARM64in_VShiftImmV: {
5024 /*
5025 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5026 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5027
5028 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5029 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5030 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5031
5032 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5033 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5034 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5035
5036 where immh:immb
5037 = case T of
5038 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5039 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5040 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5041 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5042
5043 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5044
5045 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5046 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5047 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5048
5049 where immh:immb
5050 = case T of
5051 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5052 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5053 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5054 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
5055 */
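      /* Worked example for the right-shift form: USHR Vd.4s, Vn.4s, #3
         goes via right32x4 below; 32 - 3 = 29 = 0b11101, so the 7-bit
         immh:immb field becomes X0100000 | 29 = 0b0111101, i.e. the
         01xx:xxx shape described above. */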
5056 UInt vD = qregEnc(i->ARM64in.VShiftImmV.dst);
5057 UInt vN = qregEnc(i->ARM64in.VShiftImmV.src);
5058 UInt sh = i->ARM64in.VShiftImmV.amt;
5059 UInt tmpl = 0; /* invalid */
5060
5061 const UInt tmpl_USHR
5062 = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5063 const UInt tmpl_SSHR
5064 = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5065
5066 const UInt tmpl_UQSHRN
5067 = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5068 const UInt tmpl_SQSHRN
5069 = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5070 const UInt tmpl_SQSHRUN
5071 = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5072
5073 const UInt tmpl_UQRSHRN
5074 = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5075 const UInt tmpl_SQRSHRN
5076 = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5077 const UInt tmpl_SQRSHRUN
5078 = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5079
5080 const UInt tmpl_SHL
5081 = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5082
5083 const UInt tmpl_UQSHL
5084 = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5085 const UInt tmpl_SQSHL
5086 = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5087 const UInt tmpl_SQSHLU
5088 = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5089
5090 switch (i->ARM64in.VShiftImmV.op) {
5091 case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
5092 case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
5093 case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
5094 case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
5095 case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
5096 case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
5097 case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
5098 case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
5099 case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
5100 case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
5101 case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
5102 case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
5103 case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
5104 case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5105 case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
5106 case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
5107 case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
5108 case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
5109 case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
5110 case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
5111 case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
5112 case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
5113 case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
5114 case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
5115 case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
5116 case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5117 case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
5118 case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
5119 case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
5120 case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
5121 case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
5122 case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
5123 case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
5124 case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
5125 case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
5126 case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
5127 case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
5128 case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5129 case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
5130 case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
5131 case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
5132 case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
5133
5134 default: break;
5135
5136 right64x2:
5137 if (sh >= 1 && sh <= 63) {
5138 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5139 goto done;
5140 }
5141 break;
5142 right32x4:
5143 if (sh >= 1 && sh <= 32) {
5144 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5145 goto done;
5146 }
5147 break;
5148 right16x8:
5149 if (sh >= 1 && sh <= 16) {
5150 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5151 goto done;
5152 }
5153 break;
5154 right8x16:
5155 if (sh >= 1 && sh <= 8) {
5156 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5157 goto done;
5158 }
5159 break;
5160
5161 left64x2:
5162 if (sh >= 0 && sh <= 63) {
5163 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5164 goto done;
5165 }
5166 break;
5167 left32x4:
5168 if (sh >= 0 && sh <= 31) {
5169 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5170 goto done;
5171 }
5172 break;
5173 left16x8:
5174 if (sh >= 0 && sh <= 15) {
5175 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5176 goto done;
5177 }
5178 break;
5179 left8x16:
5180 if (sh >= 0 && sh <= 7) {
5181 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5182 goto done;
5183 }
5184 break;
5185 }
5186 goto bad;
5187 }
5188 case ARM64in_VExtV: {
5189 /*
5190 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5191 where imm4 = the shift amount, in bytes,
5192 Vn is low operand, Vm is high operand
5193 */
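      /* imm4 sits in insn bits 14:11, hence the (imm4 << 1) when it is
         ORed into the 6-bit field (insn bits 15:10) below.  E.g. with
         imm4 == 8 the result is the top 8 bytes of Vn followed by the
         low 8 bytes of Vm. */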
5194 UInt vD = qregEnc(i->ARM64in.VExtV.dst);
5195 UInt vN = qregEnc(i->ARM64in.VExtV.srcLo);
5196 UInt vM = qregEnc(i->ARM64in.VExtV.srcHi);
5197 UInt imm4 = i->ARM64in.VExtV.amtB;
5198 vassert(imm4 >= 1 && imm4 <= 15);
5199 *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5200 X000000 | (imm4 << 1), vN, vD);
5201 goto done;
5202 }
5203 case ARM64in_VImmQ: {
5204 UInt rQ = qregEnc(i->ARM64in.VImmQ.rQ);
5205 UShort imm = i->ARM64in.VImmQ.imm;
5206 vassert(rQ < 32);
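      /* As the handled cases below suggest, imm acts as a byte-enable
         mask: bit i of imm set means byte i of the 128-bit result is
         0xFF, clear means 0x00.  Only a few useful patterns are
         supported. */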
5207 switch (imm) {
5208 case 0x0000:
5209 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5210 *p++ = 0x4F000400 | rQ;
5211 goto done;
5212 case 0x0001:
5213 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5214 *p++ = 0x2F00E420 | rQ;
5215 goto done;
5216 case 0x0003:
5217 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5218 *p++ = 0x2F00E460 | rQ;
5219 goto done;
5220 case 0x000F:
5221 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5222 *p++ = 0x2F00E5E0 | rQ;
5223 goto done;
5224 case 0x003F:
5225 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5226 *p++ = 0x2F01E7E0 | rQ;
5227 goto done;
5228 case 0x00FF:
5229 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5230 *p++ = 0x2F07E7E0 | rQ;
5231 goto done;
5232 case 0xFFFF:
5233 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5234 *p++ = 0x6F000400 | rQ;
5235 goto done;
5236 default:
5237 break;
5238 }
5239 goto bad; /* no other handled cases right now */
5240 }
5241
5242 case ARM64in_VDfromX: {
5243 /* INS Vd.D[0], rX
5244 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5245 This isn't wonderful, in the sense that the upper half of
5246 the vector register stays unchanged and thus the insn is
5247 data dependent on its output register. */
5248 UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5249 UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5250 vassert(xx < 31);
5251 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5252 goto done;
5253 }
5254
5255 case ARM64in_VQfromX: {
5256 /* FMOV D, X
5257 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5258 I think this zeroes out the top half of the destination, which
5259 is what we need. TODO: can we do VDfromX and VQfromXX better? */
5260 UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5261 UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5262 vassert(xx < 31);
5263 *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5264 goto done;
5265 }
5266
5267 case ARM64in_VQfromXX: {
5268 /* What we really generate is a two insn sequence:
5269 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5270 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5271 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5272 */
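      /* The two constants below differ only in imm5 (insn bits 20:16):
         0b01000 selects D[0] and 0b11000 selects D[1]. */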
5273 UInt qq = qregEnc(i->ARM64in.VQfromXX.rQ);
5274 UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5275 UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5276 vassert(xhi < 31 && xlo < 31);
5277 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5278 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5279 goto done;
5280 }
5281
5282 case ARM64in_VXfromQ: {
5283 /* 010 01110 00 0 01000 001111 n d UMOV Xd, Vn.D[0]
5284 010 01110 00 0 11000 001111 n d UMOV Xd, Vn.D[1]
5285 */
5286 UInt dd = iregEnc(i->ARM64in.VXfromQ.rX);
5287 UInt nn = qregEnc(i->ARM64in.VXfromQ.rQ);
5288 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5289 vassert(dd < 31);
5290 vassert(laneNo < 2);
5291 *p++ = X_3_8_5_6_5_5(X010, X01110000,
5292 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5293 goto done;
5294 }
5295
5296 case ARM64in_VXfromDorS: {
5297 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5298 100 11110011 00110 000000 n d FMOV Xd, Dn
5299 */
5300 UInt dd = iregEnc(i->ARM64in.VXfromDorS.rX);
5301 UInt nn = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5302 Bool fromD = i->ARM64in.VXfromDorS.fromD;
5303 vassert(dd < 31);
5304 *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
5305 fromD ? X11110011 : X11110001,
5306 X00110, X000000, nn, dd);
5307 goto done;
5308 }
5309
5310 case ARM64in_VMov: {
5311 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5312 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5313 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
5314 */
5315 HReg rD = i->ARM64in.VMov.dst;
5316 HReg rN = i->ARM64in.VMov.src;
5317 switch (i->ARM64in.VMov.szB) {
5318 case 16: {
5319 UInt dd = qregEnc(rD);
5320 UInt nn = qregEnc(rN);
5321 *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
5322 goto done;
5323 }
5324 case 8: {
5325 UInt dd = dregEnc(rD);
5326 UInt nn = dregEnc(rN);
5327 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
5328 goto done;
5329 }
5330 default:
5331 break;
5332 }
5333 goto bad;
5334 }
5335
5336 case ARM64in_EvCheck: {
5337 /* The sequence is fixed (canned) except for the two amodes
5338 supplied by the insn. These don't change the length, though.
5339 We generate:
5340 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5341 subs w9, w9, #1
5342 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5343 bpl nofail
5344 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5345 br x9
5346 nofail:
5347 */
5348 UInt* p0 = p;
5349 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5350 i->ARM64in.EvCheck.amCounter);
5351 *p++ = 0x71000529; /* subs w9, w9, #1 */
5352 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5353 i->ARM64in.EvCheck.amCounter);
5354 *p++ = 0x54000065; /* bpl nofail */
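      /* 0x54000065 is B.PL with imm19 == 3, i.e. a forward branch of
         12 bytes past the ldr/br failure path below, taken while the
         decremented counter is still >= 0. */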
5355 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5356 i->ARM64in.EvCheck.amFailAddr);
5357 *p++ = 0xD61F0120; /* br x9 */
5358 /* nofail: */
5359
5360 /* Crosscheck */
5361 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
5362 goto done;
5363 }
5364
5365 case ARM64in_ProfInc: {
5366 /* We generate:
5367 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5368 expectation that a later call to LibVEX_patchProfCtr
5369 will be used to fill in the immediate fields once the
5370 right value is known.)
5371 imm64-exactly4 x9, 0x6555'7555'8555'9566
5372 ldr x8, [x9]
5373 add x8, x8, #1
5374 str x8, [x9]
5375 */
5376 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5377 *p++ = 0xF9400128;   /* ldr x8, [x9] */
5378 *p++ = 0x91000508;   /* add x8, x8, #1 */
5379 *p++ = 0xF9000128;   /* str x8, [x9] */
5380 /* Tell the caller .. */
5381 vassert(!(*is_profInc));
5382 *is_profInc = True;
5383 goto done;
5384 }
5385
5386 /* ... */
5387 default:
5388 goto bad;
5389 }
5390
5391 bad:
5392 ppARM64Instr(i);
5393 vpanic("emit_ARM64Instr");
5394 /*NOTREACHED*/
5395
5396 done:
5397 vassert(((UChar*)p) - &buf[0] <= 36);
5398 return ((UChar*)p) - &buf[0];
5399 }
5400
5401
5402 /* How big is an event check? See case for ARM64in_EvCheck in
5403 emit_ARM64Instr just above. That crosschecks what this returns, so
5404 we can tell if we're inconsistent. */
5405 Int evCheckSzB_ARM64 (void)
5406 {
5407 return 24;
5408 }
5409
5410
5411 /* NB: what goes on here has to be very closely coordinated with the
5412 emitInstr case for XDirect, above. */
5413 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
5414 void* place_to_chain,
5415 const void* disp_cp_chain_me_EXPECTED,
5416 const void* place_to_jump_to )
5417 {
5418 vassert(endness_host == VexEndnessLE);
5419
5420 /* What we're expecting to see is:
5421 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5422 movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
5423 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5424 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5425 blr x9
5426 viz
5427 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5428 D6 3F 01 20
5429 */
5430 UInt* p = (UInt*)place_to_chain;
5431 vassert(0 == (3 & (HWord)p));
5432 vassert(is_imm64_to_ireg_EXACTLY4(
5433 p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
5434 vassert(p[4] == 0xD63F0120);
5435
5436 /* And what we want to change it to is:
5437 movw x9, place_to_jump_to[15:0]
5438 movk x9, place_to_jump_to[31:16], lsl 16
5439 movk x9, place_to_jump_to[47:32], lsl 32
5440 movk x9, place_to_jump_to[63:48], lsl 48
5441 br x9
5442 viz
5443 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5444 D6 1F 01 20
5445
5446 The replacement has the same length as the original.
5447 */
5448 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
5449 p[4] = 0xD61F0120;   /* br x9 */
5450
5451 VexInvalRange vir = {(HWord)p, 20};
5452 return vir;
5453 }
5454
5455
5456 /* NB: what goes on here has to be very closely coordinated with the
5457 emitInstr case for XDirect, above. */
5458 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
5459 void* place_to_unchain,
5460 const void* place_to_jump_to_EXPECTED,
5461 const void* disp_cp_chain_me )
5462 {
5463 vassert(endness_host == VexEndnessLE);
5464
5465 /* What we're expecting to see is:
5466 movw x9, place_to_jump_to_EXPECTED[15:0]
5467 movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
5468 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5469 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5470 br x9
5471 viz
5472 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5473 D6 1F 01 20
5474 */
5475 UInt* p = (UInt*)place_to_unchain;
5476 vassert(0 == (3 & (HWord)p));
5477 vassert(is_imm64_to_ireg_EXACTLY4(
5478 p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
5479 vassert(p[4] == 0xD61F0120);
5480
5481 /* And what we want to change it to is:
5482 movw x9, disp_cp_chain_me_to[15:0]
5483 movk x9, disp_cp_chain_me_to[31:16], lsl 16
5484 movk x9, disp_cp_chain_me_to[47:32], lsl 32
5485 movk x9, disp_cp_chain_me_to[63:48], lsl 48
5486 blr x9
5487 viz
5488 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5489 D6 3F 01 20
5490 */
5491 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
5492 p[4] = 0xD63F0120;   /* blr x9 */
5493
5494 VexInvalRange vir = {(HWord)p, 20};
5495 return vir;
5496 }
5497
5498
5499 /* Patch the counter address into a profile inc point, as previously
5500 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
5501 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
5502 void* place_to_patch,
5503 const ULong* location_of_counter )
5504 {
5505 vassert(sizeof(ULong*) == 8);
5506 vassert(endness_host == VexEndnessLE);
5507 UInt* p = (UInt*)place_to_patch;
5508 vassert(0 == (3 & (HWord)p));
5509 vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
5510 vassert(p[4] == 0xF9400128);
5511 vassert(p[5] == 0x91000508);
5512 vassert(p[6] == 0xF9000128);
5513 imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
5514 VexInvalRange vir = {(HWord)p, 4*4};
5515 return vir;
5516 }
5517
5518 /*---------------------------------------------------------------*/
5519 /*--- end host_arm64_defs.c ---*/
5520 /*---------------------------------------------------------------*/
5521