1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 NEON support is
14 Copyright (C) 2010-2015 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
17
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
32
33 The GNU General Public License is contained in the file COPYING.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_arm_defs.h"
43
44 UInt arm_hwcaps = 0;
45
46
47 /* --------- Registers. --------- */
48
49 const RRegUniverse* getRRegUniverse_ARM ( void )
50 {
51 /* The real-register universe is a big constant, so we just want to
52 initialise it once. */
53 static RRegUniverse rRegUniverse_ARM;
54 static Bool rRegUniverse_ARM_initted = False;
55
56 /* Handy shorthand, nothing more */
57 RRegUniverse* ru = &rRegUniverse_ARM;
58
59 /* This isn't thread-safe. Sigh. */
60 if (LIKELY(rRegUniverse_ARM_initted))
61 return ru;
62
63 RRegUniverse__init(ru);
64
65 /* Add the registers. The initial segment of this array must be
66 those available for allocation by reg-alloc, and those that
67 follow are not available for allocation. */
68
69 /* Callee-saved ones are listed first, since we prefer them
70 if they're available. */
71 ru->regs[ru->size++] = hregARM_R4();
72 ru->regs[ru->size++] = hregARM_R5();
73 ru->regs[ru->size++] = hregARM_R6();
74 ru->regs[ru->size++] = hregARM_R7();
75 ru->regs[ru->size++] = hregARM_R10();
76 ru->regs[ru->size++] = hregARM_R11();
77 /* Otherwise we'll have to slum it out with caller-saved ones. */
78 ru->regs[ru->size++] = hregARM_R0();
79 ru->regs[ru->size++] = hregARM_R1();
80 ru->regs[ru->size++] = hregARM_R2();
81 ru->regs[ru->size++] = hregARM_R3();
82 ru->regs[ru->size++] = hregARM_R9();
83 /* FP registers. Note: these are all callee-save. Yay! Hence we
84 don't need to mention them as trashed in getHRegUsage for
85 ARMInstr_Call. */
86 ru->regs[ru->size++] = hregARM_D8();
87 ru->regs[ru->size++] = hregARM_D9();
88 ru->regs[ru->size++] = hregARM_D10();
89 ru->regs[ru->size++] = hregARM_D11();
90 ru->regs[ru->size++] = hregARM_D12();
91 ru->regs[ru->size++] = hregARM_S26();
92 ru->regs[ru->size++] = hregARM_S27();
93 ru->regs[ru->size++] = hregARM_S28();
94 ru->regs[ru->size++] = hregARM_S29();
95 ru->regs[ru->size++] = hregARM_S30();
96 ru->regs[ru->size++] = hregARM_Q8();
97 ru->regs[ru->size++] = hregARM_Q9();
98 ru->regs[ru->size++] = hregARM_Q10();
99 ru->regs[ru->size++] = hregARM_Q11();
100 ru->regs[ru->size++] = hregARM_Q12();
101 ru->allocable = ru->size;
102
103 /* And other regs, not available to the allocator. */
104
105 // unavail: r8 as GSP
106 // r12 is used as a spill/reload temporary
107 // r13 as SP
108 // r14 as LR
109 // r15 as PC
110 //
111 // All in all, we have 11 allocatable integer registers:
112 // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
113 // and r12 dedicated as a spill temporary.
114 // 13 14 and 15 are not under the allocator's control.
115 //
116 // Hence for the allocatable registers we have:
117 //
118 // callee-saved: 4 5 6 7 (8) 9 10 11
119 // caller-saved: 0 1 2 3
120 // Note 9 is ambiguous: the base EABI does not give an e/r-saved
121 // designation for it, but the Linux instantiation of the ABI
122 // specifies it as callee-saved.
123 //
124 // If the set of available registers changes or if the e/r status
125 // changes, be sure to re-check/sync the definition of
126 // getHRegUsage for ARMInstr_Call too.
127 ru->regs[ru->size++] = hregARM_R8();
128 ru->regs[ru->size++] = hregARM_R12();
129 ru->regs[ru->size++] = hregARM_R13();
130 ru->regs[ru->size++] = hregARM_R14();
131 ru->regs[ru->size++] = hregARM_R15();
132 ru->regs[ru->size++] = hregARM_Q13();
133 ru->regs[ru->size++] = hregARM_Q14();
134 ru->regs[ru->size++] = hregARM_Q15();
135
136 rRegUniverse_ARM_initted = True;
137
138 RRegUniverse__check_is_sane(ru);
139 return ru;
140 }
141
142
143 void ppHRegARM ( HReg reg ) {
144 Int r;
145 /* Be generic for all virtual regs. */
146 if (hregIsVirtual(reg)) {
147 ppHReg(reg);
148 return;
149 }
150 /* But specific for real regs. */
151 switch (hregClass(reg)) {
152 case HRcInt32:
153 r = hregEncoding(reg);
154 vassert(r >= 0 && r < 16);
155 vex_printf("r%d", r);
156 return;
157 case HRcFlt64:
158 r = hregEncoding(reg);
159 vassert(r >= 0 && r < 32);
160 vex_printf("d%d", r);
161 return;
162 case HRcFlt32:
163 r = hregEncoding(reg);
164 vassert(r >= 0 && r < 32);
165 vex_printf("s%d", r);
166 return;
167 case HRcVec128:
168 r = hregEncoding(reg);
169 vassert(r >= 0 && r < 16);
170 vex_printf("q%d", r);
171 return;
172 default:
173 vpanic("ppHRegARM");
174 }
175 }
176
177
178 /* --------- Condition codes, ARM encoding. --------- */
179
180 const HChar* showARMCondCode ( ARMCondCode cond ) {
181 switch (cond) {
182 case ARMcc_EQ: return "eq";
183 case ARMcc_NE: return "ne";
184 case ARMcc_HS: return "hs";
185 case ARMcc_LO: return "lo";
186 case ARMcc_MI: return "mi";
187 case ARMcc_PL: return "pl";
188 case ARMcc_VS: return "vs";
189 case ARMcc_VC: return "vc";
190 case ARMcc_HI: return "hi";
191 case ARMcc_LS: return "ls";
192 case ARMcc_GE: return "ge";
193 case ARMcc_LT: return "lt";
194 case ARMcc_GT: return "gt";
195 case ARMcc_LE: return "le";
196 case ARMcc_AL: return "al"; // default
197 case ARMcc_NV: return "nv";
198 default: vpanic("showARMCondCode");
199 }
200 }
201
202
203 /* --------- Mem AModes: Addressing Mode 1 --------- */
204
205 ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
206 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
207 am->tag = ARMam1_RI;
208 am->ARMam1.RI.reg = reg;
209 am->ARMam1.RI.simm13 = simm13;
210 vassert(-4095 <= simm13 && simm13 <= 4095);
211 return am;
212 }
213 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
214 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
215 am->tag = ARMam1_RRS;
216 am->ARMam1.RRS.base = base;
217 am->ARMam1.RRS.index = index;
218 am->ARMam1.RRS.shift = shift;
219 vassert(0 <= shift && shift <= 3);
220 return am;
221 }
222
223 void ppARMAMode1 ( ARMAMode1* am ) {
224 switch (am->tag) {
225 case ARMam1_RI:
226 vex_printf("%d(", am->ARMam1.RI.simm13);
227 ppHRegARM(am->ARMam1.RI.reg);
228 vex_printf(")");
229 break;
230 case ARMam1_RRS:
231 vex_printf("(");
232 ppHRegARM(am->ARMam1.RRS.base);
233 vex_printf(",");
234 ppHRegARM(am->ARMam1.RRS.index);
235 vex_printf(",%u)", am->ARMam1.RRS.shift);
236 break;
237 default:
238 vassert(0);
239 }
240 }
241
242 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
243 switch (am->tag) {
244 case ARMam1_RI:
245 addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
246 return;
247 case ARMam1_RRS:
248 // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
249 // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
250 // return;
251 default:
252 vpanic("addRegUsage_ARMAmode1");
253 }
254 }
255
256 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
257 switch (am->tag) {
258 case ARMam1_RI:
259 am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
260 return;
261 case ARMam1_RRS:
262 //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
263 //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
264 //return;
265 default:
266 vpanic("mapRegs_ARMAmode1");
267 }
268 }
269
270
271 /* --------- Mem AModes: Addressing Mode 2 --------- */
272
273 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
274 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
275 am->tag = ARMam2_RI;
276 am->ARMam2.RI.reg = reg;
277 am->ARMam2.RI.simm9 = simm9;
278 vassert(-255 <= simm9 && simm9 <= 255);
279 return am;
280 }
281 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
282 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
283 am->tag = ARMam2_RR;
284 am->ARMam2.RR.base = base;
285 am->ARMam2.RR.index = index;
286 return am;
287 }
288
289 void ppARMAMode2 ( ARMAMode2* am ) {
290 switch (am->tag) {
291 case ARMam2_RI:
292 vex_printf("%d(", am->ARMam2.RI.simm9);
293 ppHRegARM(am->ARMam2.RI.reg);
294 vex_printf(")");
295 break;
296 case ARMam2_RR:
297 vex_printf("(");
298 ppHRegARM(am->ARMam2.RR.base);
299 vex_printf(",");
300 ppHRegARM(am->ARMam2.RR.index);
301 vex_printf(")");
302 break;
303 default:
304 vassert(0);
305 }
306 }
307
308 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
309 switch (am->tag) {
310 case ARMam2_RI:
311 addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
312 return;
313 case ARMam2_RR:
314 // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
315 // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
316 // return;
317 default:
318 vpanic("addRegUsage_ARMAmode2");
319 }
320 }
321
322 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
323 switch (am->tag) {
324 case ARMam2_RI:
325 am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
326 return;
327 case ARMam2_RR:
328 //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
329 //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
330 //return;
331 default:
332 vpanic("mapRegs_ARMAmode2");
333 }
334 }
335
336
337 /* --------- Mem AModes: Addressing Mode VFP --------- */
338
339 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
340 ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
341 vassert(simm11 >= -1020 && simm11 <= 1020);
342 vassert(0 == (simm11 & 3));
343 am->reg = reg;
344 am->simm11 = simm11;
345 return am;
346 }
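/* Note: the asserts above reflect the VFP load/store addressing form,
   which takes a byte offset that is a multiple of 4 in [-1020, +1020]
   (an 8-bit word count, optionally negated).  For example,
   mkARMAModeV(reg, 1020) is the largest positive offset accepted, while
   an offset of 1024, or any non-multiple of 4, would fail the asserts. */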
347
348 void ppARMAModeV ( ARMAModeV* am ) {
349 vex_printf("%d(", am->simm11);
350 ppHRegARM(am->reg);
351 vex_printf(")");
352 }
353
354 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
355 addHRegUse(u, HRmRead, am->reg);
356 }
357
358 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
359 am->reg = lookupHRegRemap(m, am->reg);
360 }
361
362
363 /* --------- Mem AModes: Addressing Mode Neon ------- */
364
365 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
366 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
367 am->tag = ARMamN_RR;
368 am->ARMamN.RR.rN = rN;
369 am->ARMamN.RR.rM = rM;
370 return am;
371 }
372
373 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
374 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
375 am->tag = ARMamN_R;
376 am->ARMamN.R.rN = rN;
377 return am;
378 }
379
380 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
381 if (am->tag == ARMamN_R) {
382 addHRegUse(u, HRmRead, am->ARMamN.R.rN);
383 } else {
384 addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
385 addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
386 }
387 }
388
389 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
390 if (am->tag == ARMamN_R) {
391 am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
392 } else {
393 am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
394 am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
395 }
396 }
397
398 void ppARMAModeN ( ARMAModeN* am ) {
399 vex_printf("[");
400 if (am->tag == ARMamN_R) {
401 ppHRegARM(am->ARMamN.R.rN);
402 } else {
403 ppHRegARM(am->ARMamN.RR.rN);
404 }
405 vex_printf("]");
406 if (am->tag == ARMamN_RR) {
407 vex_printf(", ");
408 ppHRegARM(am->ARMamN.RR.rM);
409 }
410 }
411
412
413 /* --------- Reg or imm-8x4 operands --------- */
414
415 static UInt ROR32 ( UInt x, UInt sh ) {
416 vassert(sh >= 0 && sh < 32);
417 if (sh == 0)
418 return x;
419 else
420 return (x << (32-sh)) | (x >> sh);
421 }
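/* For illustration: ROR32 rotates right, so ROR32(0x000000FF, 8) ==
   0xFF000000 and ROR32(x, 0) == x.  It is used below to expand the
   rotated-immediate operand forms. */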
422
423 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
424 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
425 ri84->tag = ARMri84_I84;
426 ri84->ARMri84.I84.imm8 = imm8;
427 ri84->ARMri84.I84.imm4 = imm4;
428 vassert(imm8 >= 0 && imm8 <= 255);
429 vassert(imm4 >= 0 && imm4 <= 15);
430 return ri84;
431 }
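/* A sketch of the encoding, as implied by ppARMRI84 and fitsIn8x4 below:
   the pair (imm8, imm4) denotes the 32-bit value ROR32(imm8, 2*imm4),
   that is, an 8-bit constant rotated right by an even amount.  For
   example (imm8=0xFF, imm4=12) denotes 0x0000FF00, and
   (imm8=0xFF, imm4=4) denotes 0xFF000000. */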
432 ARMRI84* ARMRI84_R ( HReg reg ) {
433 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
434 ri84->tag = ARMri84_R;
435 ri84->ARMri84.R.reg = reg;
436 return ri84;
437 }
438
439 void ppARMRI84 ( ARMRI84* ri84 ) {
440 switch (ri84->tag) {
441 case ARMri84_I84:
442 vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
443 2 * ri84->ARMri84.I84.imm4));
444 break;
445 case ARMri84_R:
446 ppHRegARM(ri84->ARMri84.R.reg);
447 break;
448 default:
449 vassert(0);
450 }
451 }
452
453 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
454 switch (ri84->tag) {
455 case ARMri84_I84:
456 return;
457 case ARMri84_R:
458 addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
459 return;
460 default:
461 vpanic("addRegUsage_ARMRI84");
462 }
463 }
464
465 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
466 switch (ri84->tag) {
467 case ARMri84_I84:
468 return;
469 case ARMri84_R:
470 ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
471 return;
472 default:
473 vpanic("mapRegs_ARMRI84");
474 }
475 }
476
477
478 /* --------- Reg or imm5 operands --------- */
479
480 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
481 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
482 ri5->tag = ARMri5_I5;
483 ri5->ARMri5.I5.imm5 = imm5;
484 vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
485 return ri5;
486 }
487 ARMRI5* ARMRI5_R ( HReg reg ) {
488 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
489 ri5->tag = ARMri5_R;
490 ri5->ARMri5.R.reg = reg;
491 return ri5;
492 }
493
494 void ppARMRI5 ( ARMRI5* ri5 ) {
495 switch (ri5->tag) {
496 case ARMri5_I5:
497 vex_printf("%u", ri5->ARMri5.I5.imm5);
498 break;
499 case ARMri5_R:
500 ppHRegARM(ri5->ARMri5.R.reg);
501 break;
502 default:
503 vassert(0);
504 }
505 }
506
507 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
508 switch (ri5->tag) {
509 case ARMri5_I5:
510 return;
511 case ARMri5_R:
512 addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
513 return;
514 default:
515 vpanic("addRegUsage_ARMRI5");
516 }
517 }
518
519 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
520 switch (ri5->tag) {
521 case ARMri5_I5:
522 return;
523 case ARMri5_R:
524 ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
525 return;
526 default:
527 vpanic("mapRegs_ARMRI5");
528 }
529 }
530
531 /* -------- Neon Immediate operand --------- */
532
533 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
534 ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
535 i->type = type;
536 i->imm8 = imm8;
537 return i;
538 }
539
540 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
541 int i, j;
542 ULong y, x = imm->imm8;
543 switch (imm->type) {
544 case 3:
545 x = x << 8; /* fallthrough */
546 case 2:
547 x = x << 8; /* fallthrough */
548 case 1:
549 x = x << 8; /* fallthrough */
550 case 0:
551 return (x << 32) | x;
552 case 5:
553 case 6:
554 if (imm->type == 5)
555 x = x << 8;
556 else
557 x = (x << 8) | x;
558 /* fallthrough */
559 case 4:
560 x = (x << 16) | x;
561 return (x << 32) | x;
562 case 8:
563 x = (x << 8) | 0xFF;
564 /* fallthrough */
565 case 7:
566 x = (x << 8) | 0xFF;
567 return (x << 32) | x;
568 case 9:
569 x = 0;
570 for (i = 7; i >= 0; i--) {
571 y = ((ULong)imm->imm8 >> i) & 1;
572 for (j = 0; j < 8; j++) {
573 x = (x << 1) | y;
574 }
575 }
576 return x;
577 case 10:
578 x |= (x & 0x80) << 5;
579 x |= (~x & 0x40) << 5;
580 x &= 0x187F; /* 0001 1000 0111 1111 */
581 x |= (x & 0x40) << 4;
582 x |= (x & 0x40) << 3;
583 x |= (x & 0x40) << 2;
584 x |= (x & 0x40) << 1;
585 x = x << 19;
586 x = (x << 32) | x;
587 return x;
588 default:
589 vpanic("ARMNImm_to_Imm64");
590 }
591 }
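/* Worked examples of the expansion above (illustrative only):
     type 0,  imm8=0xAB -> 0x000000AB000000AB  (imm8 in each 32-bit lane)
     type 2,  imm8=0xAB -> 0x00AB000000AB0000  (imm8 << 16 in each lane)
     type 9,  imm8=0x81 -> 0xFF000000000000FF  (each bit widened to a byte)
     type 10, imm8=0x00 -> 0x4000000040000000  (VFP-style float expansion,
                                                2.0f in each 32-bit lane) */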
592
593 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
594 ARMNImm tmp;
595 if ((x & 0xFFFFFFFF) == (x >> 32)) {
596 if ((x & 0xFFFFFF00) == 0)
597 return ARMNImm_TI(0, x & 0xFF);
598 if ((x & 0xFFFF00FF) == 0)
599 return ARMNImm_TI(1, (x >> 8) & 0xFF);
600 if ((x & 0xFF00FFFF) == 0)
601 return ARMNImm_TI(2, (x >> 16) & 0xFF);
602 if ((x & 0x00FFFFFF) == 0)
603 return ARMNImm_TI(3, (x >> 24) & 0xFF);
604 if ((x & 0xFFFF00FF) == 0xFF)
605 return ARMNImm_TI(7, (x >> 8) & 0xFF);
606 if ((x & 0xFF00FFFF) == 0xFFFF)
607 return ARMNImm_TI(8, (x >> 16) & 0xFF);
608 if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
609 if ((x & 0xFF00) == 0)
610 return ARMNImm_TI(4, x & 0xFF);
611 if ((x & 0x00FF) == 0)
612 return ARMNImm_TI(5, (x >> 8) & 0xFF);
613 if ((x & 0xFF) == ((x >> 8) & 0xFF))
614 return ARMNImm_TI(6, x & 0xFF);
615 }
616 if ((x & 0x7FFFF) == 0) {
617 tmp.type = 10;
618 tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
619 if (ARMNImm_to_Imm64(&tmp) == x)
620 return ARMNImm_TI(tmp.type, tmp.imm8);
621 }
622 } else {
623 /* This can only be type 9. */
624 tmp.imm8 = (((x >> 56) & 1) << 7)
625 | (((x >> 48) & 1) << 6)
626 | (((x >> 40) & 1) << 5)
627 | (((x >> 32) & 1) << 4)
628 | (((x >> 24) & 1) << 3)
629 | (((x >> 16) & 1) << 2)
630 | (((x >> 8) & 1) << 1)
631 | (((x >> 0) & 1) << 0);
632 tmp.type = 9;
633 if (ARMNImm_to_Imm64 (&tmp) == x)
634 return ARMNImm_TI(tmp.type, tmp.imm8);
635 }
636 return NULL;
637 }
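/* Imm64_to_ARMNImm is the partial inverse of ARMNImm_to_Imm64: it
   returns NULL when the 64-bit value has no Neon-immediate encoding,
   so callers must check the result.  For example 0x000000AB000000AB
   encodes as (type 0, imm8 0xAB), whereas 0x0123456789ABCDEF yields
   NULL. */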
638
639 void ppARMNImm (ARMNImm* i) {
640 ULong x = ARMNImm_to_Imm64(i);
641 vex_printf("0x%llX%llX", x, x);
642 }
643
644 /* -- Register or scalar operand --- */
645
646 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
647 {
648 ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
649 p->tag = tag;
650 p->reg = reg;
651 p->index = index;
652 return p;
653 }
654
655 void ppARMNRS(ARMNRS *p)
656 {
657 ppHRegARM(p->reg);
658 if (p->tag == ARMNRS_Scalar) {
659 vex_printf("[%u]", p->index);
660 }
661 }
662
663 /* --------- Instructions. --------- */
664
665 const HChar* showARMAluOp ( ARMAluOp op ) {
666 switch (op) {
667 case ARMalu_ADD: return "add";
668 case ARMalu_ADDS: return "adds";
669 case ARMalu_ADC: return "adc";
670 case ARMalu_SUB: return "sub";
671 case ARMalu_SUBS: return "subs";
672 case ARMalu_SBC: return "sbc";
673 case ARMalu_AND: return "and";
674 case ARMalu_BIC: return "bic";
675 case ARMalu_OR: return "orr";
676 case ARMalu_XOR: return "xor";
677 default: vpanic("showARMAluOp");
678 }
679 }
680
681 const HChar* showARMShiftOp ( ARMShiftOp op ) {
682 switch (op) {
683 case ARMsh_SHL: return "shl";
684 case ARMsh_SHR: return "shr";
685 case ARMsh_SAR: return "sar";
686 default: vpanic("showARMShiftOp");
687 }
688 }
689
690 const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
691 switch (op) {
692 case ARMun_NEG: return "neg";
693 case ARMun_NOT: return "not";
694 case ARMun_CLZ: return "clz";
695 default: vpanic("showARMUnaryOp");
696 }
697 }
698
699 const HChar* showARMMulOp ( ARMMulOp op ) {
700 switch (op) {
701 case ARMmul_PLAIN: return "mul";
702 case ARMmul_ZX: return "umull";
703 case ARMmul_SX: return "smull";
704 default: vpanic("showARMMulOp");
705 }
706 }
707
708 const HChar* showARMVfpOp ( ARMVfpOp op ) {
709 switch (op) {
710 case ARMvfp_ADD: return "add";
711 case ARMvfp_SUB: return "sub";
712 case ARMvfp_MUL: return "mul";
713 case ARMvfp_DIV: return "div";
714 default: vpanic("showARMVfpOp");
715 }
716 }
717
718 const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
719 switch (op) {
720 case ARMvfpu_COPY: return "cpy";
721 case ARMvfpu_NEG: return "neg";
722 case ARMvfpu_ABS: return "abs";
723 case ARMvfpu_SQRT: return "sqrt";
724 default: vpanic("showARMVfpUnaryOp");
725 }
726 }
727
728 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
729 switch (op) {
730 case ARMneon_VAND: return "vand";
731 case ARMneon_VORR: return "vorr";
732 case ARMneon_VXOR: return "veor";
733 case ARMneon_VADD: return "vadd";
734 case ARMneon_VRHADDS: return "vrhadd";
735 case ARMneon_VRHADDU: return "vrhadd";
736 case ARMneon_VADDFP: return "vadd";
737 case ARMneon_VPADDFP: return "vpadd";
738 case ARMneon_VABDFP: return "vabd";
739 case ARMneon_VSUB: return "vsub";
740 case ARMneon_VSUBFP: return "vsub";
741 case ARMneon_VMINU: return "vmin";
742 case ARMneon_VMINS: return "vmin";
743 case ARMneon_VMINF: return "vmin";
744 case ARMneon_VMAXU: return "vmax";
745 case ARMneon_VMAXS: return "vmax";
746 case ARMneon_VMAXF: return "vmax";
747 case ARMneon_VQADDU: return "vqadd";
748 case ARMneon_VQADDS: return "vqadd";
749 case ARMneon_VQSUBU: return "vqsub";
750 case ARMneon_VQSUBS: return "vqsub";
751 case ARMneon_VCGTU: return "vcgt";
752 case ARMneon_VCGTS: return "vcgt";
753 case ARMneon_VCGTF: return "vcgt";
754 case ARMneon_VCGEF: return "vcgt";
755 case ARMneon_VCGEU: return "vcge";
756 case ARMneon_VCGES: return "vcge";
757 case ARMneon_VCEQ: return "vceq";
758 case ARMneon_VCEQF: return "vceq";
759 case ARMneon_VPADD: return "vpadd";
760 case ARMneon_VPMINU: return "vpmin";
761 case ARMneon_VPMINS: return "vpmin";
762 case ARMneon_VPMINF: return "vpmin";
763 case ARMneon_VPMAXU: return "vpmax";
764 case ARMneon_VPMAXS: return "vpmax";
765 case ARMneon_VPMAXF: return "vpmax";
766 case ARMneon_VEXT: return "vext";
767 case ARMneon_VMUL: return "vmuli";
768 case ARMneon_VMULLU: return "vmull";
769 case ARMneon_VMULLS: return "vmull";
770 case ARMneon_VMULP: return "vmul";
771 case ARMneon_VMULFP: return "vmul";
772 case ARMneon_VMULLP: return "vmul";
773 case ARMneon_VQDMULH: return "vqdmulh";
774 case ARMneon_VQRDMULH: return "vqrdmulh";
775 case ARMneon_VQDMULL: return "vqdmull";
776 case ARMneon_VTBL: return "vtbl";
777 case ARMneon_VRECPS: return "vrecps";
778 case ARMneon_VRSQRTS: return "vrsqrts";
779 case ARMneon_INVALID: return "??invalid??";
780 /* ... */
781 default: vpanic("showARMNeonBinOp");
782 }
783 }
784
785 const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
786 switch (op) {
787 case ARMneon_VAND:
788 case ARMneon_VORR:
789 case ARMneon_VXOR:
790 return "";
791 case ARMneon_VADD:
792 case ARMneon_VSUB:
793 case ARMneon_VEXT:
794 case ARMneon_VMUL:
795 case ARMneon_VPADD:
796 case ARMneon_VTBL:
797 case ARMneon_VCEQ:
798 return ".i";
799 case ARMneon_VRHADDU:
800 case ARMneon_VMINU:
801 case ARMneon_VMAXU:
802 case ARMneon_VQADDU:
803 case ARMneon_VQSUBU:
804 case ARMneon_VCGTU:
805 case ARMneon_VCGEU:
806 case ARMneon_VMULLU:
807 case ARMneon_VPMINU:
808 case ARMneon_VPMAXU:
809 return ".u";
810 case ARMneon_VRHADDS:
811 case ARMneon_VMINS:
812 case ARMneon_VMAXS:
813 case ARMneon_VQADDS:
814 case ARMneon_VQSUBS:
815 case ARMneon_VCGTS:
816 case ARMneon_VCGES:
817 case ARMneon_VQDMULL:
818 case ARMneon_VMULLS:
819 case ARMneon_VPMINS:
820 case ARMneon_VPMAXS:
821 case ARMneon_VQDMULH:
822 case ARMneon_VQRDMULH:
823 return ".s";
824 case ARMneon_VMULP:
825 case ARMneon_VMULLP:
826 return ".p";
827 case ARMneon_VADDFP:
828 case ARMneon_VABDFP:
829 case ARMneon_VPADDFP:
830 case ARMneon_VSUBFP:
831 case ARMneon_VMULFP:
832 case ARMneon_VMINF:
833 case ARMneon_VMAXF:
834 case ARMneon_VPMINF:
835 case ARMneon_VPMAXF:
836 case ARMneon_VCGTF:
837 case ARMneon_VCGEF:
838 case ARMneon_VCEQF:
839 case ARMneon_VRECPS:
840 case ARMneon_VRSQRTS:
841 return ".f";
842 /* ... */
843 default: vpanic("showARMNeonBinOpDataType");
844 }
845 }
846
847 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
848 switch (op) {
849 case ARMneon_COPY: return "vmov";
850 case ARMneon_COPYLS: return "vmov";
851 case ARMneon_COPYLU: return "vmov";
852 case ARMneon_COPYN: return "vmov";
853 case ARMneon_COPYQNSS: return "vqmovn";
854 case ARMneon_COPYQNUS: return "vqmovun";
855 case ARMneon_COPYQNUU: return "vqmovn";
856 case ARMneon_NOT: return "vmvn";
857 case ARMneon_EQZ: return "vceq";
858 case ARMneon_CNT: return "vcnt";
859 case ARMneon_CLS: return "vcls";
860 case ARMneon_CLZ: return "vclz";
861 case ARMneon_DUP: return "vdup";
862 case ARMneon_PADDLS: return "vpaddl";
863 case ARMneon_PADDLU: return "vpaddl";
864 case ARMneon_VQSHLNSS: return "vqshl";
865 case ARMneon_VQSHLNUU: return "vqshl";
866 case ARMneon_VQSHLNUS: return "vqshlu";
867 case ARMneon_REV16: return "vrev16";
868 case ARMneon_REV32: return "vrev32";
869 case ARMneon_REV64: return "vrev64";
870 case ARMneon_VCVTFtoU: return "vcvt";
871 case ARMneon_VCVTFtoS: return "vcvt";
872 case ARMneon_VCVTUtoF: return "vcvt";
873 case ARMneon_VCVTStoF: return "vcvt";
874 case ARMneon_VCVTFtoFixedU: return "vcvt";
875 case ARMneon_VCVTFtoFixedS: return "vcvt";
876 case ARMneon_VCVTFixedUtoF: return "vcvt";
877 case ARMneon_VCVTFixedStoF: return "vcvt";
878 case ARMneon_VCVTF32toF16: return "vcvt";
879 case ARMneon_VCVTF16toF32: return "vcvt";
880 case ARMneon_VRECIP: return "vrecip";
881 case ARMneon_VRECIPF: return "vrecipf";
882 case ARMneon_VNEGF: return "vneg";
883 case ARMneon_ABS: return "vabs";
884 case ARMneon_VABSFP: return "vabsfp";
885 case ARMneon_VRSQRTEFP: return "vrsqrtefp";
886 case ARMneon_VRSQRTE: return "vrsqrte";
887 /* ... */
888 default: vpanic("showARMNeonUnOp");
889 }
890 }
891
892 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
893 switch (op) {
894 case ARMneon_COPY:
895 case ARMneon_NOT:
896 return "";
897 case ARMneon_COPYN:
898 case ARMneon_EQZ:
899 case ARMneon_CNT:
900 case ARMneon_DUP:
901 case ARMneon_REV16:
902 case ARMneon_REV32:
903 case ARMneon_REV64:
904 return ".i";
905 case ARMneon_COPYLU:
906 case ARMneon_PADDLU:
907 case ARMneon_COPYQNUU:
908 case ARMneon_VQSHLNUU:
909 case ARMneon_VRECIP:
910 case ARMneon_VRSQRTE:
911 return ".u";
912 case ARMneon_CLS:
913 case ARMneon_CLZ:
914 case ARMneon_COPYLS:
915 case ARMneon_PADDLS:
916 case ARMneon_COPYQNSS:
917 case ARMneon_COPYQNUS:
918 case ARMneon_VQSHLNSS:
919 case ARMneon_VQSHLNUS:
920 case ARMneon_ABS:
921 return ".s";
922 case ARMneon_VRECIPF:
923 case ARMneon_VNEGF:
924 case ARMneon_VABSFP:
925 case ARMneon_VRSQRTEFP:
926 return ".f";
927 case ARMneon_VCVTFtoU: return ".u32.f32";
928 case ARMneon_VCVTFtoS: return ".s32.f32";
929 case ARMneon_VCVTUtoF: return ".f32.u32";
930 case ARMneon_VCVTStoF: return ".f32.s32";
931 case ARMneon_VCVTF16toF32: return ".f32.f16";
932 case ARMneon_VCVTF32toF16: return ".f16.f32";
933 case ARMneon_VCVTFtoFixedU: return ".u32.f32";
934 case ARMneon_VCVTFtoFixedS: return ".s32.f32";
935 case ARMneon_VCVTFixedUtoF: return ".f32.u32";
936 case ARMneon_VCVTFixedStoF: return ".f32.s32";
937 /* ... */
938 default: vpanic("showARMNeonUnOpDataType");
939 }
940 }
941
942 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
943 switch (op) {
944 case ARMneon_SETELEM: return "vmov";
945 case ARMneon_GETELEMU: return "vmov";
946 case ARMneon_GETELEMS: return "vmov";
947 case ARMneon_VDUP: return "vdup";
948 /* ... */
949 default: vpanic("showARMNeonUnarySOp");
950 }
951 }
952
953 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
954 switch (op) {
955 case ARMneon_SETELEM:
956 case ARMneon_VDUP:
957 return ".i";
958 case ARMneon_GETELEMS:
959 return ".s";
960 case ARMneon_GETELEMU:
961 return ".u";
962 /* ... */
963 default: vpanic("showARMNeonUnarySOp");
964 }
965 }
966
967 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
968 switch (op) {
969 case ARMneon_VSHL: return "vshl";
970 case ARMneon_VSAL: return "vshl";
971 case ARMneon_VQSHL: return "vqshl";
972 case ARMneon_VQSAL: return "vqshl";
973 /* ... */
974 default: vpanic("showARMNeonShiftOp");
975 }
976 }
977
978 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
979 switch (op) {
980 case ARMneon_VSHL:
981 case ARMneon_VQSHL:
982 return ".u";
983 case ARMneon_VSAL:
984 case ARMneon_VQSAL:
985 return ".s";
986 /* ... */
987 default: vpanic("showARMNeonShiftOpDataType");
988 }
989 }
990
991 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
992 switch (op) {
993 case ARMneon_TRN: return "vtrn";
994 case ARMneon_ZIP: return "vzip";
995 case ARMneon_UZP: return "vuzp";
996 /* ... */
997 default: vpanic("showARMNeonDualOp");
998 }
999 }
1000
1001 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1002 switch (op) {
1003 case ARMneon_TRN:
1004 case ARMneon_ZIP:
1005 case ARMneon_UZP:
1006 return "i";
1007 /* ... */
1008 default: vpanic("showARMNeonDualOp");
1009 }
1010 }
1011
1012 static const HChar* showARMNeonDataSize_wrk ( UInt size )
1013 {
1014 switch (size) {
1015 case 0: return "8";
1016 case 1: return "16";
1017 case 2: return "32";
1018 case 3: return "64";
1019 default: vpanic("showARMNeonDataSize");
1020 }
1021 }
1022
1023 static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1024 {
1025 switch (i->tag) {
1026 case ARMin_NBinary:
1027 if (i->ARMin.NBinary.op == ARMneon_VEXT)
1028 return "8";
1029 if (i->ARMin.NBinary.op == ARMneon_VAND ||
1030 i->ARMin.NBinary.op == ARMneon_VORR ||
1031 i->ARMin.NBinary.op == ARMneon_VXOR)
1032 return "";
1033 return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1034 case ARMin_NUnary:
1035 if (i->ARMin.NUnary.op == ARMneon_COPY ||
1036 i->ARMin.NUnary.op == ARMneon_NOT ||
1037 i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1038 i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1039 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1040 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1041 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1042 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1043 i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1044 i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1045 i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1046 i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1047 return "";
1048 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1049 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1050 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1051 UInt size;
1052 size = i->ARMin.NUnary.size;
1053 if (size & 0x40)
1054 return "64";
1055 if (size & 0x20)
1056 return "32";
1057 if (size & 0x10)
1058 return "16";
1059 if (size & 0x08)
1060 return "8";
1061 vpanic("showARMNeonDataSize");
1062 }
1063 return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1064 case ARMin_NUnaryS:
1065 if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1066 int size;
1067 size = i->ARMin.NUnaryS.size;
1068 if ((size & 1) == 1)
1069 return "8";
1070 if ((size & 3) == 2)
1071 return "16";
1072 if ((size & 7) == 4)
1073 return "32";
1074 vpanic("showARMNeonDataSize");
1075 }
1076 return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1077 case ARMin_NShift:
1078 return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1079 case ARMin_NDual:
1080 return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1081 default:
1082 vpanic("showARMNeonDataSize");
1083 }
1084 }
1085
1086 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1087 HReg dst, HReg argL, ARMRI84* argR ) {
1088 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1089 i->tag = ARMin_Alu;
1090 i->ARMin.Alu.op = op;
1091 i->ARMin.Alu.dst = dst;
1092 i->ARMin.Alu.argL = argL;
1093 i->ARMin.Alu.argR = argR;
1094 return i;
1095 }
1096 ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
1097 HReg dst, HReg argL, ARMRI5* argR ) {
1098 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1099 i->tag = ARMin_Shift;
1100 i->ARMin.Shift.op = op;
1101 i->ARMin.Shift.dst = dst;
1102 i->ARMin.Shift.argL = argL;
1103 i->ARMin.Shift.argR = argR;
1104 return i;
1105 }
1106 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1107 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1108 i->tag = ARMin_Unary;
1109 i->ARMin.Unary.op = op;
1110 i->ARMin.Unary.dst = dst;
1111 i->ARMin.Unary.src = src;
1112 return i;
1113 }
1114 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1115 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1116 i->tag = ARMin_CmpOrTst;
1117 i->ARMin.CmpOrTst.isCmp = isCmp;
1118 i->ARMin.CmpOrTst.argL = argL;
1119 i->ARMin.CmpOrTst.argR = argR;
1120 return i;
1121 }
1122 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1123 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1124 i->tag = ARMin_Mov;
1125 i->ARMin.Mov.dst = dst;
1126 i->ARMin.Mov.src = src;
1127 return i;
1128 }
1129 ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
1130 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1131 i->tag = ARMin_Imm32;
1132 i->ARMin.Imm32.dst = dst;
1133 i->ARMin.Imm32.imm32 = imm32;
1134 return i;
1135 }
1136 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1137 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1138 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1139 i->tag = ARMin_LdSt32;
1140 i->ARMin.LdSt32.cc = cc;
1141 i->ARMin.LdSt32.isLoad = isLoad;
1142 i->ARMin.LdSt32.rD = rD;
1143 i->ARMin.LdSt32.amode = amode;
1144 vassert(cc != ARMcc_NV);
1145 return i;
1146 }
1147 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1148 Bool isLoad, Bool signedLoad,
1149 HReg rD, ARMAMode2* amode ) {
1150 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1151 i->tag = ARMin_LdSt16;
1152 i->ARMin.LdSt16.cc = cc;
1153 i->ARMin.LdSt16.isLoad = isLoad;
1154 i->ARMin.LdSt16.signedLoad = signedLoad;
1155 i->ARMin.LdSt16.rD = rD;
1156 i->ARMin.LdSt16.amode = amode;
1157 vassert(cc != ARMcc_NV);
1158 return i;
1159 }
1160 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1161 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1162 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1163 i->tag = ARMin_LdSt8U;
1164 i->ARMin.LdSt8U.cc = cc;
1165 i->ARMin.LdSt8U.isLoad = isLoad;
1166 i->ARMin.LdSt8U.rD = rD;
1167 i->ARMin.LdSt8U.amode = amode;
1168 vassert(cc != ARMcc_NV);
1169 return i;
1170 }
1171 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1172 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1173 i->tag = ARMin_Ld8S;
1174 i->ARMin.Ld8S.cc = cc;
1175 i->ARMin.Ld8S.rD = rD;
1176 i->ARMin.Ld8S.amode = amode;
1177 vassert(cc != ARMcc_NV);
1178 return i;
1179 }
1180 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1181 ARMCondCode cond, Bool toFastEP ) {
1182 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1183 i->tag = ARMin_XDirect;
1184 i->ARMin.XDirect.dstGA = dstGA;
1185 i->ARMin.XDirect.amR15T = amR15T;
1186 i->ARMin.XDirect.cond = cond;
1187 i->ARMin.XDirect.toFastEP = toFastEP;
1188 return i;
1189 }
1190 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1191 ARMCondCode cond ) {
1192 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1193 i->tag = ARMin_XIndir;
1194 i->ARMin.XIndir.dstGA = dstGA;
1195 i->ARMin.XIndir.amR15T = amR15T;
1196 i->ARMin.XIndir.cond = cond;
1197 return i;
1198 }
1199 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1200 ARMCondCode cond, IRJumpKind jk ) {
1201 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1202 i->tag = ARMin_XAssisted;
1203 i->ARMin.XAssisted.dstGA = dstGA;
1204 i->ARMin.XAssisted.amR15T = amR15T;
1205 i->ARMin.XAssisted.cond = cond;
1206 i->ARMin.XAssisted.jk = jk;
1207 return i;
1208 }
1209 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1210 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1211 i->tag = ARMin_CMov;
1212 i->ARMin.CMov.cond = cond;
1213 i->ARMin.CMov.dst = dst;
1214 i->ARMin.CMov.src = src;
1215 vassert(cond != ARMcc_AL);
1216 return i;
1217 }
1218 ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1219 RetLoc rloc ) {
1220 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1221 i->tag = ARMin_Call;
1222 i->ARMin.Call.cond = cond;
1223 i->ARMin.Call.target = target;
1224 i->ARMin.Call.nArgRegs = nArgRegs;
1225 i->ARMin.Call.rloc = rloc;
1226 vassert(is_sane_RetLoc(rloc));
1227 return i;
1228 }
1229 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1230 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1231 i->tag = ARMin_Mul;
1232 i->ARMin.Mul.op = op;
1233 return i;
1234 }
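/* Note that ARMin_Mul carries no register fields: as the ppARMInstr
   case below suggests, the multiply is assumed to operate on fixed
   registers (result in r0, or r1:r0 for the widening forms, with
   arguments in r2 and r3), so the operands are set up separately. */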
1235 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1236 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237 i->tag = ARMin_LdrEX;
1238 i->ARMin.LdrEX.szB = szB;
1239 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1240 return i;
1241 }
1242 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1243 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1244 i->tag = ARMin_StrEX;
1245 i->ARMin.StrEX.szB = szB;
1246 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1247 return i;
1248 }
1249 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1250 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1251 i->tag = ARMin_VLdStD;
1252 i->ARMin.VLdStD.isLoad = isLoad;
1253 i->ARMin.VLdStD.dD = dD;
1254 i->ARMin.VLdStD.amode = am;
1255 return i;
1256 }
1257 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1258 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1259 i->tag = ARMin_VLdStS;
1260 i->ARMin.VLdStS.isLoad = isLoad;
1261 i->ARMin.VLdStS.fD = fD;
1262 i->ARMin.VLdStS.amode = am;
1263 return i;
1264 }
1265 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1266 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1267 i->tag = ARMin_VAluD;
1268 i->ARMin.VAluD.op = op;
1269 i->ARMin.VAluD.dst = dst;
1270 i->ARMin.VAluD.argL = argL;
1271 i->ARMin.VAluD.argR = argR;
1272 return i;
1273 }
1274 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1275 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1276 i->tag = ARMin_VAluS;
1277 i->ARMin.VAluS.op = op;
1278 i->ARMin.VAluS.dst = dst;
1279 i->ARMin.VAluS.argL = argL;
1280 i->ARMin.VAluS.argR = argR;
1281 return i;
1282 }
1283 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1284 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1285 i->tag = ARMin_VUnaryD;
1286 i->ARMin.VUnaryD.op = op;
1287 i->ARMin.VUnaryD.dst = dst;
1288 i->ARMin.VUnaryD.src = src;
1289 return i;
1290 }
1291 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1292 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1293 i->tag = ARMin_VUnaryS;
1294 i->ARMin.VUnaryS.op = op;
1295 i->ARMin.VUnaryS.dst = dst;
1296 i->ARMin.VUnaryS.src = src;
1297 return i;
1298 }
1299 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1300 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1301 i->tag = ARMin_VCmpD;
1302 i->ARMin.VCmpD.argL = argL;
1303 i->ARMin.VCmpD.argR = argR;
1304 return i;
1305 }
1306 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1307 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1308 i->tag = ARMin_VCMovD;
1309 i->ARMin.VCMovD.cond = cond;
1310 i->ARMin.VCMovD.dst = dst;
1311 i->ARMin.VCMovD.src = src;
1312 vassert(cond != ARMcc_AL);
1313 return i;
1314 }
1315 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1316 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1317 i->tag = ARMin_VCMovS;
1318 i->ARMin.VCMovS.cond = cond;
1319 i->ARMin.VCMovS.dst = dst;
1320 i->ARMin.VCMovS.src = src;
1321 vassert(cond != ARMcc_AL);
1322 return i;
1323 }
1324 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1325 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1326 i->tag = ARMin_VCvtSD;
1327 i->ARMin.VCvtSD.sToD = sToD;
1328 i->ARMin.VCvtSD.dst = dst;
1329 i->ARMin.VCvtSD.src = src;
1330 return i;
1331 }
1332 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1333 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1334 i->tag = ARMin_VXferD;
1335 i->ARMin.VXferD.toD = toD;
1336 i->ARMin.VXferD.dD = dD;
1337 i->ARMin.VXferD.rHi = rHi;
1338 i->ARMin.VXferD.rLo = rLo;
1339 return i;
1340 }
1341 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1342 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1343 i->tag = ARMin_VXferS;
1344 i->ARMin.VXferS.toS = toS;
1345 i->ARMin.VXferS.fD = fD;
1346 i->ARMin.VXferS.rLo = rLo;
1347 return i;
1348 }
1349 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1350 HReg dst, HReg src ) {
1351 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1352 i->tag = ARMin_VCvtID;
1353 i->ARMin.VCvtID.iToD = iToD;
1354 i->ARMin.VCvtID.syned = syned;
1355 i->ARMin.VCvtID.dst = dst;
1356 i->ARMin.VCvtID.src = src;
1357 return i;
1358 }
1359 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1360 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1361 i->tag = ARMin_FPSCR;
1362 i->ARMin.FPSCR.toFPSCR = toFPSCR;
1363 i->ARMin.FPSCR.iReg = iReg;
1364 return i;
1365 }
1366 ARMInstr* ARMInstr_MFence ( void ) {
1367 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1368 i->tag = ARMin_MFence;
1369 return i;
1370 }
1371 ARMInstr* ARMInstr_CLREX( void ) {
1372 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1373 i->tag = ARMin_CLREX;
1374 return i;
1375 }
1376
1377 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1378 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1379 i->tag = ARMin_NLdStQ;
1380 i->ARMin.NLdStQ.isLoad = isLoad;
1381 i->ARMin.NLdStQ.dQ = dQ;
1382 i->ARMin.NLdStQ.amode = amode;
1383 return i;
1384 }
1385
1386 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1387 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1388 i->tag = ARMin_NLdStD;
1389 i->ARMin.NLdStD.isLoad = isLoad;
1390 i->ARMin.NLdStD.dD = dD;
1391 i->ARMin.NLdStD.amode = amode;
1392 return i;
1393 }
1394
1395 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1396 UInt size, Bool Q ) {
1397 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1398 i->tag = ARMin_NUnary;
1399 i->ARMin.NUnary.op = op;
1400 i->ARMin.NUnary.src = nQ;
1401 i->ARMin.NUnary.dst = dQ;
1402 i->ARMin.NUnary.size = size;
1403 i->ARMin.NUnary.Q = Q;
1404 return i;
1405 }
1406
1407 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1408 UInt size, Bool Q ) {
1409 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1410 i->tag = ARMin_NUnaryS;
1411 i->ARMin.NUnaryS.op = op;
1412 i->ARMin.NUnaryS.src = src;
1413 i->ARMin.NUnaryS.dst = dst;
1414 i->ARMin.NUnaryS.size = size;
1415 i->ARMin.NUnaryS.Q = Q;
1416 return i;
1417 }
1418
1419 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1420 UInt size, Bool Q ) {
1421 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1422 i->tag = ARMin_NDual;
1423 i->ARMin.NDual.op = op;
1424 i->ARMin.NDual.arg1 = nQ;
1425 i->ARMin.NDual.arg2 = mQ;
1426 i->ARMin.NDual.size = size;
1427 i->ARMin.NDual.Q = Q;
1428 return i;
1429 }
1430
1431 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1432 HReg dst, HReg argL, HReg argR,
1433 UInt size, Bool Q ) {
1434 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1435 i->tag = ARMin_NBinary;
1436 i->ARMin.NBinary.op = op;
1437 i->ARMin.NBinary.argL = argL;
1438 i->ARMin.NBinary.argR = argR;
1439 i->ARMin.NBinary.dst = dst;
1440 i->ARMin.NBinary.size = size;
1441 i->ARMin.NBinary.Q = Q;
1442 return i;
1443 }
1444
1445 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1446 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1447 i->tag = ARMin_NeonImm;
1448 i->ARMin.NeonImm.dst = dst;
1449 i->ARMin.NeonImm.imm = imm;
1450 return i;
1451 }
1452
1453 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1454 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1455 i->tag = ARMin_NCMovQ;
1456 i->ARMin.NCMovQ.cond = cond;
1457 i->ARMin.NCMovQ.dst = dst;
1458 i->ARMin.NCMovQ.src = src;
1459 vassert(cond != ARMcc_AL);
1460 return i;
1461 }
1462
1463 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1464 HReg dst, HReg argL, HReg argR,
1465 UInt size, Bool Q ) {
1466 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1467 i->tag = ARMin_NShift;
1468 i->ARMin.NShift.op = op;
1469 i->ARMin.NShift.argL = argL;
1470 i->ARMin.NShift.argR = argR;
1471 i->ARMin.NShift.dst = dst;
1472 i->ARMin.NShift.size = size;
1473 i->ARMin.NShift.Q = Q;
1474 return i;
1475 }
1476
1477 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1478 {
1479 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1480 i->tag = ARMin_NShl64;
1481 i->ARMin.NShl64.dst = dst;
1482 i->ARMin.NShl64.src = src;
1483 i->ARMin.NShl64.amt = amt;
1484 vassert(amt >= 1 && amt <= 63);
1485 return i;
1486 }
1487
1488 /* Helper copy-pasted from isel.c */
1489 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1490 {
1491 UInt i;
1492 for (i = 0; i < 16; i++) {
1493 if (0 == (u & 0xFFFFFF00)) {
1494 *u8 = u;
1495 *u4 = i;
1496 return True;
1497 }
1498 u = ROR32(u, 30);
1499 }
1500 vassert(i == 16);
1501 return False;
1502 }
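/* Illustrative behaviour of fitsIn8x4: 0xFF000000 succeeds with
   u8=0xFF, u4=4 (since ROR32(0xFF, 2*4) == 0xFF000000), whereas
   0x00000101 fails, since its two set bits are 8 positions apart and
   can never both land in the low 8 bits under any rotation. */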
1503
1504 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1505 UInt u8, u4;
1506 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1507 /* Try to generate single ADD if possible */
1508 if (fitsIn8x4(&u8, &u4, imm32)) {
1509 i->tag = ARMin_Alu;
1510 i->ARMin.Alu.op = ARMalu_ADD;
1511 i->ARMin.Alu.dst = rD;
1512 i->ARMin.Alu.argL = rN;
1513 i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1514 } else {
1515 i->tag = ARMin_Add32;
1516 i->ARMin.Add32.rD = rD;
1517 i->ARMin.Add32.rN = rN;
1518 i->ARMin.Add32.imm32 = imm32;
1519 }
1520 return i;
1521 }
1522
1523 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1524 ARMAMode1* amFailAddr ) {
1525 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1526 i->tag = ARMin_EvCheck;
1527 i->ARMin.EvCheck.amCounter = amCounter;
1528 i->ARMin.EvCheck.amFailAddr = amFailAddr;
1529 return i;
1530 }
1531
1532 ARMInstr* ARMInstr_ProfInc ( void ) {
1533 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1534 i->tag = ARMin_ProfInc;
1535 return i;
1536 }
1537
1538 /* ... */
1539
1540 void ppARMInstr ( const ARMInstr* i ) {
1541 switch (i->tag) {
1542 case ARMin_Alu:
1543 vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
1544 ppHRegARM(i->ARMin.Alu.dst);
1545 vex_printf(", ");
1546 ppHRegARM(i->ARMin.Alu.argL);
1547 vex_printf(", ");
1548 ppARMRI84(i->ARMin.Alu.argR);
1549 return;
1550 case ARMin_Shift:
1551 vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
1552 ppHRegARM(i->ARMin.Shift.dst);
1553 vex_printf(", ");
1554 ppHRegARM(i->ARMin.Shift.argL);
1555 vex_printf(", ");
1556 ppARMRI5(i->ARMin.Shift.argR);
1557 return;
1558 case ARMin_Unary:
1559 vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
1560 ppHRegARM(i->ARMin.Unary.dst);
1561 vex_printf(", ");
1562 ppHRegARM(i->ARMin.Unary.src);
1563 return;
1564 case ARMin_CmpOrTst:
1565 vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1566 ppHRegARM(i->ARMin.CmpOrTst.argL);
1567 vex_printf(", ");
1568 ppARMRI84(i->ARMin.CmpOrTst.argR);
1569 return;
1570 case ARMin_Mov:
1571 vex_printf("mov ");
1572 ppHRegARM(i->ARMin.Mov.dst);
1573 vex_printf(", ");
1574 ppARMRI84(i->ARMin.Mov.src);
1575 return;
1576 case ARMin_Imm32:
1577 vex_printf("imm ");
1578 ppHRegARM(i->ARMin.Imm32.dst);
1579 vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1580 return;
1581 case ARMin_LdSt32:
1582 if (i->ARMin.LdSt32.isLoad) {
1583 vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1584 : showARMCondCode(i->ARMin.LdSt32.cc));
1585 ppHRegARM(i->ARMin.LdSt32.rD);
1586 vex_printf(", ");
1587 ppARMAMode1(i->ARMin.LdSt32.amode);
1588 } else {
1589 vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1590 : showARMCondCode(i->ARMin.LdSt32.cc));
1591 ppARMAMode1(i->ARMin.LdSt32.amode);
1592 vex_printf(", ");
1593 ppHRegARM(i->ARMin.LdSt32.rD);
1594 }
1595 return;
1596 case ARMin_LdSt16:
1597 if (i->ARMin.LdSt16.isLoad) {
1598 vex_printf("%s%s%s",
1599 i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1600 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1601 : showARMCondCode(i->ARMin.LdSt16.cc),
1602 i->ARMin.LdSt16.signedLoad ? " " : " ");
1603 ppHRegARM(i->ARMin.LdSt16.rD);
1604 vex_printf(", ");
1605 ppARMAMode2(i->ARMin.LdSt16.amode);
1606 } else {
1607 vex_printf("strh%s ",
1608 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1609 : showARMCondCode(i->ARMin.LdSt16.cc));
1610 ppARMAMode2(i->ARMin.LdSt16.amode);
1611 vex_printf(", ");
1612 ppHRegARM(i->ARMin.LdSt16.rD);
1613 }
1614 return;
1615 case ARMin_LdSt8U:
1616 if (i->ARMin.LdSt8U.isLoad) {
1617 vex_printf("ldrb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1618 : showARMCondCode(i->ARMin.LdSt8U.cc));
1619 ppHRegARM(i->ARMin.LdSt8U.rD);
1620 vex_printf(", ");
1621 ppARMAMode1(i->ARMin.LdSt8U.amode);
1622 } else {
1623 vex_printf("strb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1624 : showARMCondCode(i->ARMin.LdSt8U.cc));
1625 ppARMAMode1(i->ARMin.LdSt8U.amode);
1626 vex_printf(", ");
1627 ppHRegARM(i->ARMin.LdSt8U.rD);
1628 }
1629 return;
1630 case ARMin_Ld8S:
1631 vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? " "
1632 : showARMCondCode(i->ARMin.Ld8S.cc));
1633 ppARMAMode2(i->ARMin.Ld8S.amode);
1634 vex_printf(", ");
1635 ppHRegARM(i->ARMin.Ld8S.rD);
1636 return;
1637 case ARMin_XDirect:
1638 vex_printf("(xDirect) ");
1639 vex_printf("if (%%cpsr.%s) { ",
1640 showARMCondCode(i->ARMin.XDirect.cond));
1641 vex_printf("movw r12,0x%x; ",
1642 (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1643 vex_printf("movt r12,0x%x; ",
1644 (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1645 vex_printf("str r12,");
1646 ppARMAMode1(i->ARMin.XDirect.amR15T);
1647 vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1648 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1649 vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1650 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1651 vex_printf("blx r12 }");
1652 return;
1653 case ARMin_XIndir:
1654 vex_printf("(xIndir) ");
1655 vex_printf("if (%%cpsr.%s) { ",
1656 showARMCondCode(i->ARMin.XIndir.cond));
1657 vex_printf("str ");
1658 ppHRegARM(i->ARMin.XIndir.dstGA);
1659 vex_printf(",");
1660 ppARMAMode1(i->ARMin.XIndir.amR15T);
1661 vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1662 vex_printf("movt r12,HI16($disp_cp_xindir); ");
1663 vex_printf("blx r12 }");
1664 return;
1665 case ARMin_XAssisted:
1666 vex_printf("(xAssisted) ");
1667 vex_printf("if (%%cpsr.%s) { ",
1668 showARMCondCode(i->ARMin.XAssisted.cond));
1669 vex_printf("str ");
1670 ppHRegARM(i->ARMin.XAssisted.dstGA);
1671 vex_printf(",");
1672 ppARMAMode1(i->ARMin.XAssisted.amR15T);
1673 vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1674 (Int)i->ARMin.XAssisted.jk);
1675 vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1676 vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1677 vex_printf("blx r12 }");
1678 return;
1679 case ARMin_CMov:
1680 vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1681 ppHRegARM(i->ARMin.CMov.dst);
1682 vex_printf(", ");
1683 ppARMRI84(i->ARMin.CMov.src);
1684 return;
1685 case ARMin_Call:
1686 vex_printf("call%s ",
1687 i->ARMin.Call.cond==ARMcc_AL
1688 ? "" : showARMCondCode(i->ARMin.Call.cond));
1689 vex_printf("0x%x [nArgRegs=%d, ",
1690 i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1691 ppRetLoc(i->ARMin.Call.rloc);
1692 vex_printf("]");
1693 return;
1694 case ARMin_Mul:
1695 vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1696 if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1697 vex_printf("r0, r2, r3");
1698 } else {
1699 vex_printf("r1:r0, r2, r3");
1700 }
1701 return;
1702 case ARMin_LdrEX: {
1703 const HChar* sz = "";
1704 switch (i->ARMin.LdrEX.szB) {
1705 case 1: sz = "b"; break; case 2: sz = "h"; break;
1706 case 8: sz = "d"; break; case 4: break;
1707 default: vassert(0);
1708 }
1709 vex_printf("ldrex%s %sr2, [r4]",
1710 sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1711 return;
1712 }
1713 case ARMin_StrEX: {
1714 const HChar* sz = "";
1715 switch (i->ARMin.StrEX.szB) {
1716 case 1: sz = "b"; break; case 2: sz = "h"; break;
1717 case 8: sz = "d"; break; case 4: break;
1718 default: vassert(0);
1719 }
1720 vex_printf("strex%s r0, %sr2, [r4]",
1721 sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1722 return;
1723 }
1724 case ARMin_VLdStD:
1725 if (i->ARMin.VLdStD.isLoad) {
1726 vex_printf("fldd ");
1727 ppHRegARM(i->ARMin.VLdStD.dD);
1728 vex_printf(", ");
1729 ppARMAModeV(i->ARMin.VLdStD.amode);
1730 } else {
1731 vex_printf("fstd ");
1732 ppARMAModeV(i->ARMin.VLdStD.amode);
1733 vex_printf(", ");
1734 ppHRegARM(i->ARMin.VLdStD.dD);
1735 }
1736 return;
1737 case ARMin_VLdStS:
1738 if (i->ARMin.VLdStS.isLoad) {
1739 vex_printf("flds ");
1740 ppHRegARM(i->ARMin.VLdStS.fD);
1741 vex_printf(", ");
1742 ppARMAModeV(i->ARMin.VLdStS.amode);
1743 } else {
1744 vex_printf("fsts ");
1745 ppARMAModeV(i->ARMin.VLdStS.amode);
1746 vex_printf(", ");
1747 ppHRegARM(i->ARMin.VLdStS.fD);
1748 }
1749 return;
1750 case ARMin_VAluD:
1751 vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1752 ppHRegARM(i->ARMin.VAluD.dst);
1753 vex_printf(", ");
1754 ppHRegARM(i->ARMin.VAluD.argL);
1755 vex_printf(", ");
1756 ppHRegARM(i->ARMin.VAluD.argR);
1757 return;
1758 case ARMin_VAluS:
1759 vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1760 ppHRegARM(i->ARMin.VAluS.dst);
1761 vex_printf(", ");
1762 ppHRegARM(i->ARMin.VAluS.argL);
1763 vex_printf(", ");
1764 ppHRegARM(i->ARMin.VAluS.argR);
1765 return;
1766 case ARMin_VUnaryD:
1767 vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1768 ppHRegARM(i->ARMin.VUnaryD.dst);
1769 vex_printf(", ");
1770 ppHRegARM(i->ARMin.VUnaryD.src);
1771 return;
1772 case ARMin_VUnaryS:
1773 vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1774 ppHRegARM(i->ARMin.VUnaryS.dst);
1775 vex_printf(", ");
1776 ppHRegARM(i->ARMin.VUnaryS.src);
1777 return;
1778 case ARMin_VCmpD:
1779 vex_printf("fcmpd ");
1780 ppHRegARM(i->ARMin.VCmpD.argL);
1781 vex_printf(", ");
1782 ppHRegARM(i->ARMin.VCmpD.argR);
1783 vex_printf(" ; fmstat");
1784 return;
1785 case ARMin_VCMovD:
1786 vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1787 ppHRegARM(i->ARMin.VCMovD.dst);
1788 vex_printf(", ");
1789 ppHRegARM(i->ARMin.VCMovD.src);
1790 return;
1791 case ARMin_VCMovS:
1792 vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1793 ppHRegARM(i->ARMin.VCMovS.dst);
1794 vex_printf(", ");
1795 ppHRegARM(i->ARMin.VCMovS.src);
1796 return;
1797 case ARMin_VCvtSD:
1798 vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1799 ppHRegARM(i->ARMin.VCvtSD.dst);
1800 vex_printf(", ");
1801 ppHRegARM(i->ARMin.VCvtSD.src);
1802 return;
1803 case ARMin_VXferD:
1804 vex_printf("vmov ");
1805 if (i->ARMin.VXferD.toD) {
1806 ppHRegARM(i->ARMin.VXferD.dD);
1807 vex_printf(", ");
1808 ppHRegARM(i->ARMin.VXferD.rLo);
1809 vex_printf(", ");
1810 ppHRegARM(i->ARMin.VXferD.rHi);
1811 } else {
1812 ppHRegARM(i->ARMin.VXferD.rLo);
1813 vex_printf(", ");
1814 ppHRegARM(i->ARMin.VXferD.rHi);
1815 vex_printf(", ");
1816 ppHRegARM(i->ARMin.VXferD.dD);
1817 }
1818 return;
1819 case ARMin_VXferS:
1820 vex_printf("vmov ");
1821 if (i->ARMin.VXferS.toS) {
1822 ppHRegARM(i->ARMin.VXferS.fD);
1823 vex_printf(", ");
1824 ppHRegARM(i->ARMin.VXferS.rLo);
1825 } else {
1826 ppHRegARM(i->ARMin.VXferS.rLo);
1827 vex_printf(", ");
1828 ppHRegARM(i->ARMin.VXferS.fD);
1829 }
1830 return;
1831 case ARMin_VCvtID: {
1832 const HChar* nm = "?";
1833 if (i->ARMin.VCvtID.iToD) {
1834 nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1835 } else {
1836 nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1837 }
1838 vex_printf("%s ", nm);
1839 ppHRegARM(i->ARMin.VCvtID.dst);
1840 vex_printf(", ");
1841 ppHRegARM(i->ARMin.VCvtID.src);
1842 return;
1843 }
1844 case ARMin_FPSCR:
1845 if (i->ARMin.FPSCR.toFPSCR) {
1846 vex_printf("fmxr fpscr, ");
1847 ppHRegARM(i->ARMin.FPSCR.iReg);
1848 } else {
1849 vex_printf("fmrx ");
1850 ppHRegARM(i->ARMin.FPSCR.iReg);
1851 vex_printf(", fpscr");
1852 }
1853 return;
1854 case ARMin_MFence:
1855 vex_printf("(mfence) dsb sy; dmb sy; isb");
1856 return;
1857 case ARMin_CLREX:
1858 vex_printf("clrex");
1859 return;
1860 case ARMin_NLdStQ:
1861 if (i->ARMin.NLdStQ.isLoad)
1862 vex_printf("vld1.32 {");
1863 else
1864 vex_printf("vst1.32 {");
1865 ppHRegARM(i->ARMin.NLdStQ.dQ);
1866 vex_printf("} ");
1867 ppARMAModeN(i->ARMin.NLdStQ.amode);
1868 return;
1869 case ARMin_NLdStD:
1870 if (i->ARMin.NLdStD.isLoad)
1871 vex_printf("vld1.32 {");
1872 else
1873 vex_printf("vst1.32 {");
1874 ppHRegARM(i->ARMin.NLdStD.dD);
1875 vex_printf("} ");
1876 ppARMAModeN(i->ARMin.NLdStD.amode);
1877 return;
1878 case ARMin_NUnary:
1879 vex_printf("%s%s%s ",
1880 showARMNeonUnOp(i->ARMin.NUnary.op),
1881 showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1882 showARMNeonDataSize(i));
1883 ppHRegARM(i->ARMin.NUnary.dst);
1884 vex_printf(", ");
1885 ppHRegARM(i->ARMin.NUnary.src);
1886 if (i->ARMin.NUnary.op == ARMneon_EQZ)
1887 vex_printf(", #0");
1888 if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1889 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1890 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1891 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1892 vex_printf(", #%u", i->ARMin.NUnary.size);
1893 }
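/* For the VQSHLN family the 'size' field appears to encode both the
   lane width (as a leading tag bit) and the shift amount, hence the
   bit tests below to recover the printable shift count. */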
1894 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1895 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1896 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1897 UInt size;
1898 size = i->ARMin.NUnary.size;
1899 if (size & 0x40) {
1900 vex_printf(", #%u", size - 64);
1901 } else if (size & 0x20) {
1902 vex_printf(", #%u", size - 32);
1903 } else if (size & 0x10) {
1904 vex_printf(", #%u", size - 16);
1905 } else if (size & 0x08) {
1906 vex_printf(", #%u", size - 8);
1907 }
1908 }
1909 return;
1910 case ARMin_NUnaryS:
1911 vex_printf("%s%s%s ",
1912 showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1913 showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1914 showARMNeonDataSize(i));
1915 ppARMNRS(i->ARMin.NUnaryS.dst);
1916 vex_printf(", ");
1917 ppARMNRS(i->ARMin.NUnaryS.src);
1918 return;
1919 case ARMin_NShift:
1920 vex_printf("%s%s%s ",
1921 showARMNeonShiftOp(i->ARMin.NShift.op),
1922 showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1923 showARMNeonDataSize(i));
1924 ppHRegARM(i->ARMin.NShift.dst);
1925 vex_printf(", ");
1926 ppHRegARM(i->ARMin.NShift.argL);
1927 vex_printf(", ");
1928 ppHRegARM(i->ARMin.NShift.argR);
1929 return;
1930 case ARMin_NShl64:
1931 vex_printf("vshl.i64 ");
1932 ppHRegARM(i->ARMin.NShl64.dst);
1933 vex_printf(", ");
1934 ppHRegARM(i->ARMin.NShl64.src);
1935 vex_printf(", #%u", i->ARMin.NShl64.amt);
1936 return;
1937 case ARMin_NDual:
1938 vex_printf("%s%s%s ",
1939 showARMNeonDualOp(i->ARMin.NDual.op),
1940 showARMNeonDualOpDataType(i->ARMin.NDual.op),
1941 showARMNeonDataSize(i));
1942 ppHRegARM(i->ARMin.NDual.arg1);
1943 vex_printf(", ");
1944 ppHRegARM(i->ARMin.NDual.arg2);
1945 return;
1946 case ARMin_NBinary:
1947 vex_printf("%s%s%s",
1948 showARMNeonBinOp(i->ARMin.NBinary.op),
1949 showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1950 showARMNeonDataSize(i));
1951 vex_printf(" ");
1952 ppHRegARM(i->ARMin.NBinary.dst);
1953 vex_printf(", ");
1954 ppHRegARM(i->ARMin.NBinary.argL);
1955 vex_printf(", ");
1956 ppHRegARM(i->ARMin.NBinary.argR);
1957 return;
1958 case ARMin_NeonImm:
1959 vex_printf("vmov ");
1960 ppHRegARM(i->ARMin.NeonImm.dst);
1961 vex_printf(", ");
1962 ppARMNImm(i->ARMin.NeonImm.imm);
1963 return;
1964 case ARMin_NCMovQ:
1965 vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1966 ppHRegARM(i->ARMin.NCMovQ.dst);
1967 vex_printf(", ");
1968 ppHRegARM(i->ARMin.NCMovQ.src);
1969 return;
1970 case ARMin_Add32:
1971 vex_printf("add32 ");
1972 ppHRegARM(i->ARMin.Add32.rD);
1973 vex_printf(", ");
1974 ppHRegARM(i->ARMin.Add32.rN);
1975 vex_printf(", ");
1976 vex_printf("%u", i->ARMin.Add32.imm32);
1977 return;
1978 case ARMin_EvCheck:
1979 vex_printf("(evCheck) ldr r12,");
1980 ppARMAMode1(i->ARMin.EvCheck.amCounter);
1981 vex_printf("; subs r12,r12,$1; str r12,");
1982 ppARMAMode1(i->ARMin.EvCheck.amCounter);
1983 vex_printf("; bpl nofail; ldr r12,");
1984 ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
1985 vex_printf("; bx r12; nofail:");
1986 return;
1987 case ARMin_ProfInc:
1988 vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
1989 "movt r12,HI16($NotKnownYet); "
1990 "ldr r11,[r12]; "
1991 "adds r11,r11,$1; "
1992 "str r11,[r12]; "
1993 "ldr r11,[r12+4]; "
1994 "adc r11,r11,$0; "
1995 "str r11,[r12+4]");
1996 return;
1997 default:
1998 vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
1999 vpanic("ppARMInstr(1)");
2000 return;
2001 }
2002 }
2003
2004
2005 /* --------- Helpers for register allocation. --------- */
2006
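/* These are the hooks the register allocator drives: getRegUsage_ARMInstr
   reports which registers an instruction reads and writes,
   mapRegs_ARMInstr rewrites registers according to the allocator's chosen
   mapping, isMove_ARMInstr enables move coalescing, and
   genSpill_ARM/genReload_ARM generate the memory traffic for spilled
   values. */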
2007 void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2008 {
2009 vassert(mode64 == False);
2010 initHRegUsage(u);
2011 switch (i->tag) {
2012 case ARMin_Alu:
2013 addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2014 addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2015 addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2016 return;
2017 case ARMin_Shift:
2018 addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2019 addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2020 addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2021 return;
2022 case ARMin_Unary:
2023 addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2024 addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2025 return;
2026 case ARMin_CmpOrTst:
2027 addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2028 addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2029 return;
2030 case ARMin_Mov:
2031 addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2032 addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2033 return;
2034 case ARMin_Imm32:
2035 addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2036 return;
2037 case ARMin_LdSt32:
2038 addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2039 if (i->ARMin.LdSt32.isLoad) {
2040 addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2041 if (i->ARMin.LdSt32.cc != ARMcc_AL)
2042 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2043 } else {
2044 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2045 }
2046 return;
2047 case ARMin_LdSt16:
2048 addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2049 if (i->ARMin.LdSt16.isLoad) {
2050 addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2051 if (i->ARMin.LdSt16.cc != ARMcc_AL)
2052 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2053 } else {
2054 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2055 }
2056 return;
2057 case ARMin_LdSt8U:
2058 addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2059 if (i->ARMin.LdSt8U.isLoad) {
2060 addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2061 if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2062 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2063 } else {
2064 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2065 }
2066 return;
2067 case ARMin_Ld8S:
2068 addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2069 addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2070 if (i->ARMin.Ld8S.cc != ARMcc_AL)
2071 addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2072 return;
2073 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2074 conditionally exit the block. Hence we only need to list (1)
2075 the registers that they read, and (2) the registers that they
2076 write in the case where the block is not exited. (2) is
2077 empty, hence only (1) is relevant here. */
2078 case ARMin_XDirect:
2079 addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2080 return;
2081 case ARMin_XIndir:
2082 addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2083 addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2084 return;
2085 case ARMin_XAssisted:
2086 addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2087 addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2088 return;
2089 case ARMin_CMov:
2090 addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2091 addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
2092 addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2093 return;
2094 case ARMin_Call:
2095 /* logic and comments copied/modified from x86 back end */
2096 /* This is a bit subtle. */
2097 /* First off, claim it trashes all the caller-saved regs
2098 which fall within the register allocator's jurisdiction.
2099 These I believe to be r0,1,2,3. If it turns out that r9
2100 is also caller-saved, then we'll have to add that here
2101 too. */
2102 addHRegUse(u, HRmWrite, hregARM_R0());
2103 addHRegUse(u, HRmWrite, hregARM_R1());
2104 addHRegUse(u, HRmWrite, hregARM_R2());
2105 addHRegUse(u, HRmWrite, hregARM_R3());
2106 /* Now we have to state any parameter-carrying registers
2107 which might be read. This depends on nArgRegs. */
2108 switch (i->ARMin.Call.nArgRegs) {
2109 case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2110 case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2111 case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2112 case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2113 case 0: break;
2114 default: vpanic("getRegUsage_ARM:Call:regparms");
2115 }
2116 /* Finally, there is the issue that the insn trashes a
2117 register because the literal target address has to be
2118 loaded into a register. Fortunately, for the nArgRegs=
2119 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2120 this does not cause any further damage. For the
2121 nArgRegs=4 case, we'll have to choose another register
2122 arbitrarily since all the caller saved regs are used for
2123 parameters, and so we might as well choose r11.
2124 */
2125 if (i->ARMin.Call.nArgRegs == 4)
2126 addHRegUse(u, HRmWrite, hregARM_R11());
2127 /* Upshot of this is that the assembler really must observe
2128 the here-stated convention of which register to use as an
2129 address temporary, depending on nArgRegs: 0==r0,
2130 1==r1, 2==r2, 3==r3, 4==r11 */
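/* (The ARMin_Call case in emit_ARMInstr below follows exactly this
   mapping when it picks its scratch register.) */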
2131 return;
2132 case ARMin_Mul:
2133 addHRegUse(u, HRmRead, hregARM_R2());
2134 addHRegUse(u, HRmRead, hregARM_R3());
2135 addHRegUse(u, HRmWrite, hregARM_R0());
2136 if (i->ARMin.Mul.op != ARMmul_PLAIN)
2137 addHRegUse(u, HRmWrite, hregARM_R1());
2138 return;
2139 case ARMin_LdrEX:
2140 addHRegUse(u, HRmRead, hregARM_R4());
2141 addHRegUse(u, HRmWrite, hregARM_R2());
2142 if (i->ARMin.LdrEX.szB == 8)
2143 addHRegUse(u, HRmWrite, hregARM_R3());
2144 return;
2145 case ARMin_StrEX:
2146 addHRegUse(u, HRmRead, hregARM_R4());
2147 addHRegUse(u, HRmWrite, hregARM_R0());
2148 addHRegUse(u, HRmRead, hregARM_R2());
2149 if (i->ARMin.StrEX.szB == 8)
2150 addHRegUse(u, HRmRead, hregARM_R3());
2151 return;
2152 case ARMin_VLdStD:
2153 addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2154 if (i->ARMin.VLdStD.isLoad) {
2155 addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2156 } else {
2157 addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2158 }
2159 return;
2160 case ARMin_VLdStS:
2161 addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2162 if (i->ARMin.VLdStS.isLoad) {
2163 addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2164 } else {
2165 addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2166 }
2167 return;
2168 case ARMin_VAluD:
2169 addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2170 addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2171 addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2172 return;
2173 case ARMin_VAluS:
2174 addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2175 addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2176 addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2177 return;
2178 case ARMin_VUnaryD:
2179 addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2180 addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2181 return;
2182 case ARMin_VUnaryS:
2183 addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2184 addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2185 return;
2186 case ARMin_VCmpD:
2187 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2188 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2189 return;
2190 case ARMin_VCMovD:
2191 addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2192 addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2193 addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2194 return;
2195 case ARMin_VCMovS:
2196 addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2197 addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2198 addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2199 return;
2200 case ARMin_VCvtSD:
2201 addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2202 addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
2203 return;
2204 case ARMin_VXferD:
2205 if (i->ARMin.VXferD.toD) {
2206 addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2207 addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2208 addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2209 } else {
2210 addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2211 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2212 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2213 }
2214 return;
2215 case ARMin_VXferS:
2216 if (i->ARMin.VXferS.toS) {
2217 addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2218 addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2219 } else {
2220 addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2221 addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2222 }
2223 return;
2224 case ARMin_VCvtID:
2225 addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2226 addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2227 return;
2228 case ARMin_FPSCR:
2229 if (i->ARMin.FPSCR.toFPSCR)
2230 addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2231 else
2232 addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2233 return;
2234 case ARMin_MFence:
2235 return;
2236 case ARMin_CLREX:
2237 return;
2238 case ARMin_NLdStQ:
2239 if (i->ARMin.NLdStQ.isLoad)
2240 addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2241 else
2242 addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2243 addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2244 return;
2245 case ARMin_NLdStD:
2246 if (i->ARMin.NLdStD.isLoad)
2247 addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2248 else
2249 addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2250 addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2251 return;
2252 case ARMin_NUnary:
2253 addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2254 addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2255 return;
2256 case ARMin_NUnaryS:
2257 addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2258 addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2259 return;
2260 case ARMin_NShift:
2261 addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2262 addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2263 addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2264 return;
2265 case ARMin_NShl64:
2266 addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2267 addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2268 return;
2269 case ARMin_NDual:
2270 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2271 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2272 addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2273 addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2274 return;
2275 case ARMin_NBinary:
2276 addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2277 /* TODO: sometimes dst is also being read! */
2278 // XXX fix this
2279 addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2280 addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2281 return;
2282 case ARMin_NeonImm:
2283 addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2284 return;
2285 case ARMin_NCMovQ:
2286 addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2287 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2288 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2289 return;
2290 case ARMin_Add32:
2291 addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2292 addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2293 return;
2294 case ARMin_EvCheck:
2295 /* We expect both amodes only to mention r8, so this is in
2296 fact pointless, since r8 isn't allocatable, but
2297 anyway.. */
2298 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2299 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2300 addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2301 return;
2302 case ARMin_ProfInc:
2303 addHRegUse(u, HRmWrite, hregARM_R12());
2304 addHRegUse(u, HRmWrite, hregARM_R11());
2305 return;
2306 default:
2307 ppARMInstr(i);
2308 vpanic("getRegUsage_ARMInstr");
2309 }
2310 }
2311
2312
2313 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2314 {
2315 vassert(mode64 == False);
2316 switch (i->tag) {
2317 case ARMin_Alu:
2318 i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2319 i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2320 mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2321 return;
2322 case ARMin_Shift:
2323 i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2324 i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2325 mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2326 return;
2327 case ARMin_Unary:
2328 i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2329 i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2330 return;
2331 case ARMin_CmpOrTst:
2332 i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2333 mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2334 return;
2335 case ARMin_Mov:
2336 i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2337 mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2338 return;
2339 case ARMin_Imm32:
2340 i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2341 return;
2342 case ARMin_LdSt32:
2343 i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2344 mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2345 return;
2346 case ARMin_LdSt16:
2347 i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2348 mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2349 return;
2350 case ARMin_LdSt8U:
2351 i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2352 mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2353 return;
2354 case ARMin_Ld8S:
2355 i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2356 mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2357 return;
2358 case ARMin_XDirect:
2359 mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2360 return;
2361 case ARMin_XIndir:
2362 i->ARMin.XIndir.dstGA
2363 = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2364 mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2365 return;
2366 case ARMin_XAssisted:
2367 i->ARMin.XAssisted.dstGA
2368 = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2369 mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2370 return;
2371 case ARMin_CMov:
2372 i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2373 mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2374 return;
2375 case ARMin_Call:
2376 return;
2377 case ARMin_Mul:
2378 return;
2379 case ARMin_LdrEX:
2380 return;
2381 case ARMin_StrEX:
2382 return;
2383 case ARMin_VLdStD:
2384 i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2385 mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2386 return;
2387 case ARMin_VLdStS:
2388 i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2389 mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2390 return;
2391 case ARMin_VAluD:
2392 i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2393 i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2394 i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2395 return;
2396 case ARMin_VAluS:
2397 i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2398 i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2399 i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2400 return;
2401 case ARMin_VUnaryD:
2402 i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2403 i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2404 return;
2405 case ARMin_VUnaryS:
2406 i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2407 i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2408 return;
2409 case ARMin_VCmpD:
2410 i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2411 i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2412 return;
2413 case ARMin_VCMovD:
2414 i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2415 i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2416 return;
2417 case ARMin_VCMovS:
2418 i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2419 i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2420 return;
2421 case ARMin_VCvtSD:
2422 i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2423 i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2424 return;
2425 case ARMin_VXferD:
2426 i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2427 i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2428 i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2429 return;
2430 case ARMin_VXferS:
2431 i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2432 i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2433 return;
2434 case ARMin_VCvtID:
2435 i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2436 i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2437 return;
2438 case ARMin_FPSCR:
2439 i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2440 return;
2441 case ARMin_MFence:
2442 return;
2443 case ARMin_CLREX:
2444 return;
2445 case ARMin_NLdStQ:
2446 i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2447 mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2448 return;
2449 case ARMin_NLdStD:
2450 i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2451 mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2452 return;
2453 case ARMin_NUnary:
2454 i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2455 i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2456 return;
2457 case ARMin_NUnaryS:
2458 i->ARMin.NUnaryS.src->reg
2459 = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2460 i->ARMin.NUnaryS.dst->reg
2461 = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2462 return;
2463 case ARMin_NShift:
2464 i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2465 i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2466 i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2467 return;
2468 case ARMin_NShl64:
2469 i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2470 i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2471 return;
2472 case ARMin_NDual:
2473 i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2474 i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2475 return;
2476 case ARMin_NBinary:
2477 i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2478 i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2479 i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2480 return;
2481 case ARMin_NeonImm:
2482 i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2483 return;
2484 case ARMin_NCMovQ:
2485 i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2486 i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2487 return;
2488 case ARMin_Add32:
2489 i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2490 i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2491 return;
2492 case ARMin_EvCheck:
2493 /* We expect both amodes only to mention r8, so this is in
2494 fact pointless, since r8 isn't allocatable, but
2495 anyway.. */
2496 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2497 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2498 return;
2499 case ARMin_ProfInc:
2500 /* hardwires r11 and r12 -- nothing to modify. */
2501 return;
2502 default:
2503 ppARMInstr(i);
2504 vpanic("mapRegs_ARMInstr");
2505 }
2506 }
2507
2508 /* Figure out if i represents a reg-reg move, and if so assign the
2509 source and destination to *src and *dst. If in doubt say No. Used
2510 by the register allocator to do move coalescing.
2511 */
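/* For example, an ARMin_Mov whose source is an ARMri84_R register operand
   is a pure copy; reporting its source and destination lets the allocator
   try to assign both to the same real register and delete the copy. */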
2512 Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
2513 {
2514 /* Moves between integer regs */
2515 switch (i->tag) {
2516 case ARMin_Mov:
2517 if (i->ARMin.Mov.src->tag == ARMri84_R) {
2518 *src = i->ARMin.Mov.src->ARMri84.R.reg;
2519 *dst = i->ARMin.Mov.dst;
2520 return True;
2521 }
2522 break;
2523 case ARMin_VUnaryD:
2524 if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2525 *src = i->ARMin.VUnaryD.src;
2526 *dst = i->ARMin.VUnaryD.dst;
2527 return True;
2528 }
2529 break;
2530 case ARMin_VUnaryS:
2531 if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2532 *src = i->ARMin.VUnaryS.src;
2533 *dst = i->ARMin.VUnaryS.dst;
2534 return True;
2535 }
2536 break;
2537 case ARMin_NUnary:
2538 if (i->ARMin.NUnary.op == ARMneon_COPY) {
2539 *src = i->ARMin.NUnary.src;
2540 *dst = i->ARMin.NUnary.dst;
2541 return True;
2542 }
2543 break;
2544 default:
2545 break;
2546 }
2547
2548 return False;
2549 }
2550
2551
2552 /* Generate arm spill/reload instructions under the direction of the
2553 register allocator. Note it's critical these don't write the
2554 condition codes. */
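/* (The allocator may insert spill/reload code at arbitrary points,
   including between an instruction that sets the flags and a later
   conditional instruction that consumes them, so clobbering the
   condition codes here would silently corrupt the translation.) */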
2555
2556 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2557 HReg rreg, Int offsetB, Bool mode64 )
2558 {
2559 HRegClass rclass;
2560 vassert(offsetB >= 0);
2561 vassert(!hregIsVirtual(rreg));
2562 vassert(mode64 == False);
2563 *i1 = *i2 = NULL;
2564 rclass = hregClass(rreg);
2565 switch (rclass) {
2566 case HRcInt32:
2567 vassert(offsetB <= 4095);
2568 *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2569 rreg,
2570 ARMAMode1_RI(hregARM_R8(), offsetB) );
2571 return;
2572 case HRcFlt32:
2573 case HRcFlt64: {
2574 HReg r8 = hregARM_R8(); /* baseblock */
2575 HReg r12 = hregARM_R12(); /* spill temp */
2576 HReg base = r8;
2577 vassert(0 == (offsetB & 3));
2578 if (offsetB >= 1024) {
2579 Int offsetKB = offsetB / 1024;
2580 /* r12 = r8 + (1024 * offsetKB) */
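/* (imm4 == 11 rotates the 8-bit immediate right by 22, i.e. left
   by 10, so the shifter operand really is offsetKB * 1024.) */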
2581 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2582 ARMRI84_I84(offsetKB, 11));
2583 offsetB -= (1024 * offsetKB);
2584 base = r12;
2585 }
2586 vassert(offsetB <= 1020);
2587 if (rclass == HRcFlt32) {
2588 *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2589 rreg,
2590 mkARMAModeV(base, offsetB) );
2591 } else {
2592 *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2593 rreg,
2594 mkARMAModeV(base, offsetB) );
2595 }
2596 return;
2597 }
2598 case HRcVec128: {
2599 HReg r8 = hregARM_R8();
2600 HReg r12 = hregARM_R12();
2601 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2602 *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2603 return;
2604 }
2605 default:
2606 ppHRegClass(rclass);
2607 vpanic("genSpill_ARM: unimplemented regclass");
2608 }
2609 }
2610
2611 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2612 HReg rreg, Int offsetB, Bool mode64 )
2613 {
2614 HRegClass rclass;
2615 vassert(offsetB >= 0);
2616 vassert(!hregIsVirtual(rreg));
2617 vassert(mode64 == False);
2618 *i1 = *i2 = NULL;
2619 rclass = hregClass(rreg);
2620 switch (rclass) {
2621 case HRcInt32:
2622 vassert(offsetB <= 4095);
2623 *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2624 rreg,
2625 ARMAMode1_RI(hregARM_R8(), offsetB) );
2626 return;
2627 case HRcFlt32:
2628 case HRcFlt64: {
2629 HReg r8 = hregARM_R8(); /* baseblock */
2630 HReg r12 = hregARM_R12(); /* spill temp */
2631 HReg base = r8;
2632 vassert(0 == (offsetB & 3));
2633 if (offsetB >= 1024) {
2634 Int offsetKB = offsetB / 1024;
2635 /* r12 = r8 + (1024 * offsetKB) */
2636 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2637 ARMRI84_I84(offsetKB, 11));
2638 offsetB -= (1024 * offsetKB);
2639 base = r12;
2640 }
2641 vassert(offsetB <= 1020);
2642 if (rclass == HRcFlt32) {
2643 *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2644 rreg,
2645 mkARMAModeV(base, offsetB) );
2646 } else {
2647 *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2648 rreg,
2649 mkARMAModeV(base, offsetB) );
2650 }
2651 return;
2652 }
2653 case HRcVec128: {
2654 HReg r8 = hregARM_R8();
2655 HReg r12 = hregARM_R12();
2656 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2657 *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2658 return;
2659 }
2660 default:
2661 ppHRegClass(rclass);
2662 vpanic("genReload_ARM: unimplemented regclass");
2663 }
2664 }
2665
2666
2667 /* Emit an instruction into buf and return the number of bytes used.
2668 Note that buf is not the insn's final place, and therefore it is
2669 imperative to emit position-independent code. */
2670
2671 static inline UInt iregEnc ( HReg r )
2672 {
2673 UInt n;
2674 vassert(hregClass(r) == HRcInt32);
2675 vassert(!hregIsVirtual(r));
2676 n = hregEncoding(r);
2677 vassert(n <= 15);
2678 return n;
2679 }
2680
2681 static inline UInt dregEnc ( HReg r )
2682 {
2683 UInt n;
2684 vassert(hregClass(r) == HRcFlt64);
2685 vassert(!hregIsVirtual(r));
2686 n = hregEncoding(r);
2687 vassert(n <= 31);
2688 return n;
2689 }
2690
2691 static inline UInt fregEnc ( HReg r )
2692 {
2693 UInt n;
2694 vassert(hregClass(r) == HRcFlt32);
2695 vassert(!hregIsVirtual(r));
2696 n = hregEncoding(r);
2697 vassert(n <= 31);
2698 return n;
2699 }
2700
2701 static inline UInt qregEnc ( HReg r )
2702 {
2703 UInt n;
2704 vassert(hregClass(r) == HRcVec128);
2705 vassert(!hregIsVirtual(r));
2706 n = hregEncoding(r);
2707 vassert(n <= 15);
2708 return n;
2709 }
2710
2711 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2712 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2713 #define X0000 BITS4(0,0,0,0)
2714 #define X0001 BITS4(0,0,0,1)
2715 #define X0010 BITS4(0,0,1,0)
2716 #define X0011 BITS4(0,0,1,1)
2717 #define X0100 BITS4(0,1,0,0)
2718 #define X0101 BITS4(0,1,0,1)
2719 #define X0110 BITS4(0,1,1,0)
2720 #define X0111 BITS4(0,1,1,1)
2721 #define X1000 BITS4(1,0,0,0)
2722 #define X1001 BITS4(1,0,0,1)
2723 #define X1010 BITS4(1,0,1,0)
2724 #define X1011 BITS4(1,0,1,1)
2725 #define X1100 BITS4(1,1,0,0)
2726 #define X1101 BITS4(1,1,0,1)
2727 #define X1110 BITS4(1,1,1,0)
2728 #define X1111 BITS4(1,1,1,1)
2729
2730 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2731 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2732 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2733 (((zzx3) & 0xF) << 12))
2734
2735 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2736 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2737 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2738 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2739
2740 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2741 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2742 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2743 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2744
2745 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2746 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2747 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2748 (((zzx0) & 0xF) << 0))
2749
2750 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2751 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2752 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2753 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2754 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2755
2756 #define XX______(zzx7,zzx6) \
2757 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
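/* These macros just pack hex nibbles into a 32-bit word, most significant
   nibble first; the underscores mark nibble positions left as zero. For
   example, XXXXXXXX(0xE,0x1,0xA,0x0,0x1,0x0,0x0,0x0) == 0xE1A01000, the
   A1 encoding of 'mov r1, r0'. */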
2758
2759 /* Generate a skeletal insn that involves an RI84 shifter operand.
2760 Returns a word which is all zeroes apart from bits 25 and 11..0,
2761 since it is those that encode the shifter operand (at least to the
2762 extent that we care about it.) */
2763 static UInt skeletal_RI84 ( ARMRI84* ri )
2764 {
2765 UInt instr;
2766 if (ri->tag == ARMri84_I84) {
2767 vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2768 vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2769 instr = 1 << 25;
2770 instr |= (ri->ARMri84.I84.imm4 << 8);
2771 instr |= ri->ARMri84.I84.imm8;
2772 } else {
2773 instr = 0 << 25;
2774 instr |= iregEnc(ri->ARMri84.R.reg);
2775 }
2776 return instr;
2777 }
2778
2779 /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
2780 11..7. */
2781 static UInt skeletal_RI5 ( ARMRI5* ri )
2782 {
2783 UInt instr;
2784 if (ri->tag == ARMri5_I5) {
2785 UInt imm5 = ri->ARMri5.I5.imm5;
2786 vassert(imm5 >= 1 && imm5 <= 31);
2787 instr = 0 << 4;
2788 instr |= imm5 << 7;
2789 } else {
2790 instr = 1 << 4;
2791 instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2792 }
2793 return instr;
2794 }
2795
2796
2797 /* Get an immediate into a register, using only that
2798 register. (very lame..) */
2799 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2800 {
2801 UInt instr;
2802 vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2803 #if 0
2804 if (0 == (imm32 & ~0xFF)) {
2805 /* mov with an immediate shifter operand of (0, imm32) (??) */
2806 instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2807 instr |= imm32;
2808 *p++ = instr;
2809 } else {
2810 // this is very bad; causes Dcache pollution
2811 // ldr rD, [pc]
2812 instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2813 *p++ = instr;
2814 // b .+8
2815 instr = 0xEA000000;
2816 *p++ = instr;
2817 // .word imm32
2818 *p++ = imm32;
2819 }
2820 #else
2821 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2822 /* Generate movw rD, #low16. Then, if the high 16 are
2823 nonzero, generate movt rD, #high16. */
2824 UInt lo16 = imm32 & 0xFFFF;
2825 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2826 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2827 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2828 lo16 & 0xF);
2829 *p++ = instr;
2830 if (hi16 != 0) {
2831 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2832 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2833 hi16 & 0xF);
2834 *p++ = instr;
2835 }
2836 } else {
2837 UInt imm, rot;
2838 UInt op = X1010;
2839 UInt rN = 0;
2840 if ((imm32 & 0xFF) || (imm32 == 0)) {
2841 imm = imm32 & 0xFF;
2842 rot = 0;
2843 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2844 *p++ = instr;
2845 op = X1000;
2846 rN = rD;
2847 }
2848 if (imm32 & 0xFF000000) {
2849 imm = (imm32 >> 24) & 0xFF;
2850 rot = 4;
2851 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2852 *p++ = instr;
2853 op = X1000;
2854 rN = rD;
2855 }
2856 if (imm32 & 0xFF0000) {
2857 imm = (imm32 >> 16) & 0xFF;
2858 rot = 8;
2859 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2860 *p++ = instr;
2861 op = X1000;
2862 rN = rD;
2863 }
2864 if (imm32 & 0xFF00) {
2865 imm = (imm32 >> 8) & 0xFF;
2866 rot = 12;
2867 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2868 *p++ = instr;
2869 op = X1000;
2870 rN = rD;
2871 }
2872 }
2873 #endif
2874 return p;
2875 }
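/* An illustrative example (not from the original source): with
   imm32 = 0x11223344 the ARMv7 path emits
      movw rD, #0x3344 ; movt rD, #0x1122
   while the pre-v7 fallback builds the value a byte at a time:
      mov rD, #0x44 ; orr rD, rD, #0x11000000 ;
      orr rD, rD, #0x00220000 ; orr rD, rD, #0x00003300 */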
2876
2877 /* Get an immediate into a register, using only that register, and
2878 generating exactly 2 instructions, regardless of the value of the
2879 immediate. This is used when generating sections of code that need
2880 to be patched later, so as to guarantee a specific size. */
2881 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2882 {
2883 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2884 /* Generate movw rD, #low16 ; movt rD, #high16. */
2885 UInt lo16 = imm32 & 0xFFFF;
2886 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2887 UInt instr;
2888 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2889 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2890 lo16 & 0xF);
2891 *p++ = instr;
2892 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2893 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2894 hi16 & 0xF);
2895 *p++ = instr;
2896 } else {
2897 vassert(0); /* lose */
2898 }
2899 return p;
2900 }
2901
2902 /* Check whether p points at a 2-insn sequence cooked up by
2903 imm32_to_ireg_EXACTLY2(). */
2904 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2905 {
2906 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2907 /* Generate movw rD, #low16 ; movt rD, #high16. */
2908 UInt lo16 = imm32 & 0xFFFF;
2909 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2910 UInt i0, i1;
2911 i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2912 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2913 lo16 & 0xF);
2914 i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2915 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2916 hi16 & 0xF);
2917 return p[0] == i0 && p[1] == i1;
2918 } else {
2919 vassert(0); /* lose */
2920 }
2921 }
2922
2923
2924 static UInt* do_load_or_store32 ( UInt* p,
2925 Bool isLoad, UInt rD, ARMAMode1* am )
2926 {
2927 vassert(rD <= 12);
2928 vassert(am->tag == ARMam1_RI); // RR case is not handled
2929 UInt bB = 0;
2930 UInt bL = isLoad ? 1 : 0;
2931 Int simm12;
2932 UInt instr, bP;
2933 if (am->ARMam1.RI.simm13 < 0) {
2934 bP = 0;
2935 simm12 = -am->ARMam1.RI.simm13;
2936 } else {
2937 bP = 1;
2938 simm12 = am->ARMam1.RI.simm13;
2939 }
2940 vassert(simm12 >= 0 && simm12 <= 4095);
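/* Note: despite its name, bP lands in the U (add/subtract offset) bit --
   1 adds the 12-bit offset, 0 subtracts it. The P bit is fixed to 1 by
   the X0101 nibble and W is 0, so this is plain offset addressing with
   no writeback. */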
2941 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
2942 iregEnc(am->ARMam1.RI.reg),
2943 rD);
2944 instr |= simm12;
2945 *p++ = instr;
2946 return p;
2947 }
2948
2949
2950 /* Emit an instruction into buf and return the number of bytes used.
2951 Note that buf is not the insn's final place, and therefore it is
2952 imperative to emit position-independent code. If the emitted
2953 instruction was a profiler inc, set *is_profInc to True, else
2954 leave it unchanged. */
2955
2956 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
2957 UChar* buf, Int nbuf, const ARMInstr* i,
2958 Bool mode64, VexEndness endness_host,
2959 const void* disp_cp_chain_me_to_slowEP,
2960 const void* disp_cp_chain_me_to_fastEP,
2961 const void* disp_cp_xindir,
2962 const void* disp_cp_xassisted )
2963 {
2964 UInt* p = (UInt*)buf;
2965 vassert(nbuf >= 32);
2966 vassert(mode64 == False);
2967 vassert(0 == (((HWord)buf) & 3));
2968
2969 switch (i->tag) {
2970 case ARMin_Alu: {
2971 UInt instr, subopc;
2972 UInt rD = iregEnc(i->ARMin.Alu.dst);
2973 UInt rN = iregEnc(i->ARMin.Alu.argL);
2974 ARMRI84* argR = i->ARMin.Alu.argR;
2975 switch (i->ARMin.Alu.op) {
2976 case ARMalu_ADDS: /* fallthru */
2977 case ARMalu_ADD: subopc = X0100; break;
2978 case ARMalu_ADC: subopc = X0101; break;
2979 case ARMalu_SUBS: /* fallthru */
2980 case ARMalu_SUB: subopc = X0010; break;
2981 case ARMalu_SBC: subopc = X0110; break;
2982 case ARMalu_AND: subopc = X0000; break;
2983 case ARMalu_BIC: subopc = X1110; break;
2984 case ARMalu_OR: subopc = X1100; break;
2985 case ARMalu_XOR: subopc = X0001; break;
2986 default: goto bad;
2987 }
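/* The shifter-operand skeleton and the cond/opcode/register nibbles
   simply OR together. Illustrative example (not from the source): for
   ARMalu_ADD with dst=r0, argL=r1, argR=#42, skeletal_RI84 yields
   0x0200002A, the XXXXX___ term yields 0xE0810000, and the emitted
   word is 0xE281002A, i.e. 'add r0, r1, #42'. */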
2988 instr = skeletal_RI84(argR);
2989 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2990 (subopc << 1) & 0xF, rN, rD);
2991 if (i->ARMin.Alu.op == ARMalu_ADDS
2992 || i->ARMin.Alu.op == ARMalu_SUBS) {
2993 instr |= 1<<20; /* set the S bit */
2994 }
2995 *p++ = instr;
2996 goto done;
2997 }
2998 case ARMin_Shift: {
2999 UInt instr, subopc;
3000 UInt rD = iregEnc(i->ARMin.Shift.dst);
3001 UInt rM = iregEnc(i->ARMin.Shift.argL);
3002 ARMRI5* argR = i->ARMin.Shift.argR;
3003 switch (i->ARMin.Shift.op) {
3004 case ARMsh_SHL: subopc = X0000; break;
3005 case ARMsh_SHR: subopc = X0001; break;
3006 case ARMsh_SAR: subopc = X0010; break;
3007 default: goto bad;
3008 }
3009 instr = skeletal_RI5(argR);
3010 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3011 instr |= (subopc & 3) << 5;
3012 *p++ = instr;
3013 goto done;
3014 }
3015 case ARMin_Unary: {
3016 UInt instr;
3017 UInt rDst = iregEnc(i->ARMin.Unary.dst);
3018 UInt rSrc = iregEnc(i->ARMin.Unary.src);
3019 switch (i->ARMin.Unary.op) {
3020 case ARMun_CLZ:
3021 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3022 rDst,X1111,X0001,rSrc);
3023 *p++ = instr;
3024 goto done;
3025 case ARMun_NEG: /* RSB rD,rS,#0 */
3026 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3027 *p++ = instr;
3028 goto done;
3029 case ARMun_NOT: {
3030 UInt subopc = X1111; /* MVN */
3031 instr = rSrc;
3032 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3033 (subopc << 1) & 0xF, 0, rDst);
3034 *p++ = instr;
3035 goto done;
3036 }
3037 default:
3038 break;
3039 }
3040 goto bad;
3041 }
3042 case ARMin_CmpOrTst: {
3043 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3044 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3045 UInt SBZ = 0;
3046 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3047 ((subopc << 1) & 0xF) | 1,
3048 iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3049 *p++ = instr;
3050 goto done;
3051 }
3052 case ARMin_Mov: {
3053 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
3054 UInt subopc = X1101; /* MOV */
3055 UInt SBZ = 0;
3056 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3057 (subopc << 1) & 0xF, SBZ,
3058 iregEnc(i->ARMin.Mov.dst));
3059 *p++ = instr;
3060 goto done;
3061 }
3062 case ARMin_Imm32: {
3063 p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3064 i->ARMin.Imm32.imm32 );
3065 goto done;
3066 }
3067 case ARMin_LdSt32:
3068 case ARMin_LdSt8U: {
3069 UInt bL, bB;
3070 HReg rD;
3071 ARMAMode1* am;
3072 ARMCondCode cc;
3073 if (i->tag == ARMin_LdSt32) {
3074 bB = 0;
3075 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3076 am = i->ARMin.LdSt32.amode;
3077 rD = i->ARMin.LdSt32.rD;
3078 cc = i->ARMin.LdSt32.cc;
3079 } else {
3080 bB = 1;
3081 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3082 am = i->ARMin.LdSt8U.amode;
3083 rD = i->ARMin.LdSt8U.rD;
3084 cc = i->ARMin.LdSt8U.cc;
3085 }
3086 vassert(cc != ARMcc_NV);
3087 if (am->tag == ARMam1_RI) {
3088 Int simm12;
3089 UInt instr, bP;
3090 if (am->ARMam1.RI.simm13 < 0) {
3091 bP = 0;
3092 simm12 = -am->ARMam1.RI.simm13;
3093 } else {
3094 bP = 1;
3095 simm12 = am->ARMam1.RI.simm13;
3096 }
3097 vassert(simm12 >= 0 && simm12 <= 4095);
3098 instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3099 iregEnc(am->ARMam1.RI.reg),
3100 iregEnc(rD));
3101 instr |= simm12;
3102 *p++ = instr;
3103 goto done;
3104 } else {
3105 // RR case
3106 goto bad;
3107 }
3108 }
3109 case ARMin_LdSt16: {
3110 HReg rD = i->ARMin.LdSt16.rD;
3111 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3112 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3113 ARMAMode2* am = i->ARMin.LdSt16.amode;
3114 ARMCondCode cc = i->ARMin.LdSt16.cc;
3115 vassert(cc != ARMcc_NV);
3116 if (am->tag == ARMam2_RI) {
3117 HReg rN = am->ARMam2.RI.reg;
3118 Int simm8;
3119 UInt bP, imm8hi, imm8lo, instr;
3120 if (am->ARMam2.RI.simm9 < 0) {
3121 bP = 0;
3122 simm8 = -am->ARMam2.RI.simm9;
3123 } else {
3124 bP = 1;
3125 simm8 = am->ARMam2.RI.simm9;
3126 }
3127 vassert(simm8 >= 0 && simm8 <= 255);
3128 imm8hi = (simm8 >> 4) & 0xF;
3129 imm8lo = simm8 & 0xF;
3130 vassert(!(bL == 0 && bS == 1)); // "! signed store"
3131 /**/ if (bL == 0 && bS == 0) {
3132 // strh
3133 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3134 iregEnc(rD), imm8hi, X1011, imm8lo);
3135 *p++ = instr;
3136 goto done;
3137 }
3138 else if (bL == 1 && bS == 0) {
3139 // ldrh
3140 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3141 iregEnc(rD), imm8hi, X1011, imm8lo);
3142 *p++ = instr;
3143 goto done;
3144 }
3145 else if (bL == 1 && bS == 1) {
3146 // ldrsh
3147 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3148 iregEnc(rD), imm8hi, X1111, imm8lo);
3149 *p++ = instr;
3150 goto done;
3151 }
3152 else vassert(0); // ill-constructed insn
3153 } else {
3154 // RR case
3155 goto bad;
3156 }
3157 }
3158 case ARMin_Ld8S: {
3159 HReg rD = i->ARMin.Ld8S.rD;
3160 ARMAMode2* am = i->ARMin.Ld8S.amode;
3161 ARMCondCode cc = i->ARMin.Ld8S.cc;
3162 vassert(cc != ARMcc_NV);
3163 if (am->tag == ARMam2_RI) {
3164 HReg rN = am->ARMam2.RI.reg;
3165 Int simm8;
3166 UInt bP, imm8hi, imm8lo, instr;
3167 if (am->ARMam2.RI.simm9 < 0) {
3168 bP = 0;
3169 simm8 = -am->ARMam2.RI.simm9;
3170 } else {
3171 bP = 1;
3172 simm8 = am->ARMam2.RI.simm9;
3173 }
3174 vassert(simm8 >= 0 && simm8 <= 255);
3175 imm8hi = (simm8 >> 4) & 0xF;
3176 imm8lo = simm8 & 0xF;
3177 // ldrsb
3178 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3179 iregEnc(rD), imm8hi, X1101, imm8lo);
3180 *p++ = instr;
3181 goto done;
3182 } else {
3183 // RR case
3184 goto bad;
3185 }
3186 }
3187
3188 case ARMin_XDirect: {
3189 /* NB: what goes on here has to be very closely coordinated
3190 with the chainXDirect_ARM and unchainXDirect_ARM below. */
3191 /* We're generating chain-me requests here, so we need to be
3192 sure this is actually allowed -- no-redir translations
3193 can't use chain-me's. Hence: */
3194 vassert(disp_cp_chain_me_to_slowEP != NULL);
3195 vassert(disp_cp_chain_me_to_fastEP != NULL);
3196
3197 /* Use ptmp for backpatching conditional jumps. */
3198 UInt* ptmp = NULL;
3199
3200 /* First off, if this is conditional, create a conditional
3201 jump over the rest of it. Or at least, leave a space for
3202 it that we will shortly fill in. */
3203 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3204 vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3205 ptmp = p;
3206 *p++ = 0;
3207 }
3208
3209 /* Update the guest R15T. */
3210 /* movw r12, lo16(dstGA) */
3211 /* movt r12, hi16(dstGA) */
3212 /* str r12, amR15T */
3213 p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3214 p = do_load_or_store32(p, False/*!isLoad*/,
3215 /*r*/12, i->ARMin.XDirect.amR15T);
3216
3217 /* --- FIRST PATCHABLE BYTE follows --- */
3218 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3219 calling to) backs up the return address, so as to find the
3220 address of the first patchable byte. So: don't change the
3221 number of instructions (3) below. */
3222 /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3223 /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3224 /* blx r12 (A1) */
3225 const void* disp_cp_chain_me
3226 = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3227 : disp_cp_chain_me_to_slowEP;
3228 p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3229 (UInt)(Addr)disp_cp_chain_me);
3230 *p++ = 0xE12FFF3C;
3231 /* --- END of PATCHABLE BYTES --- */
3232
3233 /* Fix up the conditional jump, if there was one. */
3234 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3235 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3236 vassert(delta > 0 && delta < 40);
3237 vassert((delta & 3) == 0);
3238 UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3239 vassert(notCond <= 13); /* Neither AL nor NV */
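/* An ARM B<cond> holds a word offset relative to PC, which reads as
   the branch address + 8, hence: convert the byte distance to words
   and subtract 2. E.g. if p ends up 6 words past ptmp, delta is 24
   bytes, the encoded offset is 24/4 - 2 = 4, and the branch lands at
   ptmp + 8 + 4*4 = ptmp + 24, i.e. exactly at p. */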
3240 delta = (delta >> 2) - 2;
3241 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3242 }
3243 goto done;
3244 }
3245
3246 case ARMin_XIndir: {
3247 /* We're generating transfers that could lead indirectly to a
3248 chain-me, so we need to be sure this is actually allowed
3249 -- no-redir translations are not allowed to reach normal
3250 translations without going through the scheduler. That
3251 means no XDirects or XIndirs out from no-redir
3252 translations. Hence: */
3253 vassert(disp_cp_xindir != NULL);
3254
3255 /* Use ptmp for backpatching conditional jumps. */
3256 UInt* ptmp = NULL;
3257
3258 /* First off, if this is conditional, create a conditional
3259 jump over the rest of it. Or at least, leave a space for
3260 it that we will shortly fill in. */
3261 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3262 vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3263 ptmp = p;
3264 *p++ = 0;
3265 }
3266
3267 /* Update the guest R15T. */
3268 /* str r-dstGA, amR15T */
3269 p = do_load_or_store32(p, False/*!isLoad*/,
3270 iregEnc(i->ARMin.XIndir.dstGA),
3271 i->ARMin.XIndir.amR15T);
3272
3273 /* movw r12, lo16(VG_(disp_cp_xindir)) */
3274 /* movt r12, hi16(VG_(disp_cp_xindir)) */
3275 /* bx r12 (A1) */
3276 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3277 *p++ = 0xE12FFF1C;
3278
3279 /* Fix up the conditional jump, if there was one. */
3280 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3281 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3282 vassert(delta > 0 && delta < 40);
3283 vassert((delta & 3) == 0);
3284 UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3285 vassert(notCond <= 13); /* Neither AL nor NV */
3286 delta = (delta >> 2) - 2;
3287 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3288 }
3289 goto done;
3290 }
3291
3292 case ARMin_XAssisted: {
3293 /* Use ptmp for backpatching conditional jumps. */
3294 UInt* ptmp = NULL;
3295
3296 /* First off, if this is conditional, create a conditional
3297 jump over the rest of it. Or at least, leave a space for
3298 it that we will shortly fill in. */
3299 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3300 vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3301 ptmp = p;
3302 *p++ = 0;
3303 }
3304
3305 /* Update the guest R15T. */
3306 /* str r-dstGA, amR15T */
3307 p = do_load_or_store32(p, False/*!isLoad*/,
3308 iregEnc(i->ARMin.XAssisted.dstGA),
3309 i->ARMin.XAssisted.amR15T);
3310
3311 /* movw r8, $magic_number */
3312 UInt trcval = 0;
3313 switch (i->ARMin.XAssisted.jk) {
3314 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3315 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3316 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3317 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3318 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3319 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3320 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3321 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3322 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3323 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3324 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3325 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3326 /* We don't expect to see the following being assisted. */
3327 //case Ijk_Ret:
3328 //case Ijk_Call:
3329 /* fallthrough */
3330 default:
3331 ppIRJumpKind(i->ARMin.XAssisted.jk);
3332 vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3333 }
3334 vassert(trcval != 0);
3335 p = imm32_to_ireg(p, /*r*/8, trcval);
3336
3337 /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3338 /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3339 /* bx r12 (A1) */
3340 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3341 *p++ = 0xE12FFF1C;
3342
3343 /* Fix up the conditional jump, if there was one. */
3344 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3345 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3346 vassert(delta > 0 && delta < 40);
3347 vassert((delta & 3) == 0);
3348 UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3349 vassert(notCond <= 13); /* Neither AL nor NV */
3350 delta = (delta >> 2) - 2;
3351 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3352 }
3353 goto done;
3354 }
3355
3356 case ARMin_CMov: {
3357 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
3358 UInt subopc = X1101; /* MOV */
3359 UInt SBZ = 0;
3360 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3361 (subopc << 1) & 0xF, SBZ,
3362 iregEnc(i->ARMin.CMov.dst));
3363 *p++ = instr;
3364 goto done;
3365 }
3366
3367 case ARMin_Call: {
3368 UInt instr;
3369 /* Decide on a scratch reg used to hold the call address.
3370 This has to be done as per the comments in getRegUsage. */
3371 Int scratchNo;
3372 switch (i->ARMin.Call.nArgRegs) {
3373 case 0: scratchNo = 0; break;
3374 case 1: scratchNo = 1; break;
3375 case 2: scratchNo = 2; break;
3376 case 3: scratchNo = 3; break;
3377 case 4: scratchNo = 11; break;
3378 default: vassert(0);
3379 }
3380 /* If we don't need to do any fixup actions in the case that
3381 the call doesn't happen, just do the simple thing and emit
3382 straight-line code. We hope this is the common case. */
3383 if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3384 || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3385 // r"scratchNo" = &target
3386 p = imm32_to_ireg( (UInt*)p,
3387 scratchNo, (UInt)i->ARMin.Call.target );
3388 // blx{cond} r"scratchNo"
3389 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3390 X0011, scratchNo);
3391 instr |= 0xFFF << 8; // stick in the SBOnes
3392 *p++ = instr;
3393 } else {
3394 Int delta;
3395 /* Complex case. We have to generate an if-then-else
3396 diamond. */
3397 // before:
3398 // b{!cond} else:
3399 // r"scratchNo" = &target
3400 // blx{AL} r"scratchNo"
3401 // preElse:
3402 // b after:
3403 // else:
3404 // mov r0, #0x55555555 // possibly
3405 // mov r1, r0 // possibly
3406 // after:
3407
3408 // before:
3409 UInt* pBefore = p;
3410
3411 // b{!cond} else: // ptmp1 points here
3412 *p++ = 0; // filled in later
3413
3414 // r"scratchNo" = &target
3415 p = imm32_to_ireg( (UInt*)p,
3416 scratchNo, (UInt)i->ARMin.Call.target );
3417
3418 // blx{AL} r"scratchNo"
3419 instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3420 X0011, scratchNo);
3421 instr |= 0xFFF << 8; // stick in the SBOnes
3422 *p++ = instr;
3423
3424 // preElse:
3425 UInt* pPreElse = p;
3426
3427 // b after:
3428 *p++ = 0; // filled in later
3429
3430 // else:
3431 delta = (UChar*)p - (UChar*)pBefore;
3432 delta = (delta >> 2) - 2;
3433 *pBefore
3434 = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3435
3436 /* Do the 'else' actions */
3437 switch (i->ARMin.Call.rloc.pri) {
3438 case RLPri_Int:
3439 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3440 break;
3441 case RLPri_2Int:
3442 vassert(0); //ATC
3443 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3444 /* mov r1, r0 */
3445 *p++ = 0xE1A01000;
3446 break;
3447 case RLPri_None: case RLPri_INVALID: default:
3448 vassert(0);
3449 }
3450
3451 // after:
3452 delta = (UChar*)p - (UChar*)pPreElse;
3453 delta = (delta >> 2) - 2;
3454 *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3455 }
3456
3457 goto done;
3458 }
3459
3460 case ARMin_Mul: {
3461 /* E0000392 mul r0, r2, r3
3462 E0810392 umull r0(LO), r1(HI), r2, r3
3463 E0C10392 smull r0(LO), r1(HI), r2, r3
3464 */
3465 switch (i->ARMin.Mul.op) {
3466 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3467 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
3468 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
3469 default: vassert(0);
3470 }
3471 goto bad;
3472 }
3473 case ARMin_LdrEX: {
3474 /* E1D42F9F ldrexb r2, [r4]
3475 E1F42F9F ldrexh r2, [r4]
3476 E1942F9F ldrex r2, [r4]
3477 E1B42F9F ldrexd r2, r3, [r4]
3478 */
3479 switch (i->ARMin.LdrEX.szB) {
3480 case 1: *p++ = 0xE1D42F9F; goto done;
3481 case 2: *p++ = 0xE1F42F9F; goto done;
3482 case 4: *p++ = 0xE1942F9F; goto done;
3483 case 8: *p++ = 0xE1B42F9F; goto done;
3484 default: break;
3485 }
3486 goto bad;
3487 }
3488 case ARMin_StrEX: {
3489 /* E1C40F92 strexb r0, r2, [r4]
3490 E1E40F92 strexh r0, r2, [r4]
3491 E1840F92 strex r0, r2, [r4]
3492 E1A40F92 strexd r0, r2, r3, [r4]
3493 */
3494 switch (i->ARMin.StrEX.szB) {
3495 case 1: *p++ = 0xE1C40F92; goto done;
3496 case 2: *p++ = 0xE1E40F92; goto done;
3497 case 4: *p++ = 0xE1840F92; goto done;
3498 case 8: *p++ = 0xE1A40F92; goto done;
3499 default: break;
3500 }
3501 goto bad;
3502 }
3503 case ARMin_VLdStD: {
3504 UInt dD = dregEnc(i->ARMin.VLdStD.dD);
3505 UInt rN = iregEnc(i->ARMin.VLdStD.amode->reg);
3506 Int simm11 = i->ARMin.VLdStD.amode->simm11;
3507 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3508 UInt bU = simm11 >= 0 ? 1 : 0;
3509 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
3510 UInt insn;
3511 vassert(0 == (off8 & 3));
3512 off8 >>= 2;
3513 vassert(0 == (off8 & 0xFFFFFF00));
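/* The VFP load/store (VLDR/VSTR) encodes its offset as an unsigned
   word count in imm8 plus an add/subtract bit U.  E.g. simm11 == -16
   gives bU == 0 and off8 == 4, i.e. vldr/vstr dD, [rN, #-16].  The
   single-precision case below uses the same scheme. */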
3514 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3515 insn |= off8;
3516 *p++ = insn;
3517 goto done;
3518 }
3519 case ARMin_VLdStS: {
3520 UInt fD = fregEnc(i->ARMin.VLdStS.fD);
3521 UInt rN = iregEnc(i->ARMin.VLdStS.amode->reg);
3522 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3523 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3524 UInt bU = simm11 >= 0 ? 1 : 0;
3525 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3526 UInt bD = fD & 1;
3527 UInt insn;
3528 vassert(0 == (off8 & 3));
3529 off8 >>= 2;
3530 vassert(0 == (off8 & 0xFFFFFF00));
3531 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3532 insn |= off8;
3533 *p++ = insn;
3534 goto done;
3535 }
3536 case ARMin_VAluD: {
3537 UInt dN = dregEnc(i->ARMin.VAluD.argL);
3538 UInt dD = dregEnc(i->ARMin.VAluD.dst);
3539 UInt dM = dregEnc(i->ARMin.VAluD.argR);
3540 UInt pqrs = X1111; /* undefined */
3541 switch (i->ARMin.VAluD.op) {
3542 case ARMvfp_ADD: pqrs = X0110; break;
3543 case ARMvfp_SUB: pqrs = X0111; break;
3544 case ARMvfp_MUL: pqrs = X0100; break;
3545 case ARMvfp_DIV: pqrs = X1000; break;
3546 default: goto bad;
3547 }
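/* The (p,q,r,s) bits select the VFP data-processing operation
   (ADD=0110, SUB=0111, MUL=0100, DIV=1000); p, q and r land in bits
   23, 21 and 20 of the encoding built below, and s in bit 6.  The
   single-precision case that follows uses the same scheme. */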
3548 vassert(pqrs != X1111);
3549 UInt bP = (pqrs >> 3) & 1;
3550 UInt bQ = (pqrs >> 2) & 1;
3551 UInt bR = (pqrs >> 1) & 1;
3552 UInt bS = (pqrs >> 0) & 1;
3553 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3554 X1011, BITS4(0,bS,0,0), dM);
3555 *p++ = insn;
3556 goto done;
3557 }
3558 case ARMin_VAluS: {
3559 UInt dN = fregEnc(i->ARMin.VAluS.argL);
3560 UInt dD = fregEnc(i->ARMin.VAluS.dst);
3561 UInt dM = fregEnc(i->ARMin.VAluS.argR);
3562 UInt bN = dN & 1;
3563 UInt bD = dD & 1;
3564 UInt bM = dM & 1;
3565 UInt pqrs = X1111; /* undefined */
3566 switch (i->ARMin.VAluS.op) {
3567 case ARMvfp_ADD: pqrs = X0110; break;
3568 case ARMvfp_SUB: pqrs = X0111; break;
3569 case ARMvfp_MUL: pqrs = X0100; break;
3570 case ARMvfp_DIV: pqrs = X1000; break;
3571 default: goto bad;
3572 }
3573 vassert(pqrs != X1111);
3574 UInt bP = (pqrs >> 3) & 1;
3575 UInt bQ = (pqrs >> 2) & 1;
3576 UInt bR = (pqrs >> 1) & 1;
3577 UInt bS = (pqrs >> 0) & 1;
3578 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3579 (dN >> 1), (dD >> 1),
3580 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3581 *p++ = insn;
3582 goto done;
3583 }
3584 case ARMin_VUnaryD: {
3585 UInt dD = dregEnc(i->ARMin.VUnaryD.dst);
3586 UInt dM = dregEnc(i->ARMin.VUnaryD.src);
3587 UInt insn = 0;
3588 switch (i->ARMin.VUnaryD.op) {
3589 case ARMvfpu_COPY:
3590 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3591 break;
3592 case ARMvfpu_ABS:
3593 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3594 break;
3595 case ARMvfpu_NEG:
3596 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3597 break;
3598 case ARMvfpu_SQRT:
3599 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3600 break;
3601 default:
3602 goto bad;
3603 }
3604 *p++ = insn;
3605 goto done;
3606 }
3607 case ARMin_VUnaryS: {
3608 UInt fD = fregEnc(i->ARMin.VUnaryS.dst);
3609 UInt fM = fregEnc(i->ARMin.VUnaryS.src);
3610 UInt insn = 0;
3611 switch (i->ARMin.VUnaryS.op) {
3612 case ARMvfpu_COPY:
3613 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3614 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3615 (fM >> 1));
3616 break;
3617 case ARMvfpu_ABS:
3618 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3619 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3620 (fM >> 1));
3621 break;
3622 case ARMvfpu_NEG:
3623 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3624 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3625 (fM >> 1));
3626 break;
3627 case ARMvfpu_SQRT:
3628 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3629 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3630 (fM >> 1));
3631 break;
3632 default:
3633 goto bad;
3634 }
3635 *p++ = insn;
3636 goto done;
3637 }
3638 case ARMin_VCmpD: {
3639 UInt dD = dregEnc(i->ARMin.VCmpD.argL);
3640 UInt dM = dregEnc(i->ARMin.VCmpD.argR);
3641 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3642 *p++ = insn; /* FCMPD dD, dM */
3643 *p++ = 0xEEF1FA10; /* FMSTAT */
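/* FMSTAT (aka vmrs APSR_nzcv, fpscr) copies the N,Z,C,V flags set by
   the FP compare from the FPSCR into the ARM APSR, so that ordinary
   conditional instructions can test the comparison result. */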
3644 goto done;
3645 }
3646 case ARMin_VCMovD: {
3647 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3648 UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3649 UInt dM = dregEnc(i->ARMin.VCMovD.src);
3650 vassert(cc < 16 && cc != ARMcc_AL);
3651 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3652 *p++ = insn;
3653 goto done;
3654 }
3655 case ARMin_VCMovS: {
3656 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3657 UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3658 UInt fM = fregEnc(i->ARMin.VCMovS.src);
3659 vassert(cc < 16 && cc != ARMcc_AL);
3660 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3661 X0000,(fD >> 1),X1010,
3662 BITS4(0,1,(fM & 1),0), (fM >> 1));
3663 *p++ = insn;
3664 goto done;
3665 }
3666 case ARMin_VCvtSD: {
3667 if (i->ARMin.VCvtSD.sToD) {
3668 UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3669 UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3670 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3671 BITS4(1,1, (fM & 1), 0),
3672 (fM >> 1));
3673 *p++ = insn;
3674 goto done;
3675 } else {
3676 UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3677 UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3678 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3679 X0111, (fD >> 1),
3680 X1011, X1100, dM);
3681 *p++ = insn;
3682 goto done;
3683 }
3684 }
3685 case ARMin_VXferD: {
3686 UInt dD = dregEnc(i->ARMin.VXferD.dD);
3687 UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3688 UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3689 /* vmov dD, rLo, rHi is
3690 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3691 vmov rLo, rHi, dD is
3692 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3693 */
3694 UInt insn
3695 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3696 rHi, rLo, 0xB,
3697 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3698 *p++ = insn;
3699 goto done;
3700 }
3701 case ARMin_VXferS: {
3702 UInt fD = fregEnc(i->ARMin.VXferS.fD);
3703 UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3704 /* vmov fD, rLo is
3705 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3706 vmov rLo, fD is
3707 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3708 */
3709 UInt insn
3710 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3711 (fD >> 1) & 0xF, rLo, 0xA,
3712 BITS4((fD & 1),0,0,1), 0);
3713 *p++ = insn;
3714 goto done;
3715 }
3716 case ARMin_VCvtID: {
3717 Bool iToD = i->ARMin.VCvtID.iToD;
3718 Bool syned = i->ARMin.VCvtID.syned;
3719 if (iToD && syned) {
3720 // FSITOD: I32S-in-freg to F64-in-dreg
3721 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3722 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3723 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3724 X1011, BITS4(1,1,(regF & 1),0),
3725 (regF >> 1) & 0xF);
3726 *p++ = insn;
3727 goto done;
3728 }
3729 if (iToD && (!syned)) {
3730 // FUITOD: I32U-in-freg to F64-in-dreg
3731 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3732 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3733 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3734 X1011, BITS4(0,1,(regF & 1),0),
3735 (regF >> 1) & 0xF);
3736 *p++ = insn;
3737 goto done;
3738 }
3739 if ((!iToD) && syned) {
3740 // FTOSID: F64-in-dreg to I32S-in-freg
3741 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3742 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3743 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3744 X1101, (regF >> 1) & 0xF,
3745 X1011, X0100, regD);
3746 *p++ = insn;
3747 goto done;
3748 }
3749 if ((!iToD) && (!syned)) {
3750 // FTOUID: F64-in-dreg to I32U-in-freg
3751 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3752 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3753 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3754 X1100, (regF >> 1) & 0xF,
3755 X1011, X0100, regD);
3756 *p++ = insn;
3757 goto done;
3758 }
3759 /*UNREACHED*/
3760 vassert(0);
3761 }
3762 case ARMin_FPSCR: {
3763 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3764 UInt iReg = iregEnc(i->ARMin.FPSCR.iReg);
3765 if (toFPSCR) {
3766 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3767 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3768 goto done;
3769 }
3770 goto bad; // FPSCR -> iReg case currently ATC
3771 }
3772 case ARMin_MFence: {
3773 // It's not clear (to me) how the CP15 MCR barrier forms below
3774 // relate to the ARMv7 DSB/DMB/ISB encodings, so just use the v7
3775 // versions, since they are at least well documented.
3776 //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3777 //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3778 //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3779 *p++ = 0xF57FF04F; /* DSB sy */
3780 *p++ = 0xF57FF05F; /* DMB sy */
3781 *p++ = 0xF57FF06F; /* ISB */
3782 goto done;
3783 }
3784 case ARMin_CLREX: {
3785 *p++ = 0xF57FF01F; /* clrex */
3786 goto done;
3787 }
3788
3789 case ARMin_NLdStQ: {
3790 UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
3791 UInt regN, regM;
3792 UInt D = regD >> 4;
3793 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3794 UInt insn;
3795 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3796 regD &= 0xF;
3797 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3798 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3799 regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3800 } else {
3801 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3802 regM = 15;
3803 }
3804 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3805 regN, regD, X1010, X1000, regM);
3806 *p++ = insn;
3807 goto done;
3808 }
3809 case ARMin_NLdStD: {
3810 UInt regD = dregEnc(i->ARMin.NLdStD.dD);
3811 UInt regN, regM;
3812 UInt D = regD >> 4;
3813 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3814 UInt insn;
3815 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3816 regD &= 0xF;
3817 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3818 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3819 regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3820 } else {
3821 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3822 regM = 15;
3823 }
3824 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3825 regN, regD, X0111, X1000, regM);
3826 *p++ = insn;
3827 goto done;
3828 }
3829 case ARMin_NUnaryS: {
3830 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3831 UInt regD, D;
3832 UInt regM, M;
3833 UInt size = i->ARMin.NUnaryS.size;
3834 UInt insn;
3835 UInt opc, opc1, opc2;
3836 switch (i->ARMin.NUnaryS.op) {
3837 case ARMneon_VDUP:
3838 if (i->ARMin.NUnaryS.size >= 16)
3839 goto bad;
3840 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3841 goto bad;
3842 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3843 goto bad;
3844 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3845 ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
3846 : dregEnc(i->ARMin.NUnaryS.dst->reg);
3847 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3848 ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
3849 : dregEnc(i->ARMin.NUnaryS.src->reg);
3850 D = regD >> 4;
3851 M = regM >> 4;
3852 regD &= 0xf;
3853 regM &= 0xf;
3854 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3855 (i->ARMin.NUnaryS.size & 0xf), regD,
3856 X1100, BITS4(0,Q,M,0), regM);
3857 *p++ = insn;
3858 goto done;
3859 case ARMneon_SETELEM:
3860 regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
3861 dregEnc(i->ARMin.NUnaryS.dst->reg);
3862 regM = iregEnc(i->ARMin.NUnaryS.src->reg);
3863 M = regM >> 4;
3864 D = regD >> 4;
3865 regM &= 0xF;
3866 regD &= 0xF;
3867 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3868 goto bad;
3869 switch (size) {
3870 case 0:
3871 if (i->ARMin.NUnaryS.dst->index > 7)
3872 goto bad;
3873 opc = X1000 | i->ARMin.NUnaryS.dst->index;
3874 break;
3875 case 1:
3876 if (i->ARMin.NUnaryS.dst->index > 3)
3877 goto bad;
3878 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3879 break;
3880 case 2:
3881 if (i->ARMin.NUnaryS.dst->index > 1)
3882 goto bad;
3883 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3884 break;
3885 default:
3886 goto bad;
3887 }
3888 opc1 = (opc >> 2) & 3;
3889 opc2 = opc & 3;
3890 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3891 regD, regM, X1011,
3892 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3893 *p++ = insn;
3894 goto done;
3895 case ARMneon_GETELEMU:
3896 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3897 dregEnc(i->ARMin.NUnaryS.src->reg);
3898 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3899 M = regM >> 4;
3900 D = regD >> 4;
3901 regM &= 0xF;
3902 regD &= 0xF;
3903 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3904 goto bad;
3905 switch (size) {
3906 case 0:
3907 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3908 regM++;
3909 i->ARMin.NUnaryS.src->index -= 8;
3910 }
3911 if (i->ARMin.NUnaryS.src->index > 7)
3912 goto bad;
3913 opc = X1000 | i->ARMin.NUnaryS.src->index;
3914 break;
3915 case 1:
3916 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3917 regM++;
3918 i->ARMin.NUnaryS.src->index -= 4;
3919 }
3920 if (i->ARMin.NUnaryS.src->index > 3)
3921 goto bad;
3922 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3923 break;
3924 case 2:
3925 goto bad;
3926 default:
3927 goto bad;
3928 }
3929 opc1 = (opc >> 2) & 3;
3930 opc2 = opc & 3;
3931 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3932 regM, regD, X1011,
3933 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3934 *p++ = insn;
3935 goto done;
3936 case ARMneon_GETELEMS:
3937 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3938 dregEnc(i->ARMin.NUnaryS.src->reg);
3939 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3940 M = regM >> 4;
3941 D = regD >> 4;
3942 regM &= 0xF;
3943 regD &= 0xF;
3944 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3945 goto bad;
3946 switch (size) {
3947 case 0:
3948 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3949 regM++;
3950 i->ARMin.NUnaryS.src->index -= 8;
3951 }
3952 if (i->ARMin.NUnaryS.src->index > 7)
3953 goto bad;
3954 opc = X1000 | i->ARMin.NUnaryS.src->index;
3955 break;
3956 case 1:
3957 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3958 regM++;
3959 i->ARMin.NUnaryS.src->index -= 4;
3960 }
3961 if (i->ARMin.NUnaryS.src->index > 3)
3962 goto bad;
3963 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3964 break;
3965 case 2:
3966 if (Q && i->ARMin.NUnaryS.src->index > 1) {
3967 regM++;
3968 i->ARMin.NUnaryS.src->index -= 2;
3969 }
3970 if (i->ARMin.NUnaryS.src->index > 1)
3971 goto bad;
3972 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3973 break;
3974 default:
3975 goto bad;
3976 }
3977 opc1 = (opc >> 2) & 3;
3978 opc2 = opc & 3;
3979 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3980 regM, regD, X1011,
3981 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3982 *p++ = insn;
3983 goto done;
3984 default:
3985 goto bad;
3986 }
3987 }
3988 case ARMin_NUnary: {
3989 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
3990 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
3991 ? (qregEnc(i->ARMin.NUnary.dst) << 1)
3992 : dregEnc(i->ARMin.NUnary.dst);
3993 UInt regM, M;
3994 UInt D = regD >> 4;
3995 UInt sz1 = i->ARMin.NUnary.size >> 1;
3996 UInt sz2 = i->ARMin.NUnary.size & 1;
3997 UInt sz = i->ARMin.NUnary.size;
3998 UInt insn;
3999 UInt F = 0; /* TODO: floating point EQZ ??? */
4000 if (i->ARMin.NUnary.op != ARMneon_DUP) {
4001 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4002 ? (qregEnc(i->ARMin.NUnary.src) << 1)
4003 : dregEnc(i->ARMin.NUnary.src);
4004 M = regM >> 4;
4005 } else {
4006 regM = iregEnc(i->ARMin.NUnary.src);
4007 M = regM >> 4;
4008 }
4009 regD &= 0xF;
4010 regM &= 0xF;
4011 switch (i->ARMin.NUnary.op) {
4012 case ARMneon_COPY: /* VMOV reg, reg */
4013 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4014 BITS4(M,Q,M,1), regM);
4015 break;
4016 case ARMneon_COPYN: /* VMOVN regD, regQ */
4017 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4018 regD, X0010, BITS4(0,0,M,0), regM);
4019 break;
4020 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4021 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4022 regD, X0010, BITS4(1,0,M,0), regM);
4023 break;
4024 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4025 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4026 regD, X0010, BITS4(0,1,M,0), regM);
4027 break;
4028 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4029 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4030 regD, X0010, BITS4(1,1,M,0), regM);
4031 break;
4032 case ARMneon_COPYLS: /* VMOVL regQ, regD */
4033 if (sz >= 3)
4034 goto bad;
4035 insn = XXXXXXXX(0xF, X0010,
4036 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4037 BITS4((sz == 0) ? 1 : 0,0,0,0),
4038 regD, X1010, BITS4(0,0,M,1), regM);
4039 break;
4040 case ARMneon_COPYLU: /* VMOVL regQ, regD */
4041 if (sz >= 3)
4042 goto bad;
4043 insn = XXXXXXXX(0xF, X0011,
4044 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4045 BITS4((sz == 0) ? 1 : 0,0,0,0),
4046 regD, X1010, BITS4(0,0,M,1), regM);
4047 break;
4048 case ARMneon_NOT: /* VMVN reg, reg*/
4049 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4050 BITS4(1,Q,M,0), regM);
4051 break;
4052 case ARMneon_EQZ:
4053 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4054 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4055 break;
4056 case ARMneon_CNT:
4057 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4058 BITS4(0,Q,M,0), regM);
4059 break;
4060 case ARMneon_CLZ:
4061 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4062 regD, X0100, BITS4(1,Q,M,0), regM);
4063 break;
4064 case ARMneon_CLS:
4065 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4066 regD, X0100, BITS4(0,Q,M,0), regM);
4067 break;
4068 case ARMneon_ABS:
4069 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4070 regD, X0011, BITS4(0,Q,M,0), regM);
4071 break;
4072 case ARMneon_DUP:
4073 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4074 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4075 vassert(sz1 + sz2 < 2);
4076 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4077 X1011, BITS4(D,0,sz2,1), X0000);
4078 break;
4079 case ARMneon_REV16:
4080 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4081 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4082 break;
4083 case ARMneon_REV32:
4084 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4085 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4086 break;
4087 case ARMneon_REV64:
4088 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4089 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4090 break;
4091 case ARMneon_PADDLU:
4092 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4093 regD, X0010, BITS4(1,Q,M,0), regM);
4094 break;
4095 case ARMneon_PADDLS:
4096 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4097 regD, X0010, BITS4(0,Q,M,0), regM);
4098 break;
4099 case ARMneon_VQSHLNUU:
4100 insn = XXXXXXXX(0xF, X0011,
4101 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4102 sz & 0xf, regD, X0111,
4103 BITS4(sz >> 6,Q,M,1), regM);
4104 break;
4105 case ARMneon_VQSHLNSS:
4106 insn = XXXXXXXX(0xF, X0010,
4107 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4108 sz & 0xf, regD, X0111,
4109 BITS4(sz >> 6,Q,M,1), regM);
4110 break;
4111 case ARMneon_VQSHLNUS:
4112 insn = XXXXXXXX(0xF, X0011,
4113 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4114 sz & 0xf, regD, X0110,
4115 BITS4(sz >> 6,Q,M,1), regM);
4116 break;
4117 case ARMneon_VCVTFtoS:
4118 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4119 BITS4(0,Q,M,0), regM);
4120 break;
4121 case ARMneon_VCVTFtoU:
4122 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4123 BITS4(1,Q,M,0), regM);
4124 break;
4125 case ARMneon_VCVTStoF:
4126 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4127 BITS4(0,Q,M,0), regM);
4128 break;
4129 case ARMneon_VCVTUtoF:
4130 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4131 BITS4(1,Q,M,0), regM);
4132 break;
4133 case ARMneon_VCVTFtoFixedU:
4134 sz1 = (sz >> 5) & 1;
4135 sz2 = (sz >> 4) & 1;
4136 sz &= 0xf;
4137 insn = XXXXXXXX(0xF, X0011,
4138 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4139 BITS4(0,Q,M,1), regM);
4140 break;
4141 case ARMneon_VCVTFtoFixedS:
4142 sz1 = (sz >> 5) & 1;
4143 sz2 = (sz >> 4) & 1;
4144 sz &= 0xf;
4145 insn = XXXXXXXX(0xF, X0010,
4146 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4147 BITS4(0,Q,M,1), regM);
4148 break;
4149 case ARMneon_VCVTFixedUtoF:
4150 sz1 = (sz >> 5) & 1;
4151 sz2 = (sz >> 4) & 1;
4152 sz &= 0xf;
4153 insn = XXXXXXXX(0xF, X0011,
4154 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4155 BITS4(0,Q,M,1), regM);
4156 break;
4157 case ARMneon_VCVTFixedStoF:
4158 sz1 = (sz >> 5) & 1;
4159 sz2 = (sz >> 4) & 1;
4160 sz &= 0xf;
4161 insn = XXXXXXXX(0xF, X0010,
4162 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4163 BITS4(0,Q,M,1), regM);
4164 break;
4165 case ARMneon_VCVTF32toF16:
4166 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4167 BITS4(0,0,M,0), regM);
4168 break;
4169 case ARMneon_VCVTF16toF32:
4170 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4171 BITS4(0,0,M,0), regM);
4172 break;
4173 case ARMneon_VRECIP:
4174 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4175 BITS4(0,Q,M,0), regM);
4176 break;
4177 case ARMneon_VRECIPF:
4178 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4179 BITS4(0,Q,M,0), regM);
4180 break;
4181 case ARMneon_VABSFP:
4182 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4183 BITS4(0,Q,M,0), regM);
4184 break;
4185 case ARMneon_VRSQRTEFP:
4186 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4187 BITS4(1,Q,M,0), regM);
4188 break;
4189 case ARMneon_VRSQRTE:
4190 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4191 BITS4(1,Q,M,0), regM);
4192 break;
4193 case ARMneon_VNEGF:
4194 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4195 BITS4(1,Q,M,0), regM);
4196 break;
4197
4198 default:
4199 goto bad;
4200 }
4201 *p++ = insn;
4202 goto done;
4203 }
4204 case ARMin_NDual: {
4205 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4206 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4207 ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4208 : dregEnc(i->ARMin.NDual.arg1);
4209 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4210 ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4211 : dregEnc(i->ARMin.NDual.arg2);
4212 UInt D = regD >> 4;
4213 UInt M = regM >> 4;
4214 UInt sz1 = i->ARMin.NDual.size >> 1;
4215 UInt sz2 = i->ARMin.NDual.size & 1;
4216 UInt insn;
4217 regD &= 0xF;
4218 regM &= 0xF;
4219 switch (i->ARMin.NDual.op) {
4220 case ARMneon_TRN: /* VTRN reg, reg */
4221 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4222 regD, X0000, BITS4(1,Q,M,0), regM);
4223 break;
4224 case ARMneon_ZIP: /* VZIP reg, reg */
4225 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4226 regD, X0001, BITS4(1,Q,M,0), regM);
4227 break;
4228 case ARMneon_UZP: /* VUZP reg, reg */
4229 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4230 regD, X0001, BITS4(0,Q,M,0), regM);
4231 break;
4232 default:
4233 goto bad;
4234 }
4235 *p++ = insn;
4236 goto done;
4237 }
4238 case ARMin_NBinary: {
4239 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4240 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4241 ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4242 : dregEnc(i->ARMin.NBinary.dst);
4243 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4244 ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4245 : dregEnc(i->ARMin.NBinary.argL);
4246 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4247 ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4248 : dregEnc(i->ARMin.NBinary.argR);
4249 UInt sz1 = i->ARMin.NBinary.size >> 1;
4250 UInt sz2 = i->ARMin.NBinary.size & 1;
4251 UInt D = regD >> 4;
4252 UInt N = regN >> 4;
4253 UInt M = regM >> 4;
4254 UInt insn;
4255 regD &= 0xF;
4256 regM &= 0xF;
4257 regN &= 0xF;
4258 switch (i->ARMin.NBinary.op) {
4259 case ARMneon_VAND: /* VAND reg, reg, reg */
4260 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4261 BITS4(N,Q,M,1), regM);
4262 break;
4263 case ARMneon_VORR: /* VORR reg, reg, reg*/
4264 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4265 BITS4(N,Q,M,1), regM);
4266 break;
4267 case ARMneon_VXOR: /* VEOR reg, reg, reg */
4268 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4269 BITS4(N,Q,M,1), regM);
4270 break;
4271 case ARMneon_VADD: /* VADD reg, reg, reg */
4272 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4273 X1000, BITS4(N,Q,M,0), regM);
4274 break;
4275 case ARMneon_VSUB: /* VSUB reg, reg, reg */
4276 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4277 X1000, BITS4(N,Q,M,0), regM);
4278 break;
4279 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4280 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4281 X0110, BITS4(N,Q,M,1), regM);
4282 break;
4283 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4284 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4285 X0110, BITS4(N,Q,M,1), regM);
4286 break;
4287 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4288 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4289 X0110, BITS4(N,Q,M,0), regM);
4290 break;
4291 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4292 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4293 X0110, BITS4(N,Q,M,0), regM);
4294 break;
4295 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4296 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4297 X0001, BITS4(N,Q,M,0), regM);
4298 break;
4299 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4300 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4301 X0001, BITS4(N,Q,M,0), regM);
4302 break;
4303 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4304 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4305 X0000, BITS4(N,Q,M,1), regM);
4306 break;
4307 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4308 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4309 X0000, BITS4(N,Q,M,1), regM);
4310 break;
4311 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4312 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4313 X0010, BITS4(N,Q,M,1), regM);
4314 break;
4315 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4316 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4317 X0010, BITS4(N,Q,M,1), regM);
4318 break;
4319 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4320 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4321 X0011, BITS4(N,Q,M,0), regM);
4322 break;
4323 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4324 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4325 X0011, BITS4(N,Q,M,0), regM);
4326 break;
4327 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4328 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4329 X0011, BITS4(N,Q,M,1), regM);
4330 break;
4331 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4332 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4333 X0011, BITS4(N,Q,M,1), regM);
4334 break;
4335 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4336 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4337 X1000, BITS4(N,Q,M,1), regM);
4338 break;
4339 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4340 if (i->ARMin.NBinary.size >= 16)
4341 goto bad;
4342 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4343 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4344 regM);
4345 break;
4346 case ARMneon_VMUL:
4347 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4348 X1001, BITS4(N,Q,M,1), regM);
4349 break;
4350 case ARMneon_VMULLU:
4351 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4352 X1100, BITS4(N,0,M,0), regM);
4353 break;
4354 case ARMneon_VMULLS:
4355 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4356 X1100, BITS4(N,0,M,0), regM);
4357 break;
4358 case ARMneon_VMULP:
4359 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4360 X1001, BITS4(N,Q,M,1), regM);
4361 break;
4362 case ARMneon_VMULFP:
4363 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4364 X1101, BITS4(N,Q,M,1), regM);
4365 break;
4366 case ARMneon_VMULLP:
4367 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4368 X1110, BITS4(N,0,M,0), regM);
4369 break;
4370 case ARMneon_VQDMULH:
4371 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4372 X1011, BITS4(N,Q,M,0), regM);
4373 break;
4374 case ARMneon_VQRDMULH:
4375 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4376 X1011, BITS4(N,Q,M,0), regM);
4377 break;
4378 case ARMneon_VQDMULL:
4379 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4380 X1101, BITS4(N,0,M,0), regM);
4381 break;
4382 case ARMneon_VTBL:
4383 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4384 X1000, BITS4(N,0,M,0), regM);
4385 break;
4386 case ARMneon_VPADD:
4387 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4388 X1011, BITS4(N,Q,M,1), regM);
4389 break;
4390 case ARMneon_VPADDFP:
4391 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4392 X1101, BITS4(N,Q,M,0), regM);
4393 break;
4394 case ARMneon_VPMINU:
4395 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4396 X1010, BITS4(N,Q,M,1), regM);
4397 break;
4398 case ARMneon_VPMINS:
4399 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4400 X1010, BITS4(N,Q,M,1), regM);
4401 break;
4402 case ARMneon_VPMAXU:
4403 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4404 X1010, BITS4(N,Q,M,0), regM);
4405 break;
4406 case ARMneon_VPMAXS:
4407 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4408 X1010, BITS4(N,Q,M,0), regM);
4409 break;
4410 case ARMneon_VADDFP: /* VADD reg, reg, reg */
4411 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4412 X1101, BITS4(N,Q,M,0), regM);
4413 break;
4414 case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4415 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4416 X1101, BITS4(N,Q,M,0), regM);
4417 break;
4418 case ARMneon_VABDFP: /* VABD reg, reg, reg */
4419 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4420 X1101, BITS4(N,Q,M,0), regM);
4421 break;
4422 case ARMneon_VMINF:
4423 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4424 X1111, BITS4(N,Q,M,0), regM);
4425 break;
4426 case ARMneon_VMAXF:
4427 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4428 X1111, BITS4(N,Q,M,0), regM);
4429 break;
4430 case ARMneon_VPMINF:
4431 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4432 X1111, BITS4(N,Q,M,0), regM);
4433 break;
4434 case ARMneon_VPMAXF:
4435 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4436 X1111, BITS4(N,Q,M,0), regM);
4437 break;
4438 case ARMneon_VRECPS:
4439 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4440 BITS4(N,Q,M,1), regM);
4441 break;
4442 case ARMneon_VCGTF:
4443 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4444 BITS4(N,Q,M,0), regM);
4445 break;
4446 case ARMneon_VCGEF:
4447 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4448 BITS4(N,Q,M,0), regM);
4449 break;
4450 case ARMneon_VCEQF:
4451 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4452 BITS4(N,Q,M,0), regM);
4453 break;
4454 case ARMneon_VRSQRTS:
4455 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4456 BITS4(N,Q,M,1), regM);
4457 break;
4458 default:
4459 goto bad;
4460 }
4461 *p++ = insn;
4462 goto done;
4463 }
4464 case ARMin_NShift: {
4465 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4466 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4467 ? (qregEnc(i->ARMin.NShift.dst) << 1)
4468 : dregEnc(i->ARMin.NShift.dst);
4469 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4470 ? (qregEnc(i->ARMin.NShift.argL) << 1)
4471 : dregEnc(i->ARMin.NShift.argL);
4472 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4473 ? (qregEnc(i->ARMin.NShift.argR) << 1)
4474 : dregEnc(i->ARMin.NShift.argR);
4475 UInt sz1 = i->ARMin.NShift.size >> 1;
4476 UInt sz2 = i->ARMin.NShift.size & 1;
4477 UInt D = regD >> 4;
4478 UInt N = regN >> 4;
4479 UInt M = regM >> 4;
4480 UInt insn;
4481 regD &= 0xF;
4482 regM &= 0xF;
4483 regN &= 0xF;
4484 switch (i->ARMin.NShift.op) {
4485 case ARMneon_VSHL:
4486 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4487 X0100, BITS4(N,Q,M,0), regM);
4488 break;
4489 case ARMneon_VSAL:
4490 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4491 X0100, BITS4(N,Q,M,0), regM);
4492 break;
4493 case ARMneon_VQSHL:
4494 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4495 X0100, BITS4(N,Q,M,1), regM);
4496 break;
4497 case ARMneon_VQSAL:
4498 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4499 X0100, BITS4(N,Q,M,1), regM);
4500 break;
4501 default:
4502 goto bad;
4503 }
4504 *p++ = insn;
4505 goto done;
4506 }
4507 case ARMin_NShl64: {
4508 HReg regDreg = i->ARMin.NShl64.dst;
4509 HReg regMreg = i->ARMin.NShl64.src;
4510 UInt amt = i->ARMin.NShl64.amt;
4511 vassert(amt >= 1 && amt <= 63);
4512 vassert(hregClass(regDreg) == HRcFlt64);
4513 vassert(hregClass(regMreg) == HRcFlt64);
4514 UInt regD = dregEnc(regDreg);
4515 UInt regM = dregEnc(regMreg);
4516 UInt D = (regD >> 4) & 1;
4517 UInt Vd = regD & 0xF;
4518 UInt L = 1;
4519 UInt Q = 0; /* always 64-bit */
4520 UInt M = (regM >> 4) & 1;
4521 UInt Vm = regM & 0xF;
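/* This assembles VSHL.I64 dD, dM, #amt.  With L == 1 the element size
   is 64 bits and the whole shift count is the 6-bit immediate:
   imm6[5:4] sit alongside the D bit below and imm6[3:0] form the next
   nibble. */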
4522 UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4523 amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4524 *p++ = insn;
4525 goto done;
4526 }
4527 case ARMin_NeonImm: {
4528 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4529 UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4530 dregEnc(i->ARMin.NeonImm.dst);
4531 UInt D = regD >> 4;
4532 UInt imm = i->ARMin.NeonImm.imm->imm8;
4533 UInt tp = i->ARMin.NeonImm.imm->type;
4534 UInt j = imm >> 7;
4535 UInt imm3 = (imm >> 4) & 0x7;
4536 UInt imm4 = imm & 0xF;
4537 UInt cmode, op;
4538 UInt insn;
4539 regD &= 0xF;
4540 if (tp == 9)
4541 op = 1;
4542 else
4543 op = 0;
4544 switch (tp) {
4545 case 0:
4546 case 1:
4547 case 2:
4548 case 3:
4549 case 4:
4550 case 5:
4551 cmode = tp << 1;
4552 break;
4553 case 9:
4554 case 6:
4555 cmode = 14;
4556 break;
4557 case 7:
4558 cmode = 12;
4559 break;
4560 case 8:
4561 cmode = 13;
4562 break;
4563 case 10:
4564 cmode = 15;
4565 break;
4566 default:
4567 vpanic("ARMin_NeonImm");
4568
4569 }
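/* 'cmode' and 'op' are the AdvSIMD "modified immediate" selector
   fields; together with the split j:imm3:imm4 immediate they determine
   how the 8-bit constant is expanded into the full vector value (see
   the VMOV/VMVN (immediate) encoding tables in the ARM ARM). */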
4570 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4571 cmode, BITS4(0,Q,op,1), imm4);
4572 *p++ = insn;
4573 goto done;
4574 }
4575 case ARMin_NCMovQ: {
4576 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4577 UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4578 UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4579 UInt vM = qM & 0xF;
4580 UInt vD = qD & 0xF;
4581 UInt M = (qM >> 4) & 1;
4582 UInt D = (qD >> 4) & 1;
4583 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4584 /* b!cc here+8: !cc A00 0000 */
4585 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4586 *p++ = insn;
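/* With simm24 == 0 the branch above targets PC + 8, i.e. the
   instruction following the vmov below, and cc ^ 1 yields the
   complementary ARM condition (condition codes pair up with bit 0
   flipped).  Hence the vmov is executed only when 'cond' holds. */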
4587 /* vmov qD, qM */
4588 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4589 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4590 *p++ = insn;
4591 goto done;
4592 }
4593 case ARMin_Add32: {
4594 UInt regD = iregEnc(i->ARMin.Add32.rD);
4595 UInt regN = iregEnc(i->ARMin.Add32.rN);
4596 UInt imm32 = i->ARMin.Add32.imm32;
4597 vassert(regD != regN);
4598 /* MOV regD, imm32 */
4599 p = imm32_to_ireg((UInt *)p, regD, imm32);
4600 /* ADD regD, regN, regD */
4601 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4602 *p++ = insn;
4603 goto done;
4604 }
4605
4606 case ARMin_EvCheck: {
4607 /* We generate:
4608 ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4609 subs r12, r12, #1 (A1)
4610 str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4611 bpl nofail
4612 ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
4613 bx r12
4614 nofail:
4615 */
4616 UInt* p0 = p;
4617 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4618 i->ARMin.EvCheck.amCounter);
4619 *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4620 p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4621 i->ARMin.EvCheck.amCounter);
4622 *p++ = 0x5A000001; /* bpl nofail */
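/* The bpl has simm24 == 1, so it branches to PC + 12, skipping the
   two-instruction failure path below.  This relies on each
   do_load_or_store32 call here emitting exactly one instruction,
   which the size crosscheck at the end verifies. */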
4623 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4624 i->ARMin.EvCheck.amFailAddr);
4625 *p++ = 0xE12FFF1C; /* bx r12 */
4626 /* nofail: */
4627
4628 /* Crosscheck */
4629 vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4630 goto done;
4631 }
4632
4633 case ARMin_ProfInc: {
4634 /* We generate:
4635 (ctrP is unknown now, so use 0x65556555 in the
4636 expectation that a later call to LibVEX_patchProfCtr
4637 will be used to fill in the immediate fields once the
4638 right value is known.)
4639 movw r12, lo16(0x65556555)
4640 movt r12, hi16(0x65556555)
4641 ldr r11, [r12]
4642 adds r11, r11, #1
4643 str r11, [r12]
4644 ldr r11, [r12+4]
4645 adc r11, r11, #0
4646 str r11, [r12+4]
4647 */
4648 p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4649 *p++ = 0xE59CB000; /* ldr  r11, [r12]     */
4650 *p++ = 0xE29BB001; /* adds r11, r11, #1   */
4651 *p++ = 0xE58CB000; /* str  r11, [r12]     */
4652 *p++ = 0xE59CB004; /* ldr  r11, [r12, #4] */
4653 *p++ = 0xE2ABB000; /* adc  r11, r11, #0   */
4654 *p++ = 0xE58CB004; /* str  r11, [r12, #4] */
4655 /* Tell the caller .. */
4656 vassert(!(*is_profInc));
4657 *is_profInc = True;
4658 goto done;
4659 }
4660
4661 /* ... */
4662 default:
4663 goto bad;
4664 }
4665
4666 bad:
4667 ppARMInstr(i);
4668 vpanic("emit_ARMInstr");
4669 /*NOTREACHED*/
4670
4671 done:
4672 vassert(((UChar*)p) - &buf[0] <= 32);
4673 return ((UChar*)p) - &buf[0];
4674 }
4675
4676
4677 /* How big is an event check? See case for ARMin_EvCheck in
4678 emit_ARMInstr just above. That crosschecks what this returns, so
4679 we can tell if we're inconsistent. */
4680 Int evCheckSzB_ARM ( void )
4681 {
4682 return 24;
4683 }
4684
4685
4686 /* NB: what goes on here has to be very closely coordinated with the
4687 emitInstr case for XDirect, above. */
4688 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4689 void* place_to_chain,
4690 const void* disp_cp_chain_me_EXPECTED,
4691 const void* place_to_jump_to )
4692 {
4693 vassert(endness_host == VexEndnessLE);
4694
4695 /* What we're expecting to see is:
4696 movw r12, lo16(disp_cp_chain_me_EXPECTED)
4697 movt r12, hi16(disp_cp_chain_me_EXPECTED)
4698 blx r12
4699 viz
4700 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4701 E1 2F FF 3C
4702 */
4703 UInt* p = (UInt*)place_to_chain;
4704 vassert(0 == (3 & (HWord)p));
4705 vassert(is_imm32_to_ireg_EXACTLY2(
4706 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4707 vassert(p[2] == 0xE12FFF3C);
4708 /* And what we want to change it to is either:
4709 (general case)
4710 movw r12, lo16(place_to_jump_to)
4711 movt r12, hi16(place_to_jump_to)
4712 bx r12
4713 viz
4714 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4715 E1 2F FF 1C
4716 ---OR---
4717 in the case where the displacement falls within 26 bits
4718 b disp24; undef; undef
4719 viz
4720 EA <3 bytes == disp24>
4721 FF 00 00 00
4722 FF 00 00 00
4723
4724 In both cases the replacement has the same length as the original.
4725 To remain sane & verifiable,
4726 (1) limit the displacement for the short form to
4727 (say) +/- 30 million, so as to avoid wraparound
4728 off-by-ones
4729 (2) even if the short form is applicable, once every (say)
4730 1024 times use the long form anyway, so as to maintain
4731 verifiability
4732 */
4733
4734 /* This is the delta we need to put into a B insn. It's relative
4735 to the start of the next-but-one insn, hence the -8. */
4736 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4737 Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4738 vassert(0 == (delta & (Long)3));
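/* Worked example of the short form used below: if place_to_jump_to is
   exactly 0x1000 bytes beyond p, then delta == 0xFF8,
   simm24 == 0x3FE, and p[0] becomes 0xEA0003FE, i.e. "b .+0x1000". */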
4739
4740 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4741 if (shortOK) {
4742 shortCTR++; // thread safety bleh
4743 if (0 == (shortCTR & 0x3FF)) {
4744 shortOK = False;
4745 if (0)
4746 vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4747 "using long form\n", shortCTR);
4748 }
4749 }
4750
4751 /* And make the modifications. */
4752 if (shortOK) {
4753 Int simm24 = (Int)(delta >> 2);
4754 vassert(simm24 == ((simm24 << 8) >> 8));
4755 p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4756 p[1] = 0xFF000000;
4757 p[2] = 0xFF000000;
4758 } else {
4759 (void)imm32_to_ireg_EXACTLY2(
4760 p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4761 p[2] = 0xE12FFF1C;
4762 }
4763
4764 VexInvalRange vir = {(HWord)p, 12};
4765 return vir;
4766 }
4767
4768
4769 /* NB: what goes on here has to be very closely coordinated with the
4770 emitInstr case for XDirect, above. */
4771 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4772 void* place_to_unchain,
4773 const void* place_to_jump_to_EXPECTED,
4774 const void* disp_cp_chain_me )
4775 {
4776 vassert(endness_host == VexEndnessLE);
4777
4778 /* What we're expecting to see is:
4779 (general case)
4780 movw r12, lo16(place_to_jump_to_EXPECTED)
4781 movt r12, hi16(place_to_jump_to_EXPECTED)
4782 bx r12
4783 viz
4784 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4785 E1 2F FF 1C
4786 ---OR---
4787 in the case where the displacement falls within 26 bits
4788 b disp24; undef; undef
4789 viz
4790 EA <3 bytes == disp24>
4791 FF 00 00 00
4792 FF 00 00 00
4793 */
4794 UInt* p = (UInt*)place_to_unchain;
4795 vassert(0 == (3 & (HWord)p));
4796
4797 Bool valid = False;
4798 if (is_imm32_to_ireg_EXACTLY2(
4799 p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
4800 && p[2] == 0xE12FFF1C) {
4801 valid = True; /* it's the long form */
4802 if (0)
4803 vex_printf("QQQ unchainXDirect_ARM: found long form\n");
4804 } else
4805 if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
4806 /* It's the short form. Check the displacement is right. */
4807 Int simm24 = p[0] & 0x00FFFFFF;
4808 simm24 <<= 8; simm24 >>= 8;
4809 if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
4810 valid = True;
4811 if (0)
4812 vex_printf("QQQ unchainXDirect_ARM: found short form\n");
4813 }
4814 }
4815 vassert(valid);
4816
4817 /* And what we want to change it to is:
4818 movw r12, lo16(disp_cp_chain_me)
4819 movt r12, hi16(disp_cp_chain_me)
4820 blx r12
4821 viz
4822 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4823 E1 2F FF 3C
4824 */
4825 (void)imm32_to_ireg_EXACTLY2(
4826 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
4827 p[2] = 0xE12FFF3C;
4828 VexInvalRange vir = {(HWord)p, 12};
4829 return vir;
4830 }
4831
4832
4833 /* Patch the counter address into a profile inc point, as previously
4834 created by the ARMin_ProfInc case for emit_ARMInstr. */
4835 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
4836 void* place_to_patch,
4837 const ULong* location_of_counter )
4838 {
4839 vassert(endness_host == VexEndnessLE);
4840 vassert(sizeof(ULong*) == 4);
4841 UInt* p = (UInt*)place_to_patch;
4842 vassert(0 == (3 & (HWord)p));
4843 vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
4844 vassert(p[2] == 0xE59CB000);
4845 vassert(p[3] == 0xE29BB001);
4846 vassert(p[4] == 0xE58CB000);
4847 vassert(p[5] == 0xE59CB004);
4848 vassert(p[6] == 0xE2ABB000);
4849 vassert(p[7] == 0xE58CB004);
4850 imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
4851 VexInvalRange vir = {(HWord)p, 8};
4852 return vir;
4853 }
4854
4855
4856 #undef BITS4
4857 #undef X0000
4858 #undef X0001
4859 #undef X0010
4860 #undef X0011
4861 #undef X0100
4862 #undef X0101
4863 #undef X0110
4864 #undef X0111
4865 #undef X1000
4866 #undef X1001
4867 #undef X1010
4868 #undef X1011
4869 #undef X1100
4870 #undef X1101
4871 #undef X1110
4872 #undef X1111
4873 #undef XXXXX___
4874 #undef XXXXXX__
4875 #undef XXX___XX
4876 #undef XXXXX__X
4877 #undef XXXXXXXX
4878 #undef XX______
4879
4880 /*---------------------------------------------------------------*/
4881 /*--- end host_arm_defs.c ---*/
4882 /*---------------------------------------------------------------*/
4883