1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 NEON support is
14 Copyright (C) 2010-2015 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
17
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
32
33 The GNU General Public License is contained in the file COPYING.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "ir_match.h"
40
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
46
47
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff ---*/
50 /*---------------------------------------------------------*/
51
52 /* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
56
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
60 */
61
62 #define DEFAULT_FPSCR 0
63
64
65 /*---------------------------------------------------------*/
66 /*--- ISelEnv ---*/
67 /*---------------------------------------------------------*/
68
69 /* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
82 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
87 - The code array, that is, the insns selected so far.
88
89 - A counter, for generating new virtual registers.
90
91 - The host hardware capabilities word. This is set at the start
92 and does not change.
93
94 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
97
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
105
106 Note, this is all (well, mostly) host-independent.
107 */
108
/* Instruction-selection environment for one superblock.  See the big
   comment above for the full invariants on each field. */
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;        // IRTemp -> IRType for this block

      HReg*        vregmap;         // IRTemp -> primary vreg
      HReg*        vregmapHI;       // IRTemp -> high-half vreg (I64 temps only)
      Int          n_vregmap;       // number of entries in both maps

      UInt         hwcaps;          // host hardware capabilities word

      Bool         chainingAllowed; // may we emit chain-me transfers?
      Addr32       max_ga;          // highest guest addr in this block

      /* These are modified as we go along. */
      HInstrArray* code;            // the insns selected so far
      Int          vreg_ctr;        // counter for minting new vregs
   }
   ISelEnv;
128
lookupIRTemp(ISelEnv * env,IRTemp tmp)129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130 {
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
134 }
135
/* Return the (hi,lo) vreg pair associated with a 64-bit IRTemp. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   /* The two stores are independent of each other. */
   *vrHI = env->vregmapHI[tmp];
   *vrLO = env->vregmap[tmp];
}
144
/* Append |instr| to the code list, printing it first when insn-select
   tracing is enabled. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);
   if (0 == (vex_traceflags & VEX_TRACE_VCODE))
      return;
   ppARMInstr(instr);
   vex_printf("\n");
}
153
newVRegI(ISelEnv * env)154 static HReg newVRegI ( ISelEnv* env )
155 {
156 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
157 env->vreg_ctr++;
158 return reg;
159 }
160
newVRegD(ISelEnv * env)161 static HReg newVRegD ( ISelEnv* env )
162 {
163 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
164 env->vreg_ctr++;
165 return reg;
166 }
167
newVRegF(ISelEnv * env)168 static HReg newVRegF ( ISelEnv* env )
169 {
170 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
171 env->vreg_ctr++;
172 return reg;
173 }
174
newVRegV(ISelEnv * env)175 static HReg newVRegV ( ISelEnv* env )
176 {
177 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
178 env->vreg_ctr++;
179 return reg;
180 }
181
/* These are duplicated in guest_arm_toIR.c */

/* Build a unary-op IR expression node. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

/* Build a binary-op IR expression node. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

/* Build a binder node, for use in patterns with ir_match.h. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
197
198
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );

static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );

static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );

static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );

static ARMRI84*    iselIntExpr_RI84_wrk
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
static ARMRI84*    iselIntExpr_RI84
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );

static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );

static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );

static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
251
252 /*---------------------------------------------------------*/
253 /*--- ISEL: Misc helpers ---*/
254 /*---------------------------------------------------------*/
255
/* Rotate |x| right by |sh| places, 0 <= sh < 32. */
static UInt ROR32 ( UInt x, UInt sh ) {
   /* |sh| is unsigned, so the old 'sh >= 0' conjunct was vacuously
      true (and provoked -Wtype-limits); only the upper bound needs
      checking. */
   vassert(sh < 32);
   if (sh == 0)
      return x;   /* avoid the UB of shifting by 32 below */
   else
      return (x << (32-sh)) | (x >> sh);
}
263
264 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
265 form, and if so return the components. */
fitsIn8x4(UInt * u8,UInt * u4,UInt u)266 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
267 {
268 UInt i;
269 for (i = 0; i < 16; i++) {
270 if (0 == (u & 0xFFFFFF00)) {
271 *u8 = u;
272 *u4 = i;
273 return True;
274 }
275 u = ROR32(u, 30);
276 }
277 vassert(i == 16);
278 return False;
279 }
280
281 /* Make a int reg-reg move. */
mk_iMOVds_RR(HReg dst,HReg src)282 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
283 {
284 vassert(hregClass(src) == HRcInt32);
285 vassert(hregClass(dst) == HRcInt32);
286 return ARMInstr_Mov(dst, ARMRI84_R(src));
287 }
288
289 /* Set the VFP unit's rounding mode to default (round to nearest). */
set_VFP_rounding_default(ISelEnv * env)290 static void set_VFP_rounding_default ( ISelEnv* env )
291 {
292 /* mov rTmp, #DEFAULT_FPSCR
293 fmxr fpscr, rTmp
294 */
295 HReg rTmp = newVRegI(env);
296 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
297 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
298 }
299
/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
   same rounding.
*/
static
void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARMvfp one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARMvfp encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* Swap the two low bits of irrm, then shift the result up to the
      FPSCR rounding-mode field (the << 22 below):
         tL = irrm << 1;
         tR = irrm >> 1;   if we're lucky, these will issue together
         tL &= 2;
         tR &= 1;          ditto
         t3 = tL | tR;
         t3 <<= 22;
         fmxr fpscr, t3
   */
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
}
342
343
344 /*---------------------------------------------------------*/
345 /*--- ISEL: Function call helpers ---*/
346 /*---------------------------------------------------------*/
347
348 /* Used only in doHelperCall. See big comment in doHelperCall re
349 handling of register-parameter args. This function figures out
350 whether evaluation of an expression might require use of a fixed
351 register. If in doubt return True (safe but suboptimal).
352 */
353 static
mightRequireFixedRegs(IRExpr * e)354 Bool mightRequireFixedRegs ( IRExpr* e )
355 {
356 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
357 // These are always "safe" -- either a copy of r13(sp) in some
358 // arbitrary vreg, or a copy of r8, respectively.
359 return False;
360 }
361 /* Else it's a "normal" expression. */
362 switch (e->tag) {
363 case Iex_RdTmp: case Iex_Const: case Iex_Get:
364 return False;
365 default:
366 return True;
367 }
368 }
369
370
371 /* Do a complete function call. |guard| is a Ity_Bit expression
372 indicating whether or not the call happens. If guard==NULL, the
373 call is unconditional. |retloc| is set to indicate where the
374 return value is after the call. The caller (of this fn) must
375 generate code to add |stackAdjustAfterCall| to the stack pointer
376 after the call is done. Returns True iff it managed to handle this
377 combination of arg/return types, else returns False. */
378
379 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)380 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
381 /*OUT*/RetLoc* retloc,
382 ISelEnv* env,
383 IRExpr* guard,
384 IRCallee* cee, IRType retTy, IRExpr** args )
385 {
386 ARMCondCode cc;
387 HReg argregs[ARM_N_ARGREGS];
388 HReg tmpregs[ARM_N_ARGREGS];
389 Bool go_fast;
390 Int n_args, i, nextArgReg;
391 Addr32 target;
392
393 vassert(ARM_N_ARGREGS == 4);
394
395 /* Set default returns. We'll update them later if needed. */
396 *stackAdjustAfterCall = 0;
397 *retloc = mk_RetLoc_INVALID();
398
399 /* These are used for cross-checking that IR-level constraints on
400 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
401 UInt nVECRETs = 0;
402 UInt nBBPTRs = 0;
403
404 /* Marshal args for a call and do the call.
405
406 This function only deals with a tiny set of possibilities, which
407 cover all helpers in practice. The restrictions are that only
408 arguments in registers are supported, hence only ARM_N_REGPARMS
409 x 32 integer bits in total can be passed. In fact the only
410 supported arg types are I32 and I64.
411
412 The return type can be I{64,32} or V128. In the V128 case, it
413 is expected that |args| will contain the special node
414 IRExpr_VECRET(), in which case this routine generates code to
415 allocate space on the stack for the vector return value. Since
416 we are not passing any scalars on the stack, it is enough to
417 preallocate the return space before marshalling any arguments,
418 in this case.
419
420 |args| may also contain IRExpr_BBPTR(), in which case the
421 value in r8 is passed as the corresponding argument.
422
423 Generating code which is both efficient and correct when
424 parameters are to be passed in registers is difficult, for the
425 reasons elaborated in detail in comments attached to
426 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
427 of the method described in those comments.
428
429 The problem is split into two cases: the fast scheme and the
430 slow scheme. In the fast scheme, arguments are computed
431 directly into the target (real) registers. This is only safe
432 when we can be sure that computation of each argument will not
433 trash any real registers set by computation of any other
434 argument.
435
436 In the slow scheme, all args are first computed into vregs, and
437 once they are all done, they are moved to the relevant real
438 regs. This always gives correct code, but it also gives a bunch
439 of vreg-to-rreg moves which are usually redundant but are hard
440 for the register allocator to get rid of.
441
442 To decide which scheme to use, all argument expressions are
443 first examined. If they are all so simple that it is clear they
444 will be evaluated without use of any fixed registers, use the
445 fast scheme, else use the slow scheme. Note also that only
446 unconditional calls may use the fast scheme, since having to
447 compute a condition expression could itself trash real
448 registers.
449
450 Note this requires being able to examine an expression and
451 determine whether or not evaluation of it might use a fixed
452 register. That requires knowledge of how the rest of this insn
453 selector works. Currently just the following 3 are regarded as
454 safe -- hopefully they cover the majority of arguments in
455 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
456 */
457
458 /* Note that the cee->regparms field is meaningless on ARM hosts
459 (since there is only one calling convention) and so we always
460 ignore it. */
461
462 n_args = 0;
463 for (i = 0; args[i]; i++) {
464 IRExpr* arg = args[i];
465 if (UNLIKELY(arg->tag == Iex_VECRET)) {
466 nVECRETs++;
467 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
468 nBBPTRs++;
469 }
470 n_args++;
471 }
472
473 argregs[0] = hregARM_R0();
474 argregs[1] = hregARM_R1();
475 argregs[2] = hregARM_R2();
476 argregs[3] = hregARM_R3();
477
478 tmpregs[0] = tmpregs[1] = tmpregs[2] =
479 tmpregs[3] = INVALID_HREG;
480
481 /* First decide which scheme (slow or fast) is to be used. First
482 assume the fast scheme, and select slow if any contraindications
483 (wow) appear. */
484
485 go_fast = True;
486
487 if (guard) {
488 if (guard->tag == Iex_Const
489 && guard->Iex.Const.con->tag == Ico_U1
490 && guard->Iex.Const.con->Ico.U1 == True) {
491 /* unconditional */
492 } else {
493 /* Not manifestly unconditional -- be conservative. */
494 go_fast = False;
495 }
496 }
497
498 if (go_fast) {
499 for (i = 0; i < n_args; i++) {
500 if (mightRequireFixedRegs(args[i])) {
501 go_fast = False;
502 break;
503 }
504 }
505 }
506
507 if (go_fast) {
508 if (retTy == Ity_V128 || retTy == Ity_V256)
509 go_fast = False;
510 }
511
512 /* At this point the scheme to use has been established. Generate
513 code to get the arg values into the argument rregs. If we run
514 out of arg regs, give up. */
515
516 if (go_fast) {
517
518 /* FAST SCHEME */
519 nextArgReg = 0;
520
521 for (i = 0; i < n_args; i++) {
522 IRExpr* arg = args[i];
523
524 IRType aTy = Ity_INVALID;
525 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
526 aTy = typeOfIRExpr(env->type_env, arg);
527
528 if (nextArgReg >= ARM_N_ARGREGS)
529 return False; /* out of argregs */
530
531 if (aTy == Ity_I32) {
532 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
533 iselIntExpr_R(env, arg) ));
534 nextArgReg++;
535 }
536 else if (aTy == Ity_I64) {
537 /* 64-bit args must be passed in an a reg-pair of the form
538 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
539 On a little-endian host, the less significant word is
540 passed in the lower-numbered register. */
541 if (nextArgReg & 1) {
542 if (nextArgReg >= ARM_N_ARGREGS)
543 return False; /* out of argregs */
544 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
545 nextArgReg++;
546 }
547 if (nextArgReg >= ARM_N_ARGREGS)
548 return False; /* out of argregs */
549 HReg raHi, raLo;
550 iselInt64Expr(&raHi, &raLo, env, arg);
551 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
552 nextArgReg++;
553 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
554 nextArgReg++;
555 }
556 else if (arg->tag == Iex_BBPTR) {
557 vassert(0); //ATC
558 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
559 hregARM_R8() ));
560 nextArgReg++;
561 }
562 else if (arg->tag == Iex_VECRET) {
563 // If this happens, it denotes ill-formed IR
564 vassert(0);
565 }
566 else
567 return False; /* unhandled arg type */
568 }
569
570 /* Fast scheme only applies for unconditional calls. Hence: */
571 cc = ARMcc_AL;
572
573 } else {
574
575 /* SLOW SCHEME; move via temporaries */
576 nextArgReg = 0;
577
578 for (i = 0; i < n_args; i++) {
579 IRExpr* arg = args[i];
580
581 IRType aTy = Ity_INVALID;
582 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
583 aTy = typeOfIRExpr(env->type_env, arg);
584
585 if (nextArgReg >= ARM_N_ARGREGS)
586 return False; /* out of argregs */
587
588 if (aTy == Ity_I32) {
589 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
590 nextArgReg++;
591 }
592 else if (aTy == Ity_I64) {
593 /* Same comment applies as in the Fast-scheme case. */
594 if (nextArgReg & 1)
595 nextArgReg++;
596 if (nextArgReg + 1 >= ARM_N_ARGREGS)
597 return False; /* out of argregs */
598 HReg raHi, raLo;
599 iselInt64Expr(&raHi, &raLo, env, args[i]);
600 tmpregs[nextArgReg] = raLo;
601 nextArgReg++;
602 tmpregs[nextArgReg] = raHi;
603 nextArgReg++;
604 }
605 else if (arg->tag == Iex_BBPTR) {
606 vassert(0); //ATC
607 tmpregs[nextArgReg] = hregARM_R8();
608 nextArgReg++;
609 }
610 else if (arg->tag == Iex_VECRET) {
611 // If this happens, it denotes ill-formed IR
612 vassert(0);
613 }
614 else
615 return False; /* unhandled arg type */
616 }
617
618 /* Now we can compute the condition. We can't do it earlier
619 because the argument computations could trash the condition
620 codes. Be a bit clever to handle the common case where the
621 guard is 1:Bit. */
622 cc = ARMcc_AL;
623 if (guard) {
624 if (guard->tag == Iex_Const
625 && guard->Iex.Const.con->tag == Ico_U1
626 && guard->Iex.Const.con->Ico.U1 == True) {
627 /* unconditional -- do nothing */
628 } else {
629 cc = iselCondCode( env, guard );
630 }
631 }
632
633 /* Move the args to their final destinations. */
634 for (i = 0; i < nextArgReg; i++) {
635 if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
636 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
637 continue;
638 }
639 /* None of these insns, including any spill code that might
640 be generated, may alter the condition codes. */
641 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
642 }
643
644 }
645
646 /* Should be assured by checks above */
647 vassert(nextArgReg <= ARM_N_ARGREGS);
648
649 /* Do final checks, set the return values, and generate the call
650 instruction proper. */
651 vassert(nBBPTRs == 0 || nBBPTRs == 1);
652 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
653 vassert(*stackAdjustAfterCall == 0);
654 vassert(is_RetLoc_INVALID(*retloc));
655 switch (retTy) {
656 case Ity_INVALID:
657 /* Function doesn't return a value. */
658 *retloc = mk_RetLoc_simple(RLPri_None);
659 break;
660 case Ity_I64:
661 *retloc = mk_RetLoc_simple(RLPri_2Int);
662 break;
663 case Ity_I32: case Ity_I16: case Ity_I8:
664 *retloc = mk_RetLoc_simple(RLPri_Int);
665 break;
666 case Ity_V128:
667 vassert(0); // ATC
668 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
669 *stackAdjustAfterCall = 16;
670 break;
671 case Ity_V256:
672 vassert(0); // ATC
673 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
674 *stackAdjustAfterCall = 32;
675 break;
676 default:
677 /* IR can denote other possible return types, but we don't
678 handle those here. */
679 vassert(0);
680 }
681
682 /* Finally, generate the call itself. This needs the *retloc value
683 set in the switch above, which is why it's at the end. */
684
685 /* nextArgReg doles out argument registers. Since these are
686 assigned in the order r0, r1, r2, r3, its numeric value at this
687 point, which must be between 0 and 4 inclusive, is going to be
688 equal to the number of arg regs in use for the call. Hence bake
689 that number into the call (we'll need to know it when doing
690 register allocation, to know what regs the call reads.)
691
692 There is a bit of a twist -- harmless but worth recording.
693 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
694 the first arg in r0 and the second in r3:r2, but r1 isn't used.
695 We nevertheless have nextArgReg==4 and bake that into the call
696 instruction. This will mean the register allocator wil believe
697 this insn reads r1 when in fact it doesn't. But that's
698 harmless; it just artificially extends the live range of r1
699 unnecessarily. The best fix would be to put into the
700 instruction, a bitmask indicating which of r0/1/2/3 carry live
701 values. But that's too much hassle. */
702
703 target = (Addr)cee->addr;
704 addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
705
706 return True; /* success */
707 }
708
709
710 /*---------------------------------------------------------*/
711 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
712 /*---------------------------------------------------------*/
713
714 /* Select insns for an integer-typed expression, and add them to the
715 code list. Return a reg holding the result. This reg will be a
716 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
717 want to modify it, ask for a new vreg, copy it in there, and modify
718 the copy. The register allocator will do its best to map both
719 vregs to the same real register, so the copies will often disappear
720 later in the game.
721
722 This should handle expressions of 32, 16 and 8-bit type. All
723 results are returned in a 32-bit register. For 16- and 8-bit
724 expressions, the upper 16/24 bits are arbitrary, so you should mask
725 or sign extend partial values if necessary.
726 */
727
728 /* --------------------- AMode1 --------------------- */
729
730 /* Return an AMode1 which computes the value of the specified
731 expression, possibly also adding insns to the code list as a
732 result. The expression may only be a 32-bit one.
733 */
734
sane_AMode1(ARMAMode1 * am)735 static Bool sane_AMode1 ( ARMAMode1* am )
736 {
737 switch (am->tag) {
738 case ARMam1_RI:
739 return
740 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
741 && (hregIsVirtual(am->ARMam1.RI.reg)
742 || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
743 && am->ARMam1.RI.simm13 >= -4095
744 && am->ARMam1.RI.simm13 <= 4095 );
745 case ARMam1_RRS:
746 return
747 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
748 && hregIsVirtual(am->ARMam1.RRS.base)
749 && hregClass(am->ARMam1.RRS.index) == HRcInt32
750 && hregIsVirtual(am->ARMam1.RRS.index)
751 && am->ARMam1.RRS.shift >= 0
752 && am->ARMam1.RRS.shift <= 3 );
753 default:
754 vpanic("sane_AMode: unknown ARM AMode1 tag");
755 }
756 }
757
iselIntExpr_AMode1(ISelEnv * env,IRExpr * e)758 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
759 {
760 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
761 vassert(sane_AMode1(am));
762 return am;
763 }
764
iselIntExpr_AMode1_wrk(ISelEnv * env,IRExpr * e)765 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
766 {
767 IRType ty = typeOfIRExpr(env->type_env,e);
768 vassert(ty == Ity_I32);
769
770 /* FIXME: add RRS matching */
771
772 /* {Add32,Sub32}(expr,simm13) */
773 if (e->tag == Iex_Binop
774 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
775 && e->Iex.Binop.arg2->tag == Iex_Const
776 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
777 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
778 if (simm >= -4095 && simm <= 4095) {
779 HReg reg;
780 if (e->Iex.Binop.op == Iop_Sub32)
781 simm = -simm;
782 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
783 return ARMAMode1_RI(reg, simm);
784 }
785 }
786
787 /* Doesn't match anything in particular. Generate it into
788 a register and use that. */
789 {
790 HReg reg = iselIntExpr_R(env, e);
791 return ARMAMode1_RI(reg, 0);
792 }
793
794 }
795
796
797 /* --------------------- AMode2 --------------------- */
798
799 /* Return an AMode2 which computes the value of the specified
800 expression, possibly also adding insns to the code list as a
801 result. The expression may only be a 32-bit one.
802 */
803
sane_AMode2(ARMAMode2 * am)804 static Bool sane_AMode2 ( ARMAMode2* am )
805 {
806 switch (am->tag) {
807 case ARMam2_RI:
808 return
809 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
810 && hregIsVirtual(am->ARMam2.RI.reg)
811 && am->ARMam2.RI.simm9 >= -255
812 && am->ARMam2.RI.simm9 <= 255 );
813 case ARMam2_RR:
814 return
815 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
816 && hregIsVirtual(am->ARMam2.RR.base)
817 && hregClass(am->ARMam2.RR.index) == HRcInt32
818 && hregIsVirtual(am->ARMam2.RR.index) );
819 default:
820 vpanic("sane_AMode: unknown ARM AMode2 tag");
821 }
822 }
823
iselIntExpr_AMode2(ISelEnv * env,IRExpr * e)824 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
825 {
826 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
827 vassert(sane_AMode2(am));
828 return am;
829 }
830
iselIntExpr_AMode2_wrk(ISelEnv * env,IRExpr * e)831 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
832 {
833 IRType ty = typeOfIRExpr(env->type_env,e);
834 vassert(ty == Ity_I32);
835
836 /* FIXME: add RR matching */
837
838 /* {Add32,Sub32}(expr,simm8) */
839 if (e->tag == Iex_Binop
840 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
841 && e->Iex.Binop.arg2->tag == Iex_Const
842 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
843 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
844 if (simm >= -255 && simm <= 255) {
845 HReg reg;
846 if (e->Iex.Binop.op == Iop_Sub32)
847 simm = -simm;
848 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
849 return ARMAMode2_RI(reg, simm);
850 }
851 }
852
853 /* Doesn't match anything in particular. Generate it into
854 a register and use that. */
855 {
856 HReg reg = iselIntExpr_R(env, e);
857 return ARMAMode2_RI(reg, 0);
858 }
859
860 }
861
862
863 /* --------------------- AModeV --------------------- */
864
865 /* Return an AModeV which computes the value of the specified
866 expression, possibly also adding insns to the code list as a
867 result. The expression may only be a 32-bit one.
868 */
869
sane_AModeV(ARMAModeV * am)870 static Bool sane_AModeV ( ARMAModeV* am )
871 {
872 return toBool( hregClass(am->reg) == HRcInt32
873 && hregIsVirtual(am->reg)
874 && am->simm11 >= -1020 && am->simm11 <= 1020
875 && 0 == (am->simm11 & 3) );
876 }
877
iselIntExpr_AModeV(ISelEnv * env,IRExpr * e)878 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
879 {
880 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
881 vassert(sane_AModeV(am));
882 return am;
883 }
884
iselIntExpr_AModeV_wrk(ISelEnv * env,IRExpr * e)885 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
886 {
887 IRType ty = typeOfIRExpr(env->type_env,e);
888 vassert(ty == Ity_I32);
889
890 /* {Add32,Sub32}(expr, simm8 << 2) */
891 if (e->tag == Iex_Binop
892 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
893 && e->Iex.Binop.arg2->tag == Iex_Const
894 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
895 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
896 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
897 HReg reg;
898 if (e->Iex.Binop.op == Iop_Sub32)
899 simm = -simm;
900 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
901 return mkARMAModeV(reg, simm);
902 }
903 }
904
905 /* Doesn't match anything in particular. Generate it into
906 a register and use that. */
907 {
908 HReg reg = iselIntExpr_R(env, e);
909 return mkARMAModeV(reg, 0);
910 }
911
912 }
913
914 /* -------------------- AModeN -------------------- */
915
iselIntExpr_AModeN(ISelEnv * env,IRExpr * e)916 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
917 {
918 return iselIntExpr_AModeN_wrk(env, e);
919 }
920
iselIntExpr_AModeN_wrk(ISelEnv * env,IRExpr * e)921 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
922 {
923 HReg reg = iselIntExpr_R(env, e);
924 return mkARMAModeN_R(reg);
925 }
926
927
928 /* --------------------- RI84 --------------------- */
929
930 /* Select instructions to generate 'e' into a RI84. If mayInv is
931 true, then the caller will also accept an I84 form that denotes
932 'not e'. In this case didInv may not be NULL, and *didInv is set
933 to True. This complication is so as to allow generation of an RI84
934 which is suitable for use in either an AND or BIC instruction,
935 without knowing (before this call) which one.
936 */
iselIntExpr_RI84(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)937 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
938 ISelEnv* env, IRExpr* e )
939 {
940 ARMRI84* ri;
941 if (mayInv)
942 vassert(didInv != NULL);
943 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
944 /* sanity checks ... */
945 switch (ri->tag) {
946 case ARMri84_I84:
947 return ri;
948 case ARMri84_R:
949 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
950 vassert(hregIsVirtual(ri->ARMri84.R.reg));
951 return ri;
952 default:
953 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
954 }
955 }
956
957 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI84_wrk(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)958 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
959 ISelEnv* env, IRExpr* e )
960 {
961 IRType ty = typeOfIRExpr(env->type_env,e);
962 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
963
964 if (didInv) *didInv = False;
965
966 /* special case: immediate */
967 if (e->tag == Iex_Const) {
968 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
969 switch (e->Iex.Const.con->tag) {
970 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
971 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
972 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
973 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
974 }
975 if (fitsIn8x4(&u8, &u4, u)) {
976 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
977 }
978 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
979 vassert(didInv);
980 *didInv = True;
981 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
982 }
983 /* else fail, fall through to default case */
984 }
985
986 /* default case: calculate into a register and return that */
987 {
988 HReg r = iselIntExpr_R ( env, e );
989 return ARMRI84_R(r);
990 }
991 }
992
993
994 /* --------------------- RI5 --------------------- */
995
996 /* Select instructions to generate 'e' into a RI5. */
997
iselIntExpr_RI5(ISelEnv * env,IRExpr * e)998 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
999 {
1000 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1001 /* sanity checks ... */
1002 switch (ri->tag) {
1003 case ARMri5_I5:
1004 return ri;
1005 case ARMri5_R:
1006 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1007 vassert(hregIsVirtual(ri->ARMri5.R.reg));
1008 return ri;
1009 default:
1010 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1011 }
1012 }
1013
1014 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI5_wrk(ISelEnv * env,IRExpr * e)1015 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1016 {
1017 IRType ty = typeOfIRExpr(env->type_env,e);
1018 vassert(ty == Ity_I32 || ty == Ity_I8);
1019
1020 /* special case: immediate */
1021 if (e->tag == Iex_Const) {
1022 UInt u; /* both invalid */
1023 switch (e->Iex.Const.con->tag) {
1024 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1025 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1026 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1027 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1028 }
1029 if (u >= 1 && u <= 31) {
1030 return ARMRI5_I5(u);
1031 }
1032 /* else fail, fall through to default case */
1033 }
1034
1035 /* default case: calculate into a register and return that */
1036 {
1037 HReg r = iselIntExpr_R ( env, e );
1038 return ARMRI5_R(r);
1039 }
1040 }
1041
1042
1043 /* ------------------- CondCode ------------------- */
1044
1045 /* Generate code to evaluated a bit-typed expression, returning the
1046 condition code which would correspond when the expression would
1047 notionally have returned 1. */
1048
iselCondCode(ISelEnv * env,IRExpr * e)1049 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1050 {
1051 ARMCondCode cc = iselCondCode_wrk(env,e);
1052 vassert(cc != ARMcc_NV);
1053 return cc;
1054 }
1055
static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Emit code that sets the ARM condition flags so that the returned
      condition code is true exactly when the I1-typed expression 'e'
      evaluates to 1. */
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* CmpOrTst doesn't modify rTmp; so this is OK. */
      /* TST rTmp, #1 -- NE iff the low bit of the temp is set. */
      ARMRI84* one  = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      /* ARM condition codes invert their sense when the low bit is
         flipped, so XOR-ing with 1 negates the condition. */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      /* Only bit 0 is significant: TST with #1. */
      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* one  = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      /* TST r1, #0xFF -- NE iff any of the low 8 bits is set. */
      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      /* CMP r1, #0 -- NE iff the value is nonzero. */
      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* zero = ARMRI84_I84(0,0);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      /* OR the two halves together; the 64-bit value is nonzero iff
         the OR of the halves is nonzero. */
      HReg     tHi, tLo;
      HReg     tmp  = newVRegI(env);
      ARMRI84* zero = ARMRI84_I84(0,0);
      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
      return ARMcc_NE;
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      /* CMP argL, argR, then map the IROp to the matching ARM
         condition (signed -> LT/LE, unsigned -> LO/LS). */
      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
                                       env, e->Iex.Binop.arg2);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return ARMcc_EQ;
         case Iop_CmpNE32:  return ARMcc_NE;
         case Iop_CmpLT32S: return ARMcc_LT;
         case Iop_CmpLT32U: return ARMcc_LO;
         case Iop_CmpLE32S: return ARMcc_LE;
         case Iop_CmpLE32U: return ARMcc_LS;
         default: vpanic("iselCondCode(arm): CmpXX32");
      }
   }

   /* const */
   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      /* CMP r, r on a zeroed register always sets EQ; return EQ for a
         true constant and NE (never taken) for a false one. */
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, ARMInstr_Imm32(r, 0));
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
      return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
   }

   // JRS 2013-Jan-03: this seems completely nonsensical
   /* --- CasCmpEQ* --- */
   /* Ist_Cas has a dummy argument to compare with, so comparison is
      always true. */
   //if (e->tag == Iex_Binop
   //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
   //        || e->Iex.Binop.op == Iop_CasCmpEQ16
   //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
   //   return ARMcc_AL;
   //}

   ppIRExpr(e);
   vpanic("iselCondCode");
}
1169
1170
1171 /* --------------------- Reg --------------------- */
1172
iselIntExpr_R(ISelEnv * env,IRExpr * e)1173 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1174 {
1175 HReg r = iselIntExpr_R_wrk(env, e);
1176 /* sanity checks ... */
1177 # if 0
1178 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1179 # endif
1180 vassert(hregClass(r) == HRcInt32);
1181 vassert(hregIsVirtual(r));
1182 return r;
1183 }
1184
1185 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,IRExpr * e)1186 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1187 {
1188 IRType ty = typeOfIRExpr(env->type_env,e);
1189 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1190
1191 switch (e->tag) {
1192
1193 /* --------- TEMP --------- */
1194 case Iex_RdTmp: {
1195 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1196 }
1197
1198 /* --------- LOAD --------- */
1199 case Iex_Load: {
1200 HReg dst = newVRegI(env);
1201
1202 if (e->Iex.Load.end != Iend_LE)
1203 goto irreducible;
1204
1205 if (ty == Ity_I32) {
1206 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1207 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1208 return dst;
1209 }
1210 if (ty == Ity_I16) {
1211 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1212 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1213 True/*isLoad*/, False/*!signedLoad*/,
1214 dst, amode));
1215 return dst;
1216 }
1217 if (ty == Ity_I8) {
1218 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1219 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1220 return dst;
1221 }
1222 break;
1223 }
1224
1225 //zz /* --------- TERNARY OP --------- */
1226 //zz case Iex_Triop: {
1227 //zz IRTriop *triop = e->Iex.Triop.details;
1228 //zz /* C3210 flags following FPU partial remainder (fprem), both
1229 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1230 //zz if (triop->op == Iop_PRemC3210F64
1231 //zz || triop->op == Iop_PRem1C3210F64) {
1232 //zz HReg junk = newVRegF(env);
1233 //zz HReg dst = newVRegI(env);
1234 //zz HReg srcL = iselDblExpr(env, triop->arg2);
1235 //zz HReg srcR = iselDblExpr(env, triop->arg3);
1236 //zz /* XXXROUNDINGFIXME */
1237 //zz /* set roundingmode here */
1238 //zz addInstr(env, X86Instr_FpBinary(
1239 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1240 //zz ? Xfp_PREM : Xfp_PREM1,
1241 //zz srcL,srcR,junk
1242 //zz ));
1243 //zz /* The previous pseudo-insn will have left the FPU's C3210
1244 //zz flags set correctly. So bag them. */
1245 //zz addInstr(env, X86Instr_FpStSW_AX());
1246 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1247 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1248 //zz return dst;
1249 //zz }
1250 //zz
1251 //zz break;
1252 //zz }
1253
1254 /* --------- BINARY OP --------- */
1255 case Iex_Binop: {
1256
1257 ARMAluOp aop = 0; /* invalid */
1258 ARMShiftOp sop = 0; /* invalid */
1259
1260 /* ADD/SUB/AND/OR/XOR */
1261 switch (e->Iex.Binop.op) {
1262 case Iop_And32: {
1263 Bool didInv = False;
1264 HReg dst = newVRegI(env);
1265 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1266 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1267 env, e->Iex.Binop.arg2);
1268 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1269 dst, argL, argR));
1270 return dst;
1271 }
1272 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1273 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1274 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1275 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1276 std_binop: {
1277 HReg dst = newVRegI(env);
1278 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1279 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1280 env, e->Iex.Binop.arg2);
1281 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1282 return dst;
1283 }
1284 default: break;
1285 }
1286
1287 /* SHL/SHR/SAR */
1288 switch (e->Iex.Binop.op) {
1289 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1290 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1291 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1292 sh_binop: {
1293 HReg dst = newVRegI(env);
1294 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1295 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1296 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1297 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1298 return dst;
1299 }
1300 default: break;
1301 }
1302
1303 /* MUL */
1304 if (e->Iex.Binop.op == Iop_Mul32) {
1305 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1306 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1307 HReg dst = newVRegI(env);
1308 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1309 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1310 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1311 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1312 return dst;
1313 }
1314
1315 /* Handle misc other ops. */
1316
1317 if (e->Iex.Binop.op == Iop_Max32U) {
1318 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1319 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1320 HReg dst = newVRegI(env);
1321 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1322 ARMRI84_R(argR)));
1323 addInstr(env, mk_iMOVds_RR(dst, argL));
1324 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1325 return dst;
1326 }
1327
1328 if (e->Iex.Binop.op == Iop_CmpF64) {
1329 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1330 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1331 HReg dst = newVRegI(env);
1332 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1333 FMSTAT, so we can examine the results directly. */
1334 addInstr(env, ARMInstr_VCmpD(dL, dR));
1335 /* Create in dst, the IRCmpF64Result encoded result. */
1336 addInstr(env, ARMInstr_Imm32(dst, 0));
1337 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1338 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1339 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1340 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1341 return dst;
1342 }
1343
1344 if (e->Iex.Binop.op == Iop_F64toI32S
1345 || e->Iex.Binop.op == Iop_F64toI32U) {
1346 /* Wretched uglyness all round, due to having to deal
1347 with rounding modes. Oh well. */
1348 /* FIXME: if arg1 is a constant indicating round-to-zero,
1349 then we could skip all this arsing around with FPSCR and
1350 simply emit FTO{S,U}IZD. */
1351 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1352 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1353 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1354 /* FTO{S,U}ID valF, valD */
1355 HReg valF = newVRegF(env);
1356 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1357 valF, valD));
1358 set_VFP_rounding_default(env);
1359 /* VMOV dst, valF */
1360 HReg dst = newVRegI(env);
1361 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1362 return dst;
1363 }
1364
1365 if (e->Iex.Binop.op == Iop_GetElem8x8
1366 || e->Iex.Binop.op == Iop_GetElem16x4
1367 || e->Iex.Binop.op == Iop_GetElem32x2) {
1368 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1369 HReg res = newVRegI(env);
1370 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1371 UInt index, size;
1372 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1373 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1374 vpanic("ARM target supports GetElem with constant "
1375 "second argument only (neon)\n");
1376 }
1377 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1378 switch (e->Iex.Binop.op) {
1379 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1380 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1381 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1382 default: vassert(0);
1383 }
1384 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1385 mkARMNRS(ARMNRS_Reg, res, 0),
1386 mkARMNRS(ARMNRS_Scalar, arg, index),
1387 size, False));
1388 return res;
1389 }
1390 }
1391
1392 if (e->Iex.Binop.op == Iop_GetElem32x2
1393 && e->Iex.Binop.arg2->tag == Iex_Const
1394 && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1395 /* We may have to do GetElem32x2 on a non-NEON capable
1396 target. */
1397 IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1398 vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1399 UInt index = con->Ico.U8;
1400 if (index >= 0 && index <= 1) {
1401 HReg rHi, rLo;
1402 iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1403 return index == 0 ? rLo : rHi;
1404 }
1405 }
1406
1407 if (e->Iex.Binop.op == Iop_GetElem8x16
1408 || e->Iex.Binop.op == Iop_GetElem16x8
1409 || e->Iex.Binop.op == Iop_GetElem32x4) {
1410 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1411 HReg res = newVRegI(env);
1412 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1413 UInt index, size;
1414 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1415 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1416 vpanic("ARM target supports GetElem with constant "
1417 "second argument only (neon)\n");
1418 }
1419 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1420 switch (e->Iex.Binop.op) {
1421 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1422 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1423 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1424 default: vassert(0);
1425 }
1426 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1427 mkARMNRS(ARMNRS_Reg, res, 0),
1428 mkARMNRS(ARMNRS_Scalar, arg, index),
1429 size, True));
1430 return res;
1431 }
1432 }
1433
1434 /* All cases involving host-side helper calls. */
1435 void* fn = NULL;
1436 switch (e->Iex.Binop.op) {
1437 case Iop_Add16x2:
1438 fn = &h_generic_calc_Add16x2; break;
1439 case Iop_Sub16x2:
1440 fn = &h_generic_calc_Sub16x2; break;
1441 case Iop_HAdd16Ux2:
1442 fn = &h_generic_calc_HAdd16Ux2; break;
1443 case Iop_HAdd16Sx2:
1444 fn = &h_generic_calc_HAdd16Sx2; break;
1445 case Iop_HSub16Ux2:
1446 fn = &h_generic_calc_HSub16Ux2; break;
1447 case Iop_HSub16Sx2:
1448 fn = &h_generic_calc_HSub16Sx2; break;
1449 case Iop_QAdd16Sx2:
1450 fn = &h_generic_calc_QAdd16Sx2; break;
1451 case Iop_QAdd16Ux2:
1452 fn = &h_generic_calc_QAdd16Ux2; break;
1453 case Iop_QSub16Sx2:
1454 fn = &h_generic_calc_QSub16Sx2; break;
1455 case Iop_Add8x4:
1456 fn = &h_generic_calc_Add8x4; break;
1457 case Iop_Sub8x4:
1458 fn = &h_generic_calc_Sub8x4; break;
1459 case Iop_HAdd8Ux4:
1460 fn = &h_generic_calc_HAdd8Ux4; break;
1461 case Iop_HAdd8Sx4:
1462 fn = &h_generic_calc_HAdd8Sx4; break;
1463 case Iop_HSub8Ux4:
1464 fn = &h_generic_calc_HSub8Ux4; break;
1465 case Iop_HSub8Sx4:
1466 fn = &h_generic_calc_HSub8Sx4; break;
1467 case Iop_QAdd8Sx4:
1468 fn = &h_generic_calc_QAdd8Sx4; break;
1469 case Iop_QAdd8Ux4:
1470 fn = &h_generic_calc_QAdd8Ux4; break;
1471 case Iop_QSub8Sx4:
1472 fn = &h_generic_calc_QSub8Sx4; break;
1473 case Iop_QSub8Ux4:
1474 fn = &h_generic_calc_QSub8Ux4; break;
1475 case Iop_Sad8Ux4:
1476 fn = &h_generic_calc_Sad8Ux4; break;
1477 case Iop_QAdd32S:
1478 fn = &h_generic_calc_QAdd32S; break;
1479 case Iop_QSub32S:
1480 fn = &h_generic_calc_QSub32S; break;
1481 case Iop_QSub16Ux2:
1482 fn = &h_generic_calc_QSub16Ux2; break;
1483 case Iop_DivU32:
1484 fn = &h_calc_udiv32_w_arm_semantics; break;
1485 case Iop_DivS32:
1486 fn = &h_calc_sdiv32_w_arm_semantics; break;
1487 default:
1488 break;
1489 }
1490
1491 if (fn) {
1492 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1493 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1494 HReg res = newVRegI(env);
1495 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1496 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1497 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1498 2, mk_RetLoc_simple(RLPri_Int) ));
1499 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1500 return res;
1501 }
1502
1503 break;
1504 }
1505
1506 /* --------- UNARY OP --------- */
1507 case Iex_Unop: {
1508
1509 //zz /* 1Uto8(32to1(expr32)) */
1510 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1511 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1512 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1513 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1514 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1515 //zz IRExpr* expr32 = mi.bindee[0];
1516 //zz HReg dst = newVRegI(env);
1517 //zz HReg src = iselIntExpr_R(env, expr32);
1518 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1519 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1520 //zz X86RMI_Imm(1), dst));
1521 //zz return dst;
1522 //zz }
1523 //zz }
1524 //zz
1525 //zz /* 8Uto32(LDle(expr32)) */
1526 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1527 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1528 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1529 //zz unop(Iop_8Uto32,
1530 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1531 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1532 //zz HReg dst = newVRegI(env);
1533 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1534 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1535 //zz return dst;
1536 //zz }
1537 //zz }
1538 //zz
1539 //zz /* 8Sto32(LDle(expr32)) */
1540 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1541 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1542 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1543 //zz unop(Iop_8Sto32,
1544 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1545 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1546 //zz HReg dst = newVRegI(env);
1547 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1548 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1549 //zz return dst;
1550 //zz }
1551 //zz }
1552 //zz
1553 //zz /* 16Uto32(LDle(expr32)) */
1554 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1555 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1556 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1557 //zz unop(Iop_16Uto32,
1558 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1559 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1560 //zz HReg dst = newVRegI(env);
1561 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1562 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1563 //zz return dst;
1564 //zz }
1565 //zz }
1566 //zz
1567 //zz /* 8Uto32(GET:I8) */
1568 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1569 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1570 //zz HReg dst;
1571 //zz X86AMode* amode;
1572 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1573 //zz dst = newVRegI(env);
1574 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1575 //zz hregX86_EBP());
1576 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1577 //zz return dst;
1578 //zz }
1579 //zz }
1580 //zz
1581 //zz /* 16to32(GET:I16) */
1582 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1583 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1584 //zz HReg dst;
1585 //zz X86AMode* amode;
1586 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1587 //zz dst = newVRegI(env);
1588 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1589 //zz hregX86_EBP());
1590 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1591 //zz return dst;
1592 //zz }
1593 //zz }
1594
1595 switch (e->Iex.Unop.op) {
1596 case Iop_8Uto32: {
1597 HReg dst = newVRegI(env);
1598 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1599 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1600 dst, src, ARMRI84_I84(0xFF,0)));
1601 return dst;
1602 }
1603 //zz case Iop_8Uto16:
1604 //zz case Iop_8Uto32:
1605 //zz case Iop_16Uto32: {
1606 //zz HReg dst = newVRegI(env);
1607 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1608 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1609 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1610 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1611 //zz X86RMI_Imm(mask), dst));
1612 //zz return dst;
1613 //zz }
1614 //zz case Iop_8Sto16:
1615 //zz case Iop_8Sto32:
1616 case Iop_16Uto32: {
1617 HReg dst = newVRegI(env);
1618 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1619 ARMRI5* amt = ARMRI5_I5(16);
1620 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1621 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1622 return dst;
1623 }
1624 case Iop_8Sto32:
1625 case Iop_16Sto32: {
1626 HReg dst = newVRegI(env);
1627 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1628 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1629 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1630 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1631 return dst;
1632 }
1633 //zz case Iop_Not8:
1634 //zz case Iop_Not16:
1635 case Iop_Not32: {
1636 HReg dst = newVRegI(env);
1637 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1638 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1639 return dst;
1640 }
1641 case Iop_64HIto32: {
1642 HReg rHi, rLo;
1643 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1644 return rHi; /* and abandon rLo .. poor wee thing :-) */
1645 }
1646 case Iop_64to32: {
1647 HReg rHi, rLo;
1648 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1649 return rLo; /* similar stupid comment to the above ... */
1650 }
1651 case Iop_64to8: {
1652 HReg rHi, rLo;
1653 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1654 HReg tHi = newVRegI(env);
1655 HReg tLo = newVRegI(env);
1656 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1657 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1658 rHi = tHi;
1659 rLo = tLo;
1660 } else {
1661 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1662 }
1663 return rLo;
1664 }
1665
1666 case Iop_1Uto32:
1667 /* 1Uto32(tmp). Since I1 values generated into registers
1668 are guaranteed to have value either only zero or one,
1669 we can simply return the value of the register in this
1670 case. */
1671 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1672 HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1673 return dst;
1674 }
1675 /* else fall through */
1676 case Iop_1Uto8: {
1677 HReg dst = newVRegI(env);
1678 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1679 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1680 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1681 return dst;
1682 }
1683
1684 case Iop_1Sto32: {
1685 HReg dst = newVRegI(env);
1686 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1687 ARMRI5* amt = ARMRI5_I5(31);
1688 /* This is really rough. We could do much better here;
1689 perhaps mvn{cond} dst, #0 as the second insn?
1690 (same applies to 1Sto64) */
1691 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1692 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1693 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1694 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1695 return dst;
1696 }
1697
1698
1699 //zz case Iop_1Sto8:
1700 //zz case Iop_1Sto16:
1701 //zz case Iop_1Sto32: {
1702 //zz /* could do better than this, but for now ... */
1703 //zz HReg dst = newVRegI(env);
1704 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1705 //zz addInstr(env, X86Instr_Set32(cond,dst));
1706 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1707 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1708 //zz return dst;
1709 //zz }
1710 //zz case Iop_Ctz32: {
1711 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1712 //zz HReg dst = newVRegI(env);
1713 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1714 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1715 //zz return dst;
1716 //zz }
1717 case Iop_Clz32: {
1718 /* Count leading zeroes; easy on ARM. */
1719 HReg dst = newVRegI(env);
1720 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1721 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1722 return dst;
1723 }
1724
1725 case Iop_CmpwNEZ32: {
1726 HReg dst = newVRegI(env);
1727 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1728 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1729 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1730 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1731 return dst;
1732 }
1733
1734 case Iop_Left32: {
1735 HReg dst = newVRegI(env);
1736 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1737 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1738 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1739 return dst;
1740 }
1741
1742 //zz case Iop_V128to32: {
1743 //zz HReg dst = newVRegI(env);
1744 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1745 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1746 //zz sub_from_esp(env, 16);
1747 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1748 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1749 //zz add_to_esp(env, 16);
1750 //zz return dst;
1751 //zz }
1752 //zz
1753 case Iop_ReinterpF32asI32: {
1754 HReg dst = newVRegI(env);
1755 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1756 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1757 return dst;
1758 }
1759
1760 //zz
1761 //zz case Iop_16to8:
1762 case Iop_32to8:
1763 case Iop_32to16:
1764 /* These are no-ops. */
1765 return iselIntExpr_R(env, e->Iex.Unop.arg);
1766
1767 default:
1768 break;
1769 }
1770
1771 /* All Unop cases involving host-side helper calls. */
1772 void* fn = NULL;
1773 switch (e->Iex.Unop.op) {
1774 case Iop_CmpNEZ16x2:
1775 fn = &h_generic_calc_CmpNEZ16x2; break;
1776 case Iop_CmpNEZ8x4:
1777 fn = &h_generic_calc_CmpNEZ8x4; break;
1778 default:
1779 break;
1780 }
1781
1782 if (fn) {
1783 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1784 HReg res = newVRegI(env);
1785 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1786 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1787 1, mk_RetLoc_simple(RLPri_Int) ));
1788 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1789 return res;
1790 }
1791
1792 break;
1793 }
1794
1795 /* --------- GET --------- */
1796 case Iex_Get: {
1797 if (ty == Ity_I32
1798 && 0 == (e->Iex.Get.offset & 3)
1799 && e->Iex.Get.offset < 4096-4) {
1800 HReg dst = newVRegI(env);
1801 addInstr(env, ARMInstr_LdSt32(
1802 ARMcc_AL, True/*isLoad*/,
1803 dst,
1804 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1805 return dst;
1806 }
1807 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1808 //zz HReg dst = newVRegI(env);
1809 //zz addInstr(env, X86Instr_LoadEX(
1810 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1811 //zz False,
1812 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1813 //zz dst));
1814 //zz return dst;
1815 //zz }
1816 break;
1817 }
1818
1819 //zz case Iex_GetI: {
1820 //zz X86AMode* am
1821 //zz = genGuestArrayOffset(
1822 //zz env, e->Iex.GetI.descr,
1823 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1824 //zz HReg dst = newVRegI(env);
1825 //zz if (ty == Ity_I8) {
1826 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1827 //zz return dst;
1828 //zz }
1829 //zz if (ty == Ity_I32) {
1830 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1831 //zz return dst;
1832 //zz }
1833 //zz break;
1834 //zz }
1835
1836 /* --------- CCALL --------- */
1837 case Iex_CCall: {
1838 HReg dst = newVRegI(env);
1839 vassert(ty == e->Iex.CCall.retty);
1840
1841 /* be very restrictive for now. Only 32/64-bit ints allowed for
1842 args, and 32 bits for return type. Don't forget to change
1843 the RetLoc if more types are allowed in future. */
1844 if (e->Iex.CCall.retty != Ity_I32)
1845 goto irreducible;
1846
1847 /* Marshal args, do the call, clear stack. */
1848 UInt addToSp = 0;
1849 RetLoc rloc = mk_RetLoc_INVALID();
1850 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1851 e->Iex.CCall.cee, e->Iex.CCall.retty,
1852 e->Iex.CCall.args );
1853 /* */
1854 if (ok) {
1855 vassert(is_sane_RetLoc(rloc));
1856 vassert(rloc.pri == RLPri_Int);
1857 vassert(addToSp == 0);
1858 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1859 return dst;
1860 }
1861 /* else fall through; will hit the irreducible: label */
1862 }
1863
1864 /* --------- LITERAL --------- */
1865 /* 32 literals */
1866 case Iex_Const: {
1867 UInt u = 0;
1868 HReg dst = newVRegI(env);
1869 switch (e->Iex.Const.con->tag) {
1870 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1871 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1872 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1873 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1874 }
1875 addInstr(env, ARMInstr_Imm32(dst, u));
1876 return dst;
1877 }
1878
1879 /* --------- MULTIPLEX --------- */
1880 case Iex_ITE: { // VFD
1881 /* ITE(ccexpr, iftrue, iffalse) */
1882 if (ty == Ity_I32) {
1883 ARMCondCode cc;
1884 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1885 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
1886 HReg dst = newVRegI(env);
1887 addInstr(env, mk_iMOVds_RR(dst, r1));
1888 cc = iselCondCode(env, e->Iex.ITE.cond);
1889 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1890 return dst;
1891 }
1892 break;
1893 }
1894
1895 default:
1896 break;
1897 } /* switch (e->tag) */
1898
1899 /* We get here if no pattern matched. */
1900 irreducible:
1901 ppIRExpr(e);
1902 vpanic("iselIntExpr_R: cannot reduce tree");
1903 }
1904
1905
1906 /* -------------------- 64-bit -------------------- */
1907
1908 /* Compute a 64-bit value into a register pair, which is returned as
1909 the first two parameters. As with iselIntExpr_R, these may be
1910 either real or virtual regs; in any case they must not be changed
1911 by subsequent code emitted by the caller. */
1912
static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   /* Delegate the real work, then sanity-check what came back. */
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Both halves of the result must be virtual 32-bit integer
      registers. */
   vassert(hregClass(*rLo) == HRcInt32 && hregIsVirtual(*rLo));
   vassert(hregClass(*rHi) == HRcInt32 && hregIsVirtual(*rHi));
}
1924
1925 /* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal: materialise each 32-bit half with an Imm32. */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt  wHi = toUInt(w64 >> 32);
      UInt  wLo = toUInt(w64);
      HReg  tHi = newVRegI(env);
      HReg  tLo = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      addInstr(env, ARMInstr_Imm32(tHi, wHi));
      addInstr(env, ARMInstr_Imm32(tLo, wLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
         /* With NEON available, 64-bit values live in D registers;
            transfer the value out into a fresh integer reg pair. */
         HReg tHi = newVRegI(env);
         HReg tLo = newVRegI(env);
         HReg tmp = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
         *rHi = tHi;
         *rLo = tLo;
      } else {
         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      }
      return;
   }

   /* 64-bit load: two 32-bit loads.  Little-endian only, so the high
      word is at addr+4 and the low word at addr+0. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg tLo, tHi, rA;
      vassert(e->Iex.Load.ty == Ity_I64);
      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
      tHi = newVRegI(env);
      tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tHi, ARMAMode1_RI(rA, 4)));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tLo, ARMAMode1_RI(rA, 0)));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET: two 32-bit loads from the guest state, which is
      addressed off r8. */
   if (e->tag == Iex_Get) {
      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* 32 x 32 -> 64 multiply */
         case Iop_MullS32:
         case Iop_MullU32: {
            /* ARMInstr_Mul uses fixed registers: operands are
               marshalled into r2/r3 beforehand and the 64-bit result
               is collected from r1:r0 afterwards. */
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg tHi  = newVRegI(env);
            HReg tLo  = newVRegI(env);
            ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
                              ? ARMmul_SX : ARMmul_ZX;
            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
            addInstr(env, ARMInstr_Mul(mop));
            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Or64: {
            /* Halfwise OR; no carry involved. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Add64: {
            /* Add low halves first with ADDS (sets carry), then the
               high halves with ADC (consumes carry).  Order of the
               two instructions matters. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32HLto64(e1,e2) */
         case Iop_32HLto64: {
            /* Simply pair up the two 32-bit sources; per this
               function's contract the caller must not modify the
               returned registers. */
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         }

         default:
            break;
      }
   }

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* ReinterpF64asI64 */
         case Iop_ReinterpF64asI64: {
            /* Bit-move a D (double) register out to an integer
               register pair; no conversion performed. */
            HReg dstHi = newVRegI(env);
            HReg dstLo = newVRegI(env);
            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         /* Left64(e) */
         case Iop_Left64: {
            HReg yLo, yHi;
            HReg tHi  = newVRegI(env);
            HReg tLo  = newVRegI(env);
            HReg zero = newVRegI(env);
            /* yHi:yLo = arg */
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* zero = 0 */
            addInstr(env, ARMInstr_Imm32(zero, 0));
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
                                       tLo, zero, ARMRI84_R(yLo)));
            /* tHi = 0 - yHi - carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
                                       tHi, zero, ARMRI84_R(yHi)));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result
               tHi:tLo = arg | -arg. */
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* CmpwNEZ64(e) */
         case Iop_CmpwNEZ64: {
            /* Produces all-zeroes if arg == 0, else all-ones, in both
               result halves (note both point at the same vreg). */
            HReg srcLo, srcHi;
            HReg tmp1 = newVRegI(env);
            HReg tmp2 = newVRegI(env);
            /* srcHi:srcLo = arg */
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
            /* tmp1 = srcHi | srcLo */
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp1, srcHi, ARMRI84_R(srcLo)));
            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp2, tmp2, ARMRI84_R(tmp1)));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
                                         tmp2, tmp2, ARMRI5_I5(31)));
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         }

         case Iop_1Sto64: {
            /* Materialise 0 or 1 from the condition, then sign-extend
               bit 0 across the whole word via SHL 31 / SAR 31; both
               halves share the single result register. */
            HReg        dst  = newVRegI(env);
            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            ARMRI5*     amt  = ARMRI5_I5(31);
            /* This is really rough.  We could do much better here;
               perhaps mvn{cond} dst, #0 as the second insn?
               (same applies to 1Sto32) */
            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
            *rHi = dst;
            *rLo = dst;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      /* Copy 'iftrue' into the destination pair, then conditionally
         overwrite both halves with 'iffalse' using the inverted
         condition (cc ^ 1). */
      IRType tyC;
      HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
      ARMCondCode cc;
      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
      vassert(tyC == Ity_I1);
      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
      dstHi = newVRegI(env);
      dstLo = newVRegI(env);
      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   /* It is convenient sometimes to call iselInt64Expr even when we
      have NEON support (e.g. in do_helper_call we need 64-bit
      arguments as 2 x 32 regs). */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      /* Last resort: select into a NEON D register and transfer the
         value out into an integer register pair. */
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      HReg tmp = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
      *rHi = tHi;
      *rLo = tLo;
      return ;
   }

   /* No pattern matched and no NEON fallback available: give up. */
   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
2171
2172
2173 /*---------------------------------------------------------*/
2174 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2175 /*---------------------------------------------------------*/
2176
iselNeon64Expr(ISelEnv * env,IRExpr * e)2177 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2178 {
2179 HReg r;
2180 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2181 r = iselNeon64Expr_wrk( env, e );
2182 vassert(hregClass(r) == HRcFlt64);
2183 vassert(hregIsVirtual(r));
2184 return r;
2185 }
2186
2187 /* DO NOT CALL THIS DIRECTLY */
iselNeon64Expr_wrk(ISelEnv * env,IRExpr * e)2188 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2189 {
2190 IRType ty = typeOfIRExpr(env->type_env, e);
2191 MatchInfo mi;
2192 vassert(e);
2193 vassert(ty == Ity_I64);
2194
2195 if (e->tag == Iex_RdTmp) {
2196 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2197 }
2198
2199 if (e->tag == Iex_Const) {
2200 HReg rLo, rHi;
2201 HReg res = newVRegD(env);
2202 iselInt64Expr(&rHi, &rLo, env, e);
2203 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2204 return res;
2205 }
2206
2207 /* 64-bit load */
2208 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2209 HReg res = newVRegD(env);
2210 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2211 vassert(ty == Ity_I64);
2212 addInstr(env, ARMInstr_NLdStD(True, res, am));
2213 return res;
2214 }
2215
2216 /* 64-bit GET */
2217 if (e->tag == Iex_Get) {
2218 HReg addr = newVRegI(env);
2219 HReg res = newVRegD(env);
2220 vassert(ty == Ity_I64);
2221 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2222 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2223 return res;
2224 }
2225
2226 /* --------- BINARY ops --------- */
2227 if (e->tag == Iex_Binop) {
2228 switch (e->Iex.Binop.op) {
2229
2230 /* 32 x 32 -> 64 multiply */
2231 case Iop_MullS32:
2232 case Iop_MullU32: {
2233 HReg rLo, rHi;
2234 HReg res = newVRegD(env);
2235 iselInt64Expr(&rHi, &rLo, env, e);
2236 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2237 return res;
2238 }
2239
2240 case Iop_And64: {
2241 HReg res = newVRegD(env);
2242 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2243 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2244 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2245 res, argL, argR, 4, False));
2246 return res;
2247 }
2248 case Iop_Or64: {
2249 HReg res = newVRegD(env);
2250 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2251 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2252 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2253 res, argL, argR, 4, False));
2254 return res;
2255 }
2256 case Iop_Xor64: {
2257 HReg res = newVRegD(env);
2258 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2259 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2260 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2261 res, argL, argR, 4, False));
2262 return res;
2263 }
2264
2265 /* 32HLto64(e1,e2) */
2266 case Iop_32HLto64: {
2267 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2268 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2269 HReg res = newVRegD(env);
2270 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2271 return res;
2272 }
2273
2274 case Iop_Add8x8:
2275 case Iop_Add16x4:
2276 case Iop_Add32x2:
2277 case Iop_Add64: {
2278 HReg res = newVRegD(env);
2279 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2280 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2281 UInt size;
2282 switch (e->Iex.Binop.op) {
2283 case Iop_Add8x8: size = 0; break;
2284 case Iop_Add16x4: size = 1; break;
2285 case Iop_Add32x2: size = 2; break;
2286 case Iop_Add64: size = 3; break;
2287 default: vassert(0);
2288 }
2289 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2290 res, argL, argR, size, False));
2291 return res;
2292 }
2293 case Iop_Add32Fx2: {
2294 HReg res = newVRegD(env);
2295 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2296 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2297 UInt size = 0;
2298 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2299 res, argL, argR, size, False));
2300 return res;
2301 }
2302 case Iop_RecipStep32Fx2: {
2303 HReg res = newVRegD(env);
2304 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2305 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2306 UInt size = 0;
2307 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2308 res, argL, argR, size, False));
2309 return res;
2310 }
2311 case Iop_RSqrtStep32Fx2: {
2312 HReg res = newVRegD(env);
2313 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2314 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2315 UInt size = 0;
2316 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2317 res, argL, argR, size, False));
2318 return res;
2319 }
2320
2321 // These 6 verified 18 Apr 2013
2322 case Iop_InterleaveHI32x2:
2323 case Iop_InterleaveLO32x2:
2324 case Iop_InterleaveOddLanes8x8:
2325 case Iop_InterleaveEvenLanes8x8:
2326 case Iop_InterleaveOddLanes16x4:
2327 case Iop_InterleaveEvenLanes16x4: {
2328 HReg rD = newVRegD(env);
2329 HReg rM = newVRegD(env);
2330 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2331 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2332 UInt size;
2333 Bool resRd; // is the result in rD or rM ?
2334 switch (e->Iex.Binop.op) {
2335 case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2336 case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2337 case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2338 case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2339 case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2340 case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2341 default: vassert(0);
2342 }
2343 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2344 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2345 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2346 return resRd ? rD : rM;
2347 }
2348
2349 // These 4 verified 18 Apr 2013
2350 case Iop_InterleaveHI8x8:
2351 case Iop_InterleaveLO8x8:
2352 case Iop_InterleaveHI16x4:
2353 case Iop_InterleaveLO16x4: {
2354 HReg rD = newVRegD(env);
2355 HReg rM = newVRegD(env);
2356 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358 UInt size;
2359 Bool resRd; // is the result in rD or rM ?
2360 switch (e->Iex.Binop.op) {
2361 case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2362 case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2363 case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2364 case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2365 default: vassert(0);
2366 }
2367 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2368 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2369 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2370 return resRd ? rD : rM;
2371 }
2372
2373 // These 4 verified 18 Apr 2013
2374 case Iop_CatOddLanes8x8:
2375 case Iop_CatEvenLanes8x8:
2376 case Iop_CatOddLanes16x4:
2377 case Iop_CatEvenLanes16x4: {
2378 HReg rD = newVRegD(env);
2379 HReg rM = newVRegD(env);
2380 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2381 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2382 UInt size;
2383 Bool resRd; // is the result in rD or rM ?
2384 switch (e->Iex.Binop.op) {
2385 case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2386 case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2387 case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2388 case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2389 default: vassert(0);
2390 }
2391 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2392 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2393 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2394 return resRd ? rD : rM;
2395 }
2396
2397 case Iop_QAdd8Ux8:
2398 case Iop_QAdd16Ux4:
2399 case Iop_QAdd32Ux2:
2400 case Iop_QAdd64Ux1: {
2401 HReg res = newVRegD(env);
2402 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2403 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2404 UInt size;
2405 switch (e->Iex.Binop.op) {
2406 case Iop_QAdd8Ux8: size = 0; break;
2407 case Iop_QAdd16Ux4: size = 1; break;
2408 case Iop_QAdd32Ux2: size = 2; break;
2409 case Iop_QAdd64Ux1: size = 3; break;
2410 default: vassert(0);
2411 }
2412 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2413 res, argL, argR, size, False));
2414 return res;
2415 }
2416 case Iop_QAdd8Sx8:
2417 case Iop_QAdd16Sx4:
2418 case Iop_QAdd32Sx2:
2419 case Iop_QAdd64Sx1: {
2420 HReg res = newVRegD(env);
2421 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2422 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2423 UInt size;
2424 switch (e->Iex.Binop.op) {
2425 case Iop_QAdd8Sx8: size = 0; break;
2426 case Iop_QAdd16Sx4: size = 1; break;
2427 case Iop_QAdd32Sx2: size = 2; break;
2428 case Iop_QAdd64Sx1: size = 3; break;
2429 default: vassert(0);
2430 }
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2432 res, argL, argR, size, False));
2433 return res;
2434 }
2435 case Iop_Sub8x8:
2436 case Iop_Sub16x4:
2437 case Iop_Sub32x2:
2438 case Iop_Sub64: {
2439 HReg res = newVRegD(env);
2440 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2441 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2442 UInt size;
2443 switch (e->Iex.Binop.op) {
2444 case Iop_Sub8x8: size = 0; break;
2445 case Iop_Sub16x4: size = 1; break;
2446 case Iop_Sub32x2: size = 2; break;
2447 case Iop_Sub64: size = 3; break;
2448 default: vassert(0);
2449 }
2450 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2451 res, argL, argR, size, False));
2452 return res;
2453 }
2454 case Iop_Sub32Fx2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458 UInt size = 0;
2459 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2460 res, argL, argR, size, False));
2461 return res;
2462 }
2463 case Iop_QSub8Ux8:
2464 case Iop_QSub16Ux4:
2465 case Iop_QSub32Ux2:
2466 case Iop_QSub64Ux1: {
2467 HReg res = newVRegD(env);
2468 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2469 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2470 UInt size;
2471 switch (e->Iex.Binop.op) {
2472 case Iop_QSub8Ux8: size = 0; break;
2473 case Iop_QSub16Ux4: size = 1; break;
2474 case Iop_QSub32Ux2: size = 2; break;
2475 case Iop_QSub64Ux1: size = 3; break;
2476 default: vassert(0);
2477 }
2478 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2479 res, argL, argR, size, False));
2480 return res;
2481 }
2482 case Iop_QSub8Sx8:
2483 case Iop_QSub16Sx4:
2484 case Iop_QSub32Sx2:
2485 case Iop_QSub64Sx1: {
2486 HReg res = newVRegD(env);
2487 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2488 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2489 UInt size;
2490 switch (e->Iex.Binop.op) {
2491 case Iop_QSub8Sx8: size = 0; break;
2492 case Iop_QSub16Sx4: size = 1; break;
2493 case Iop_QSub32Sx2: size = 2; break;
2494 case Iop_QSub64Sx1: size = 3; break;
2495 default: vassert(0);
2496 }
2497 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2498 res, argL, argR, size, False));
2499 return res;
2500 }
2501 case Iop_Max8Ux8:
2502 case Iop_Max16Ux4:
2503 case Iop_Max32Ux2: {
2504 HReg res = newVRegD(env);
2505 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2506 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2507 UInt size;
2508 switch (e->Iex.Binop.op) {
2509 case Iop_Max8Ux8: size = 0; break;
2510 case Iop_Max16Ux4: size = 1; break;
2511 case Iop_Max32Ux2: size = 2; break;
2512 default: vassert(0);
2513 }
2514 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2515 res, argL, argR, size, False));
2516 return res;
2517 }
2518 case Iop_Max8Sx8:
2519 case Iop_Max16Sx4:
2520 case Iop_Max32Sx2: {
2521 HReg res = newVRegD(env);
2522 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2523 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2524 UInt size;
2525 switch (e->Iex.Binop.op) {
2526 case Iop_Max8Sx8: size = 0; break;
2527 case Iop_Max16Sx4: size = 1; break;
2528 case Iop_Max32Sx2: size = 2; break;
2529 default: vassert(0);
2530 }
2531 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2532 res, argL, argR, size, False));
2533 return res;
2534 }
2535 case Iop_Min8Ux8:
2536 case Iop_Min16Ux4:
2537 case Iop_Min32Ux2: {
2538 HReg res = newVRegD(env);
2539 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2540 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2541 UInt size;
2542 switch (e->Iex.Binop.op) {
2543 case Iop_Min8Ux8: size = 0; break;
2544 case Iop_Min16Ux4: size = 1; break;
2545 case Iop_Min32Ux2: size = 2; break;
2546 default: vassert(0);
2547 }
2548 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2549 res, argL, argR, size, False));
2550 return res;
2551 }
2552 case Iop_Min8Sx8:
2553 case Iop_Min16Sx4:
2554 case Iop_Min32Sx2: {
2555 HReg res = newVRegD(env);
2556 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2557 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2558 UInt size;
2559 switch (e->Iex.Binop.op) {
2560 case Iop_Min8Sx8: size = 0; break;
2561 case Iop_Min16Sx4: size = 1; break;
2562 case Iop_Min32Sx2: size = 2; break;
2563 default: vassert(0);
2564 }
2565 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2566 res, argL, argR, size, False));
2567 return res;
2568 }
2569 case Iop_Sar8x8:
2570 case Iop_Sar16x4:
2571 case Iop_Sar32x2: {
2572 HReg res = newVRegD(env);
2573 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2574 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2575 HReg argR2 = newVRegD(env);
2576 HReg zero = newVRegD(env);
2577 UInt size;
2578 switch (e->Iex.Binop.op) {
2579 case Iop_Sar8x8: size = 0; break;
2580 case Iop_Sar16x4: size = 1; break;
2581 case Iop_Sar32x2: size = 2; break;
2582 case Iop_Sar64: size = 3; break;
2583 default: vassert(0);
2584 }
2585 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2586 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2587 argR2, zero, argR, size, False));
2588 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2589 res, argL, argR2, size, False));
2590 return res;
2591 }
2592 case Iop_Sal8x8:
2593 case Iop_Sal16x4:
2594 case Iop_Sal32x2:
2595 case Iop_Sal64x1: {
2596 HReg res = newVRegD(env);
2597 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2598 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2599 UInt size;
2600 switch (e->Iex.Binop.op) {
2601 case Iop_Sal8x8: size = 0; break;
2602 case Iop_Sal16x4: size = 1; break;
2603 case Iop_Sal32x2: size = 2; break;
2604 case Iop_Sal64x1: size = 3; break;
2605 default: vassert(0);
2606 }
2607 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2608 res, argL, argR, size, False));
2609 return res;
2610 }
2611 case Iop_Shr8x8:
2612 case Iop_Shr16x4:
2613 case Iop_Shr32x2: {
2614 HReg res = newVRegD(env);
2615 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2616 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2617 HReg argR2 = newVRegD(env);
2618 HReg zero = newVRegD(env);
2619 UInt size;
2620 switch (e->Iex.Binop.op) {
2621 case Iop_Shr8x8: size = 0; break;
2622 case Iop_Shr16x4: size = 1; break;
2623 case Iop_Shr32x2: size = 2; break;
2624 default: vassert(0);
2625 }
2626 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2627 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2628 argR2, zero, argR, size, False));
2629 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2630 res, argL, argR2, size, False));
2631 return res;
2632 }
2633 case Iop_Shl8x8:
2634 case Iop_Shl16x4:
2635 case Iop_Shl32x2: {
2636 HReg res = newVRegD(env);
2637 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2638 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2639 UInt size;
2640 switch (e->Iex.Binop.op) {
2641 case Iop_Shl8x8: size = 0; break;
2642 case Iop_Shl16x4: size = 1; break;
2643 case Iop_Shl32x2: size = 2; break;
2644 default: vassert(0);
2645 }
2646 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2647 res, argL, argR, size, False));
2648 return res;
2649 }
2650 case Iop_QShl8x8:
2651 case Iop_QShl16x4:
2652 case Iop_QShl32x2:
2653 case Iop_QShl64x1: {
2654 HReg res = newVRegD(env);
2655 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2656 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2657 UInt size;
2658 switch (e->Iex.Binop.op) {
2659 case Iop_QShl8x8: size = 0; break;
2660 case Iop_QShl16x4: size = 1; break;
2661 case Iop_QShl32x2: size = 2; break;
2662 case Iop_QShl64x1: size = 3; break;
2663 default: vassert(0);
2664 }
2665 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2666 res, argL, argR, size, False));
2667 return res;
2668 }
2669 case Iop_QSal8x8:
2670 case Iop_QSal16x4:
2671 case Iop_QSal32x2:
2672 case Iop_QSal64x1: {
2673 HReg res = newVRegD(env);
2674 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2675 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2676 UInt size;
2677 switch (e->Iex.Binop.op) {
2678 case Iop_QSal8x8: size = 0; break;
2679 case Iop_QSal16x4: size = 1; break;
2680 case Iop_QSal32x2: size = 2; break;
2681 case Iop_QSal64x1: size = 3; break;
2682 default: vassert(0);
2683 }
2684 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2685 res, argL, argR, size, False));
2686 return res;
2687 }
2688 case Iop_QShlNsatUU8x8:
2689 case Iop_QShlNsatUU16x4:
2690 case Iop_QShlNsatUU32x2:
2691 case Iop_QShlNsatUU64x1: {
2692 HReg res = newVRegD(env);
2693 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2694 UInt size, imm;
2695 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2696 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2697 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2698 "second argument only\n");
2699 }
2700 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2701 switch (e->Iex.Binop.op) {
2702 case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2703 case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2704 case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2705 case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2706 default: vassert(0);
2707 }
2708 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2709 res, argL, size, False));
2710 return res;
2711 }
2712 case Iop_QShlNsatSU8x8:
2713 case Iop_QShlNsatSU16x4:
2714 case Iop_QShlNsatSU32x2:
2715 case Iop_QShlNsatSU64x1: {
2716 HReg res = newVRegD(env);
2717 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2718 UInt size, imm;
2719 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2720 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2721 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2722 "second argument only\n");
2723 }
2724 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2725 switch (e->Iex.Binop.op) {
2726 case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2727 case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2728 case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2729 case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2730 default: vassert(0);
2731 }
2732 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2733 res, argL, size, False));
2734 return res;
2735 }
2736 case Iop_QShlNsatSS8x8:
2737 case Iop_QShlNsatSS16x4:
2738 case Iop_QShlNsatSS32x2:
2739 case Iop_QShlNsatSS64x1: {
2740 HReg res = newVRegD(env);
2741 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2742 UInt size, imm;
2743 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2744 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2745 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2746 "second argument only\n");
2747 }
2748 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2749 switch (e->Iex.Binop.op) {
2750 case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2751 case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2752 case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2753 case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2754 default: vassert(0);
2755 }
2756 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2757 res, argL, size, False));
2758 return res;
2759 }
2760 case Iop_ShrN8x8:
2761 case Iop_ShrN16x4:
2762 case Iop_ShrN32x2:
2763 case Iop_Shr64: {
2764 HReg res = newVRegD(env);
2765 HReg tmp = newVRegD(env);
2766 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2767 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2768 HReg argR2 = newVRegI(env);
2769 UInt size;
2770 switch (e->Iex.Binop.op) {
2771 case Iop_ShrN8x8: size = 0; break;
2772 case Iop_ShrN16x4: size = 1; break;
2773 case Iop_ShrN32x2: size = 2; break;
2774 case Iop_Shr64: size = 3; break;
2775 default: vassert(0);
2776 }
2777 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2778 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2779 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2780 res, argL, tmp, size, False));
2781 return res;
2782 }
2783 case Iop_ShlN8x8:
2784 case Iop_ShlN16x4:
2785 case Iop_ShlN32x2:
2786 case Iop_Shl64: {
2787 HReg res = newVRegD(env);
2788 HReg tmp = newVRegD(env);
2789 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2790 /* special-case Shl64(x, imm8) since the Neon front
2791 end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2792 if (e->Iex.Binop.op == Iop_Shl64
2793 && e->Iex.Binop.arg2->tag == Iex_Const) {
2794 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2795 Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2796 if (nshift >= 1 && nshift <= 63) {
2797 addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2798 return res;
2799 }
2800 /* else fall through to general case */
2801 }
2802 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2803 UInt size;
2804 switch (e->Iex.Binop.op) {
2805 case Iop_ShlN8x8: size = 0; break;
2806 case Iop_ShlN16x4: size = 1; break;
2807 case Iop_ShlN32x2: size = 2; break;
2808 case Iop_Shl64: size = 3; break;
2809 default: vassert(0);
2810 }
2811 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2812 tmp, argR, 0, False));
2813 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2814 res, argL, tmp, size, False));
2815 return res;
2816 }
2817 case Iop_SarN8x8:
2818 case Iop_SarN16x4:
2819 case Iop_SarN32x2:
2820 case Iop_Sar64: {
2821 HReg res = newVRegD(env);
2822 HReg tmp = newVRegD(env);
2823 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2825 HReg argR2 = newVRegI(env);
2826 UInt size;
2827 switch (e->Iex.Binop.op) {
2828 case Iop_SarN8x8: size = 0; break;
2829 case Iop_SarN16x4: size = 1; break;
2830 case Iop_SarN32x2: size = 2; break;
2831 case Iop_Sar64: size = 3; break;
2832 default: vassert(0);
2833 }
2834 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2835 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2836 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2837 res, argL, tmp, size, False));
2838 return res;
2839 }
2840 case Iop_CmpGT8Ux8:
2841 case Iop_CmpGT16Ux4:
2842 case Iop_CmpGT32Ux2: {
2843 HReg res = newVRegD(env);
2844 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2845 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2846 UInt size;
2847 switch (e->Iex.Binop.op) {
2848 case Iop_CmpGT8Ux8: size = 0; break;
2849 case Iop_CmpGT16Ux4: size = 1; break;
2850 case Iop_CmpGT32Ux2: size = 2; break;
2851 default: vassert(0);
2852 }
2853 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2854 res, argL, argR, size, False));
2855 return res;
2856 }
2857 case Iop_CmpGT8Sx8:
2858 case Iop_CmpGT16Sx4:
2859 case Iop_CmpGT32Sx2: {
2860 HReg res = newVRegD(env);
2861 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2862 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2863 UInt size;
2864 switch (e->Iex.Binop.op) {
2865 case Iop_CmpGT8Sx8: size = 0; break;
2866 case Iop_CmpGT16Sx4: size = 1; break;
2867 case Iop_CmpGT32Sx2: size = 2; break;
2868 default: vassert(0);
2869 }
2870 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2871 res, argL, argR, size, False));
2872 return res;
2873 }
2874 case Iop_CmpEQ8x8:
2875 case Iop_CmpEQ16x4:
2876 case Iop_CmpEQ32x2: {
2877 HReg res = newVRegD(env);
2878 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2879 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2880 UInt size;
2881 switch (e->Iex.Binop.op) {
2882 case Iop_CmpEQ8x8: size = 0; break;
2883 case Iop_CmpEQ16x4: size = 1; break;
2884 case Iop_CmpEQ32x2: size = 2; break;
2885 default: vassert(0);
2886 }
2887 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2888 res, argL, argR, size, False));
2889 return res;
2890 }
2891 case Iop_Mul8x8:
2892 case Iop_Mul16x4:
2893 case Iop_Mul32x2: {
2894 HReg res = newVRegD(env);
2895 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2896 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2897 UInt size = 0;
2898 switch(e->Iex.Binop.op) {
2899 case Iop_Mul8x8: size = 0; break;
2900 case Iop_Mul16x4: size = 1; break;
2901 case Iop_Mul32x2: size = 2; break;
2902 default: vassert(0);
2903 }
2904 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2905 res, argL, argR, size, False));
2906 return res;
2907 }
2908 case Iop_Mul32Fx2: {
2909 HReg res = newVRegD(env);
2910 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2911 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2912 UInt size = 0;
2913 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2914 res, argL, argR, size, False));
2915 return res;
2916 }
2917 case Iop_QDMulHi16Sx4:
2918 case Iop_QDMulHi32Sx2: {
2919 HReg res = newVRegD(env);
2920 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2921 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2922 UInt size = 0;
2923 switch(e->Iex.Binop.op) {
2924 case Iop_QDMulHi16Sx4: size = 1; break;
2925 case Iop_QDMulHi32Sx2: size = 2; break;
2926 default: vassert(0);
2927 }
2928 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2929 res, argL, argR, size, False));
2930 return res;
2931 }
2932
2933 case Iop_QRDMulHi16Sx4:
2934 case Iop_QRDMulHi32Sx2: {
2935 HReg res = newVRegD(env);
2936 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2937 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2938 UInt size = 0;
2939 switch(e->Iex.Binop.op) {
2940 case Iop_QRDMulHi16Sx4: size = 1; break;
2941 case Iop_QRDMulHi32Sx2: size = 2; break;
2942 default: vassert(0);
2943 }
2944 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2945 res, argL, argR, size, False));
2946 return res;
2947 }
2948
2949 case Iop_PwAdd8x8:
2950 case Iop_PwAdd16x4:
2951 case Iop_PwAdd32x2: {
2952 HReg res = newVRegD(env);
2953 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955 UInt size = 0;
2956 switch(e->Iex.Binop.op) {
2957 case Iop_PwAdd8x8: size = 0; break;
2958 case Iop_PwAdd16x4: size = 1; break;
2959 case Iop_PwAdd32x2: size = 2; break;
2960 default: vassert(0);
2961 }
2962 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2963 res, argL, argR, size, False));
2964 return res;
2965 }
2966 case Iop_PwAdd32Fx2: {
2967 HReg res = newVRegD(env);
2968 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2969 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2970 UInt size = 0;
2971 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2972 res, argL, argR, size, False));
2973 return res;
2974 }
2975 case Iop_PwMin8Ux8:
2976 case Iop_PwMin16Ux4:
2977 case Iop_PwMin32Ux2: {
2978 HReg res = newVRegD(env);
2979 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2980 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2981 UInt size = 0;
2982 switch(e->Iex.Binop.op) {
2983 case Iop_PwMin8Ux8: size = 0; break;
2984 case Iop_PwMin16Ux4: size = 1; break;
2985 case Iop_PwMin32Ux2: size = 2; break;
2986 default: vassert(0);
2987 }
2988 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2989 res, argL, argR, size, False));
2990 return res;
2991 }
2992 case Iop_PwMin8Sx8:
2993 case Iop_PwMin16Sx4:
2994 case Iop_PwMin32Sx2: {
2995 HReg res = newVRegD(env);
2996 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2997 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2998 UInt size = 0;
2999 switch(e->Iex.Binop.op) {
3000 case Iop_PwMin8Sx8: size = 0; break;
3001 case Iop_PwMin16Sx4: size = 1; break;
3002 case Iop_PwMin32Sx2: size = 2; break;
3003 default: vassert(0);
3004 }
3005 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3006 res, argL, argR, size, False));
3007 return res;
3008 }
3009 case Iop_PwMax8Ux8:
3010 case Iop_PwMax16Ux4:
3011 case Iop_PwMax32Ux2: {
3012 HReg res = newVRegD(env);
3013 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3014 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3015 UInt size = 0;
3016 switch(e->Iex.Binop.op) {
3017 case Iop_PwMax8Ux8: size = 0; break;
3018 case Iop_PwMax16Ux4: size = 1; break;
3019 case Iop_PwMax32Ux2: size = 2; break;
3020 default: vassert(0);
3021 }
3022 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3023 res, argL, argR, size, False));
3024 return res;
3025 }
3026 case Iop_PwMax8Sx8:
3027 case Iop_PwMax16Sx4:
3028 case Iop_PwMax32Sx2: {
3029 HReg res = newVRegD(env);
3030 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3031 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3032 UInt size = 0;
3033 switch(e->Iex.Binop.op) {
3034 case Iop_PwMax8Sx8: size = 0; break;
3035 case Iop_PwMax16Sx4: size = 1; break;
3036 case Iop_PwMax32Sx2: size = 2; break;
3037 default: vassert(0);
3038 }
3039 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3040 res, argL, argR, size, False));
3041 return res;
3042 }
3043 case Iop_Perm8x8: {
3044 HReg res = newVRegD(env);
3045 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3046 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3047 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3048 res, argL, argR, 0, False));
3049 return res;
3050 }
3051 case Iop_PolynomialMul8x8: {
3052 HReg res = newVRegD(env);
3053 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3054 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3055 UInt size = 0;
3056 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3057 res, argL, argR, size, False));
3058 return res;
3059 }
3060 case Iop_Max32Fx2: {
3061 HReg res = newVRegD(env);
3062 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3063 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3064 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3065 res, argL, argR, 2, False));
3066 return res;
3067 }
3068 case Iop_Min32Fx2: {
3069 HReg res = newVRegD(env);
3070 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3071 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3072 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3073 res, argL, argR, 2, False));
3074 return res;
3075 }
3076 case Iop_PwMax32Fx2: {
3077 HReg res = newVRegD(env);
3078 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3079 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3080 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3081 res, argL, argR, 2, False));
3082 return res;
3083 }
3084 case Iop_PwMin32Fx2: {
3085 HReg res = newVRegD(env);
3086 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3087 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3088 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3089 res, argL, argR, 2, False));
3090 return res;
3091 }
3092 case Iop_CmpGT32Fx2: {
3093 HReg res = newVRegD(env);
3094 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3095 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3096 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3097 res, argL, argR, 2, False));
3098 return res;
3099 }
3100 case Iop_CmpGE32Fx2: {
3101 HReg res = newVRegD(env);
3102 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3103 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3104 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3105 res, argL, argR, 2, False));
3106 return res;
3107 }
3108 case Iop_CmpEQ32Fx2: {
3109 HReg res = newVRegD(env);
3110 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3111 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3112 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3113 res, argL, argR, 2, False));
3114 return res;
3115 }
3116 case Iop_F32ToFixed32Ux2_RZ:
3117 case Iop_F32ToFixed32Sx2_RZ:
3118 case Iop_Fixed32UToF32x2_RN:
3119 case Iop_Fixed32SToF32x2_RN: {
3120 HReg res = newVRegD(env);
3121 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3122 ARMNeonUnOp op;
3123 UInt imm6;
3124 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3125 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3126 vpanic("ARM supports FP <-> Fixed conversion with constant "
3127 "second argument less than 33 only\n");
3128 }
3129 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3130 vassert(imm6 <= 32 && imm6 > 0);
3131 imm6 = 64 - imm6;
3132 switch(e->Iex.Binop.op) {
3133 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3134 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3135 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3136 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3137 default: vassert(0);
3138 }
3139 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3140 return res;
3141 }
3142 /*
3143 FIXME: is this here or not?
3144 case Iop_VDup8x8:
3145 case Iop_VDup16x4:
3146 case Iop_VDup32x2: {
3147 HReg res = newVRegD(env);
3148 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3149 UInt index;
3150 UInt imm4;
3151 UInt size = 0;
3152 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3153 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3154 vpanic("ARM supports Iop_VDup with constant "
3155 "second argument less than 16 only\n");
3156 }
3157 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3158 switch(e->Iex.Binop.op) {
3159 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3160 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3161 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3162 default: vassert(0);
3163 }
3164 if (imm4 >= 16) {
3165 vpanic("ARM supports Iop_VDup with constant "
3166 "second argument less than 16 only\n");
3167 }
3168 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3169 res, argL, imm4, False));
3170 return res;
3171 }
3172 */
3173 default:
3174 break;
3175 }
3176 }
3177
3178 /* --------- UNARY ops --------- */
3179 if (e->tag == Iex_Unop) {
3180 switch (e->Iex.Unop.op) {
3181
3182 /* 32Uto64 */
3183 case Iop_32Uto64: {
3184 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3185 HReg rHi = newVRegI(env);
3186 HReg res = newVRegD(env);
3187 addInstr(env, ARMInstr_Imm32(rHi, 0));
3188 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3189 return res;
3190 }
3191
3192 /* 32Sto64 */
3193 case Iop_32Sto64: {
3194 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3195 HReg rHi = newVRegI(env);
3196 addInstr(env, mk_iMOVds_RR(rHi, rLo));
3197 addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3198 HReg res = newVRegD(env);
3199 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3200 return res;
3201 }
3202
3203 /* The next 3 are pass-throughs */
3204 /* ReinterpF64asI64 */
3205 case Iop_ReinterpF64asI64:
3206 /* Left64(e) */
3207 case Iop_Left64:
3208 /* CmpwNEZ64(e) */
3209 case Iop_1Sto64: {
3210 HReg rLo, rHi;
3211 HReg res = newVRegD(env);
3212 iselInt64Expr(&rHi, &rLo, env, e);
3213 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3214 return res;
3215 }
3216
3217 case Iop_Not64: {
3218 DECLARE_PATTERN(p_veqz_8x8);
3219 DECLARE_PATTERN(p_veqz_16x4);
3220 DECLARE_PATTERN(p_veqz_32x2);
3221 DECLARE_PATTERN(p_vcge_8sx8);
3222 DECLARE_PATTERN(p_vcge_16sx4);
3223 DECLARE_PATTERN(p_vcge_32sx2);
3224 DECLARE_PATTERN(p_vcge_8ux8);
3225 DECLARE_PATTERN(p_vcge_16ux4);
3226 DECLARE_PATTERN(p_vcge_32ux2);
3227 DEFINE_PATTERN(p_veqz_8x8,
3228 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3229 DEFINE_PATTERN(p_veqz_16x4,
3230 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3231 DEFINE_PATTERN(p_veqz_32x2,
3232 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3233 DEFINE_PATTERN(p_vcge_8sx8,
3234 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3235 DEFINE_PATTERN(p_vcge_16sx4,
3236 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3237 DEFINE_PATTERN(p_vcge_32sx2,
3238 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3239 DEFINE_PATTERN(p_vcge_8ux8,
3240 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3241 DEFINE_PATTERN(p_vcge_16ux4,
3242 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3243 DEFINE_PATTERN(p_vcge_32ux2,
3244 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3245 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3246 HReg res = newVRegD(env);
3247 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3248 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3249 return res;
3250 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3251 HReg res = newVRegD(env);
3252 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3253 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3254 return res;
3255 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3256 HReg res = newVRegD(env);
3257 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3258 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3259 return res;
3260 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3261 HReg res = newVRegD(env);
3262 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3263 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3264 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3265 res, argL, argR, 0, False));
3266 return res;
3267 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3268 HReg res = newVRegD(env);
3269 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3270 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3271 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3272 res, argL, argR, 1, False));
3273 return res;
3274 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3275 HReg res = newVRegD(env);
3276 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3277 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3278 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3279 res, argL, argR, 2, False));
3280 return res;
3281 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3282 HReg res = newVRegD(env);
3283 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3284 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3285 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3286 res, argL, argR, 0, False));
3287 return res;
3288 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3289 HReg res = newVRegD(env);
3290 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3291 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3292 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3293 res, argL, argR, 1, False));
3294 return res;
3295 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3296 HReg res = newVRegD(env);
3297 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3298 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3299 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3300 res, argL, argR, 2, False));
3301 return res;
3302 } else {
3303 HReg res = newVRegD(env);
3304 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3306 return res;
3307 }
3308 }
3309 case Iop_Dup8x8:
3310 case Iop_Dup16x4:
3311 case Iop_Dup32x2: {
3312 HReg res, arg;
3313 UInt size;
3314 DECLARE_PATTERN(p_vdup_8x8);
3315 DECLARE_PATTERN(p_vdup_16x4);
3316 DECLARE_PATTERN(p_vdup_32x2);
3317 DEFINE_PATTERN(p_vdup_8x8,
3318 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3319 DEFINE_PATTERN(p_vdup_16x4,
3320 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3321 DEFINE_PATTERN(p_vdup_32x2,
3322 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3323 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3324 UInt index;
3325 UInt imm4;
3326 if (mi.bindee[1]->tag == Iex_Const &&
3327 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3328 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3329 imm4 = (index << 1) + 1;
3330 if (index < 8) {
3331 res = newVRegD(env);
3332 arg = iselNeon64Expr(env, mi.bindee[0]);
3333 addInstr(env, ARMInstr_NUnaryS(
3334 ARMneon_VDUP,
3335 mkARMNRS(ARMNRS_Reg, res, 0),
3336 mkARMNRS(ARMNRS_Scalar, arg, index),
3337 imm4, False
3338 ));
3339 return res;
3340 }
3341 }
3342 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3343 UInt index;
3344 UInt imm4;
3345 if (mi.bindee[1]->tag == Iex_Const &&
3346 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3347 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3348 imm4 = (index << 2) + 2;
3349 if (index < 4) {
3350 res = newVRegD(env);
3351 arg = iselNeon64Expr(env, mi.bindee[0]);
3352 addInstr(env, ARMInstr_NUnaryS(
3353 ARMneon_VDUP,
3354 mkARMNRS(ARMNRS_Reg, res, 0),
3355 mkARMNRS(ARMNRS_Scalar, arg, index),
3356 imm4, False
3357 ));
3358 return res;
3359 }
3360 }
3361 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3362 UInt index;
3363 UInt imm4;
3364 if (mi.bindee[1]->tag == Iex_Const &&
3365 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3366 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3367 imm4 = (index << 3) + 4;
3368 if (index < 2) {
3369 res = newVRegD(env);
3370 arg = iselNeon64Expr(env, mi.bindee[0]);
3371 addInstr(env, ARMInstr_NUnaryS(
3372 ARMneon_VDUP,
3373 mkARMNRS(ARMNRS_Reg, res, 0),
3374 mkARMNRS(ARMNRS_Scalar, arg, index),
3375 imm4, False
3376 ));
3377 return res;
3378 }
3379 }
3380 }
3381 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3382 res = newVRegD(env);
3383 switch (e->Iex.Unop.op) {
3384 case Iop_Dup8x8: size = 0; break;
3385 case Iop_Dup16x4: size = 1; break;
3386 case Iop_Dup32x2: size = 2; break;
3387 default: vassert(0);
3388 }
3389 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3390 return res;
3391 }
3392 case Iop_Abs8x8:
3393 case Iop_Abs16x4:
3394 case Iop_Abs32x2: {
3395 HReg res = newVRegD(env);
3396 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3397 UInt size = 0;
3398 switch(e->Iex.Binop.op) {
3399 case Iop_Abs8x8: size = 0; break;
3400 case Iop_Abs16x4: size = 1; break;
3401 case Iop_Abs32x2: size = 2; break;
3402 default: vassert(0);
3403 }
3404 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3405 return res;
3406 }
3407 case Iop_Reverse8sIn64_x1:
3408 case Iop_Reverse16sIn64_x1:
3409 case Iop_Reverse32sIn64_x1: {
3410 HReg res = newVRegD(env);
3411 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3412 UInt size = 0;
3413 switch(e->Iex.Binop.op) {
3414 case Iop_Reverse8sIn64_x1: size = 0; break;
3415 case Iop_Reverse16sIn64_x1: size = 1; break;
3416 case Iop_Reverse32sIn64_x1: size = 2; break;
3417 default: vassert(0);
3418 }
3419 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3420 res, arg, size, False));
3421 return res;
3422 }
3423 case Iop_Reverse8sIn32_x2:
3424 case Iop_Reverse16sIn32_x2: {
3425 HReg res = newVRegD(env);
3426 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3427 UInt size = 0;
3428 switch(e->Iex.Binop.op) {
3429 case Iop_Reverse8sIn32_x2: size = 0; break;
3430 case Iop_Reverse16sIn32_x2: size = 1; break;
3431 default: vassert(0);
3432 }
3433 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3434 res, arg, size, False));
3435 return res;
3436 }
3437 case Iop_Reverse8sIn16_x4: {
3438 HReg res = newVRegD(env);
3439 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3440 UInt size = 0;
3441 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3442 res, arg, size, False));
3443 return res;
3444 }
3445 case Iop_CmpwNEZ64: {
3446 HReg x_lsh = newVRegD(env);
3447 HReg x_rsh = newVRegD(env);
3448 HReg lsh_amt = newVRegD(env);
3449 HReg rsh_amt = newVRegD(env);
3450 HReg zero = newVRegD(env);
3451 HReg tmp = newVRegD(env);
3452 HReg tmp2 = newVRegD(env);
3453 HReg res = newVRegD(env);
3454 HReg x = newVRegD(env);
3455 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3456 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3457 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3458 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3459 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3460 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3461 rsh_amt, zero, lsh_amt, 2, False));
3462 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3463 x_lsh, x, lsh_amt, 3, False));
3464 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3465 x_rsh, x, rsh_amt, 3, False));
3466 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3467 tmp, x_lsh, x_rsh, 0, False));
3468 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3469 res, tmp, x, 0, False));
3470 return res;
3471 }
3472 case Iop_CmpNEZ8x8:
3473 case Iop_CmpNEZ16x4:
3474 case Iop_CmpNEZ32x2: {
3475 HReg res = newVRegD(env);
3476 HReg tmp = newVRegD(env);
3477 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3478 UInt size;
3479 switch (e->Iex.Unop.op) {
3480 case Iop_CmpNEZ8x8: size = 0; break;
3481 case Iop_CmpNEZ16x4: size = 1; break;
3482 case Iop_CmpNEZ32x2: size = 2; break;
3483 default: vassert(0);
3484 }
3485 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3486 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3487 return res;
3488 }
3489 case Iop_NarrowUn16to8x8:
3490 case Iop_NarrowUn32to16x4:
3491 case Iop_NarrowUn64to32x2: {
3492 HReg res = newVRegD(env);
3493 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3494 UInt size = 0;
3495 switch(e->Iex.Binop.op) {
3496 case Iop_NarrowUn16to8x8: size = 0; break;
3497 case Iop_NarrowUn32to16x4: size = 1; break;
3498 case Iop_NarrowUn64to32x2: size = 2; break;
3499 default: vassert(0);
3500 }
3501 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3502 res, arg, size, False));
3503 return res;
3504 }
3505 case Iop_QNarrowUn16Sto8Sx8:
3506 case Iop_QNarrowUn32Sto16Sx4:
3507 case Iop_QNarrowUn64Sto32Sx2: {
3508 HReg res = newVRegD(env);
3509 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3510 UInt size = 0;
3511 switch(e->Iex.Binop.op) {
3512 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3513 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3514 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3515 default: vassert(0);
3516 }
3517 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3518 res, arg, size, False));
3519 return res;
3520 }
3521 case Iop_QNarrowUn16Sto8Ux8:
3522 case Iop_QNarrowUn32Sto16Ux4:
3523 case Iop_QNarrowUn64Sto32Ux2: {
3524 HReg res = newVRegD(env);
3525 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3526 UInt size = 0;
3527 switch(e->Iex.Binop.op) {
3528 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3529 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3530 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3531 default: vassert(0);
3532 }
3533 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3534 res, arg, size, False));
3535 return res;
3536 }
3537 case Iop_QNarrowUn16Uto8Ux8:
3538 case Iop_QNarrowUn32Uto16Ux4:
3539 case Iop_QNarrowUn64Uto32Ux2: {
3540 HReg res = newVRegD(env);
3541 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3542 UInt size = 0;
3543 switch(e->Iex.Binop.op) {
3544 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3545 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3546 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3547 default: vassert(0);
3548 }
3549 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3550 res, arg, size, False));
3551 return res;
3552 }
3553 case Iop_PwAddL8Sx8:
3554 case Iop_PwAddL16Sx4:
3555 case Iop_PwAddL32Sx2: {
3556 HReg res = newVRegD(env);
3557 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3558 UInt size = 0;
3559 switch(e->Iex.Binop.op) {
3560 case Iop_PwAddL8Sx8: size = 0; break;
3561 case Iop_PwAddL16Sx4: size = 1; break;
3562 case Iop_PwAddL32Sx2: size = 2; break;
3563 default: vassert(0);
3564 }
3565 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3566 res, arg, size, False));
3567 return res;
3568 }
3569 case Iop_PwAddL8Ux8:
3570 case Iop_PwAddL16Ux4:
3571 case Iop_PwAddL32Ux2: {
3572 HReg res = newVRegD(env);
3573 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3574 UInt size = 0;
3575 switch(e->Iex.Binop.op) {
3576 case Iop_PwAddL8Ux8: size = 0; break;
3577 case Iop_PwAddL16Ux4: size = 1; break;
3578 case Iop_PwAddL32Ux2: size = 2; break;
3579 default: vassert(0);
3580 }
3581 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3582 res, arg, size, False));
3583 return res;
3584 }
3585 case Iop_Cnt8x8: {
3586 HReg res = newVRegD(env);
3587 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3588 UInt size = 0;
3589 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3590 res, arg, size, False));
3591 return res;
3592 }
3593 case Iop_Clz8x8:
3594 case Iop_Clz16x4:
3595 case Iop_Clz32x2: {
3596 HReg res = newVRegD(env);
3597 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3598 UInt size = 0;
3599 switch(e->Iex.Binop.op) {
3600 case Iop_Clz8x8: size = 0; break;
3601 case Iop_Clz16x4: size = 1; break;
3602 case Iop_Clz32x2: size = 2; break;
3603 default: vassert(0);
3604 }
3605 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3606 res, arg, size, False));
3607 return res;
3608 }
3609 case Iop_Cls8x8:
3610 case Iop_Cls16x4:
3611 case Iop_Cls32x2: {
3612 HReg res = newVRegD(env);
3613 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3614 UInt size = 0;
3615 switch(e->Iex.Binop.op) {
3616 case Iop_Cls8x8: size = 0; break;
3617 case Iop_Cls16x4: size = 1; break;
3618 case Iop_Cls32x2: size = 2; break;
3619 default: vassert(0);
3620 }
3621 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3622 res, arg, size, False));
3623 return res;
3624 }
3625 case Iop_FtoI32Sx2_RZ: {
3626 HReg res = newVRegD(env);
3627 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3628 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3629 res, arg, 2, False));
3630 return res;
3631 }
3632 case Iop_FtoI32Ux2_RZ: {
3633 HReg res = newVRegD(env);
3634 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3635 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3636 res, arg, 2, False));
3637 return res;
3638 }
3639 case Iop_I32StoFx2: {
3640 HReg res = newVRegD(env);
3641 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3642 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3643 res, arg, 2, False));
3644 return res;
3645 }
3646 case Iop_I32UtoFx2: {
3647 HReg res = newVRegD(env);
3648 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3649 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3650 res, arg, 2, False));
3651 return res;
3652 }
3653 case Iop_F32toF16x4: {
3654 HReg res = newVRegD(env);
3655 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3656 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3657 res, arg, 2, False));
3658 return res;
3659 }
3660 case Iop_RecipEst32Fx2: {
3661 HReg res = newVRegD(env);
3662 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3663 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3664 res, argL, 0, False));
3665 return res;
3666 }
3667 case Iop_RecipEst32Ux2: {
3668 HReg res = newVRegD(env);
3669 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3670 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3671 res, argL, 0, False));
3672 return res;
3673 }
3674 case Iop_Abs32Fx2: {
3675 DECLARE_PATTERN(p_vabd_32fx2);
3676 DEFINE_PATTERN(p_vabd_32fx2,
3677 unop(Iop_Abs32Fx2,
3678 binop(Iop_Sub32Fx2,
3679 bind(0),
3680 bind(1))));
3681 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3682 HReg res = newVRegD(env);
3683 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3684 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3685 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3686 res, argL, argR, 0, False));
3687 return res;
3688 } else {
3689 HReg res = newVRegD(env);
3690 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3691 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3692 res, arg, 0, False));
3693 return res;
3694 }
3695 }
3696 case Iop_RSqrtEst32Fx2: {
3697 HReg res = newVRegD(env);
3698 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3699 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3700 res, arg, 0, False));
3701 return res;
3702 }
3703 case Iop_RSqrtEst32Ux2: {
3704 HReg res = newVRegD(env);
3705 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3706 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3707 res, arg, 0, False));
3708 return res;
3709 }
3710 case Iop_Neg32Fx2: {
3711 HReg res = newVRegD(env);
3712 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3713 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3714 res, arg, 0, False));
3715 return res;
3716 }
3717 default:
3718 break;
3719 }
3720 } /* if (e->tag == Iex_Unop) */
3721
3722 if (e->tag == Iex_Triop) {
3723 IRTriop *triop = e->Iex.Triop.details;
3724
3725 switch (triop->op) {
3726 case Iop_Slice64: {
3727 HReg res = newVRegD(env);
3728 HReg argL = iselNeon64Expr(env, triop->arg2);
3729 HReg argR = iselNeon64Expr(env, triop->arg1);
3730 UInt imm4;
3731 if (triop->arg3->tag != Iex_Const ||
3732 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3733 vpanic("ARM target supports Iop_Extract64 with constant "
3734 "third argument less than 16 only\n");
3735 }
3736 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3737 if (imm4 >= 8) {
3738 vpanic("ARM target supports Iop_Extract64 with constant "
3739 "third argument less than 16 only\n");
3740 }
3741 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3742 res, argL, argR, imm4, False));
3743 return res;
3744 }
3745 case Iop_SetElem8x8:
3746 case Iop_SetElem16x4:
3747 case Iop_SetElem32x2: {
3748 HReg res = newVRegD(env);
3749 HReg dreg = iselNeon64Expr(env, triop->arg1);
3750 HReg arg = iselIntExpr_R(env, triop->arg3);
3751 UInt index, size;
3752 if (triop->arg2->tag != Iex_Const ||
3753 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3754 vpanic("ARM target supports SetElem with constant "
3755 "second argument only\n");
3756 }
3757 index = triop->arg2->Iex.Const.con->Ico.U8;
3758 switch (triop->op) {
3759 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3760 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3761 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3762 default: vassert(0);
3763 }
3764 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3765 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3766 mkARMNRS(ARMNRS_Scalar, res, index),
3767 mkARMNRS(ARMNRS_Reg, arg, 0),
3768 size, False));
3769 return res;
3770 }
3771 default:
3772 break;
3773 }
3774 }
3775
3776 /* --------- MULTIPLEX --------- */
3777 if (e->tag == Iex_ITE) { // VFD
3778 HReg rLo, rHi;
3779 HReg res = newVRegD(env);
3780 iselInt64Expr(&rHi, &rLo, env, e);
3781 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3782 return res;
3783 }
3784
3785 ppIRExpr(e);
3786 vpanic("iselNeon64Expr");
3787 }
3788
3789
iselNeonExpr(ISelEnv * env,IRExpr * e)3790 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3791 {
3792 HReg r;
3793 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3794 r = iselNeonExpr_wrk( env, e );
3795 vassert(hregClass(r) == HRcVec128);
3796 vassert(hregIsVirtual(r));
3797 return r;
3798 }
3799
3800 /* DO NOT CALL THIS DIRECTLY */
iselNeonExpr_wrk(ISelEnv * env,IRExpr * e)3801 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3802 {
3803 IRType ty = typeOfIRExpr(env->type_env, e);
3804 MatchInfo mi;
3805 vassert(e);
3806 vassert(ty == Ity_V128);
3807
3808 if (e->tag == Iex_RdTmp) {
3809 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3810 }
3811
3812 if (e->tag == Iex_Const) {
3813 /* At the moment there should be no 128-bit constants in IR for ARM
3814 generated during disassemble. They are represented as Iop_64HLtoV128
3815 binary operation and are handled among binary ops. */
3816 /* But zero can be created by valgrind internal optimizer */
3817 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3818 HReg res = newVRegV(env);
3819 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3820 return res;
3821 }
3822 if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3823 HReg res = newVRegV(env);
3824 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3825 return res;
3826 }
3827 ppIRExpr(e);
3828 vpanic("128-bit constant is not implemented");
3829 }
3830
3831 if (e->tag == Iex_Load) {
3832 HReg res = newVRegV(env);
3833 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3834 vassert(ty == Ity_V128);
3835 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3836 return res;
3837 }
3838
3839 if (e->tag == Iex_Get) {
3840 HReg addr = newVRegI(env);
3841 HReg res = newVRegV(env);
3842 vassert(ty == Ity_V128);
3843 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3844 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3845 return res;
3846 }
3847
3848 if (e->tag == Iex_Unop) {
3849 switch (e->Iex.Unop.op) {
3850 case Iop_NotV128: {
3851 DECLARE_PATTERN(p_veqz_8x16);
3852 DECLARE_PATTERN(p_veqz_16x8);
3853 DECLARE_PATTERN(p_veqz_32x4);
3854 DECLARE_PATTERN(p_vcge_8sx16);
3855 DECLARE_PATTERN(p_vcge_16sx8);
3856 DECLARE_PATTERN(p_vcge_32sx4);
3857 DECLARE_PATTERN(p_vcge_8ux16);
3858 DECLARE_PATTERN(p_vcge_16ux8);
3859 DECLARE_PATTERN(p_vcge_32ux4);
3860 DEFINE_PATTERN(p_veqz_8x16,
3861 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3862 DEFINE_PATTERN(p_veqz_16x8,
3863 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3864 DEFINE_PATTERN(p_veqz_32x4,
3865 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3866 DEFINE_PATTERN(p_vcge_8sx16,
3867 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3868 DEFINE_PATTERN(p_vcge_16sx8,
3869 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3870 DEFINE_PATTERN(p_vcge_32sx4,
3871 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3872 DEFINE_PATTERN(p_vcge_8ux16,
3873 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3874 DEFINE_PATTERN(p_vcge_16ux8,
3875 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3876 DEFINE_PATTERN(p_vcge_32ux4,
3877 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3878 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3879 HReg res = newVRegV(env);
3880 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3881 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3882 return res;
3883 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3884 HReg res = newVRegV(env);
3885 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3886 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3887 return res;
3888 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3889 HReg res = newVRegV(env);
3890 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3891 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3892 return res;
3893 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3894 HReg res = newVRegV(env);
3895 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3896 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3897 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3898 res, argL, argR, 0, True));
3899 return res;
3900 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3901 HReg res = newVRegV(env);
3902 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3903 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3904 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3905 res, argL, argR, 1, True));
3906 return res;
3907 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3908 HReg res = newVRegV(env);
3909 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3910 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3911 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3912 res, argL, argR, 2, True));
3913 return res;
3914 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3915 HReg res = newVRegV(env);
3916 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3917 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3918 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3919 res, argL, argR, 0, True));
3920 return res;
3921 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3922 HReg res = newVRegV(env);
3923 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3924 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3925 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3926 res, argL, argR, 1, True));
3927 return res;
3928 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3929 HReg res = newVRegV(env);
3930 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3931 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3932 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3933 res, argL, argR, 2, True));
3934 return res;
3935 } else {
3936 HReg res = newVRegV(env);
3937 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3938 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3939 return res;
3940 }
3941 }
3942 case Iop_Dup8x16:
3943 case Iop_Dup16x8:
3944 case Iop_Dup32x4: {
3945 HReg res, arg;
3946 UInt size;
3947 DECLARE_PATTERN(p_vdup_8x16);
3948 DECLARE_PATTERN(p_vdup_16x8);
3949 DECLARE_PATTERN(p_vdup_32x4);
3950 DEFINE_PATTERN(p_vdup_8x16,
3951 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3952 DEFINE_PATTERN(p_vdup_16x8,
3953 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3954 DEFINE_PATTERN(p_vdup_32x4,
3955 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3956 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3957 UInt index;
3958 UInt imm4;
3959 if (mi.bindee[1]->tag == Iex_Const &&
3960 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3961 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3962 imm4 = (index << 1) + 1;
3963 if (index < 8) {
3964 res = newVRegV(env);
3965 arg = iselNeon64Expr(env, mi.bindee[0]);
3966 addInstr(env, ARMInstr_NUnaryS(
3967 ARMneon_VDUP,
3968 mkARMNRS(ARMNRS_Reg, res, 0),
3969 mkARMNRS(ARMNRS_Scalar, arg, index),
3970 imm4, True
3971 ));
3972 return res;
3973 }
3974 }
3975 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3976 UInt index;
3977 UInt imm4;
3978 if (mi.bindee[1]->tag == Iex_Const &&
3979 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3980 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3981 imm4 = (index << 2) + 2;
3982 if (index < 4) {
3983 res = newVRegV(env);
3984 arg = iselNeon64Expr(env, mi.bindee[0]);
3985 addInstr(env, ARMInstr_NUnaryS(
3986 ARMneon_VDUP,
3987 mkARMNRS(ARMNRS_Reg, res, 0),
3988 mkARMNRS(ARMNRS_Scalar, arg, index),
3989 imm4, True
3990 ));
3991 return res;
3992 }
3993 }
3994 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3995 UInt index;
3996 UInt imm4;
3997 if (mi.bindee[1]->tag == Iex_Const &&
3998 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3999 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4000 imm4 = (index << 3) + 4;
4001 if (index < 2) {
4002 res = newVRegV(env);
4003 arg = iselNeon64Expr(env, mi.bindee[0]);
4004 addInstr(env, ARMInstr_NUnaryS(
4005 ARMneon_VDUP,
4006 mkARMNRS(ARMNRS_Reg, res, 0),
4007 mkARMNRS(ARMNRS_Scalar, arg, index),
4008 imm4, True
4009 ));
4010 return res;
4011 }
4012 }
4013 }
4014 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4015 res = newVRegV(env);
4016 switch (e->Iex.Unop.op) {
4017 case Iop_Dup8x16: size = 0; break;
4018 case Iop_Dup16x8: size = 1; break;
4019 case Iop_Dup32x4: size = 2; break;
4020 default: vassert(0);
4021 }
4022 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4023 return res;
4024 }
4025 case Iop_Abs8x16:
4026 case Iop_Abs16x8:
4027 case Iop_Abs32x4: {
4028 HReg res = newVRegV(env);
4029 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030 UInt size = 0;
4031 switch(e->Iex.Binop.op) {
4032 case Iop_Abs8x16: size = 0; break;
4033 case Iop_Abs16x8: size = 1; break;
4034 case Iop_Abs32x4: size = 2; break;
4035 default: vassert(0);
4036 }
4037 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4038 return res;
4039 }
4040 case Iop_Reverse8sIn64_x2:
4041 case Iop_Reverse16sIn64_x2:
4042 case Iop_Reverse32sIn64_x2: {
4043 HReg res = newVRegV(env);
4044 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4045 UInt size = 0;
4046 switch(e->Iex.Binop.op) {
4047 case Iop_Reverse8sIn64_x2: size = 0; break;
4048 case Iop_Reverse16sIn64_x2: size = 1; break;
4049 case Iop_Reverse32sIn64_x2: size = 2; break;
4050 default: vassert(0);
4051 }
4052 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4053 res, arg, size, True));
4054 return res;
4055 }
4056 case Iop_Reverse8sIn32_x4:
4057 case Iop_Reverse16sIn32_x4: {
4058 HReg res = newVRegV(env);
4059 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4060 UInt size = 0;
4061 switch(e->Iex.Binop.op) {
4062 case Iop_Reverse8sIn32_x4: size = 0; break;
4063 case Iop_Reverse16sIn32_x4: size = 1; break;
4064 default: vassert(0);
4065 }
4066 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4067 res, arg, size, True));
4068 return res;
4069 }
4070 case Iop_Reverse8sIn16_x8: {
4071 HReg res = newVRegV(env);
4072 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4073 UInt size = 0;
4074 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4075 res, arg, size, True));
4076 return res;
4077 }
4078 case Iop_CmpNEZ64x2: {
4079 HReg x_lsh = newVRegV(env);
4080 HReg x_rsh = newVRegV(env);
4081 HReg lsh_amt = newVRegV(env);
4082 HReg rsh_amt = newVRegV(env);
4083 HReg zero = newVRegV(env);
4084 HReg tmp = newVRegV(env);
4085 HReg tmp2 = newVRegV(env);
4086 HReg res = newVRegV(env);
4087 HReg x = newVRegV(env);
4088 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4089 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4090 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4091 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4092 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4093 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4094 rsh_amt, zero, lsh_amt, 2, True));
4095 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4096 x_lsh, x, lsh_amt, 3, True));
4097 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4098 x_rsh, x, rsh_amt, 3, True));
4099 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4100 tmp, x_lsh, x_rsh, 0, True));
4101 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4102 res, tmp, x, 0, True));
4103 return res;
4104 }
4105 case Iop_CmpNEZ8x16:
4106 case Iop_CmpNEZ16x8:
4107 case Iop_CmpNEZ32x4: {
4108 HReg res = newVRegV(env);
4109 HReg tmp = newVRegV(env);
4110 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4111 UInt size;
4112 switch (e->Iex.Unop.op) {
4113 case Iop_CmpNEZ8x16: size = 0; break;
4114 case Iop_CmpNEZ16x8: size = 1; break;
4115 case Iop_CmpNEZ32x4: size = 2; break;
4116 default: vassert(0);
4117 }
4118 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4119 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4120 return res;
4121 }
4122 case Iop_Widen8Uto16x8:
4123 case Iop_Widen16Uto32x4:
4124 case Iop_Widen32Uto64x2: {
4125 HReg res = newVRegV(env);
4126 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4127 UInt size;
4128 switch (e->Iex.Unop.op) {
4129 case Iop_Widen8Uto16x8: size = 0; break;
4130 case Iop_Widen16Uto32x4: size = 1; break;
4131 case Iop_Widen32Uto64x2: size = 2; break;
4132 default: vassert(0);
4133 }
4134 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4135 res, arg, size, True));
4136 return res;
4137 }
4138 case Iop_Widen8Sto16x8:
4139 case Iop_Widen16Sto32x4:
4140 case Iop_Widen32Sto64x2: {
4141 HReg res = newVRegV(env);
4142 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4143 UInt size;
4144 switch (e->Iex.Unop.op) {
4145 case Iop_Widen8Sto16x8: size = 0; break;
4146 case Iop_Widen16Sto32x4: size = 1; break;
4147 case Iop_Widen32Sto64x2: size = 2; break;
4148 default: vassert(0);
4149 }
4150 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4151 res, arg, size, True));
4152 return res;
4153 }
4154 case Iop_PwAddL8Sx16:
4155 case Iop_PwAddL16Sx8:
4156 case Iop_PwAddL32Sx4: {
4157 HReg res = newVRegV(env);
4158 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4159 UInt size = 0;
4160 switch(e->Iex.Binop.op) {
4161 case Iop_PwAddL8Sx16: size = 0; break;
4162 case Iop_PwAddL16Sx8: size = 1; break;
4163 case Iop_PwAddL32Sx4: size = 2; break;
4164 default: vassert(0);
4165 }
4166 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4167 res, arg, size, True));
4168 return res;
4169 }
4170 case Iop_PwAddL8Ux16:
4171 case Iop_PwAddL16Ux8:
4172 case Iop_PwAddL32Ux4: {
4173 HReg res = newVRegV(env);
4174 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4175 UInt size = 0;
4176 switch(e->Iex.Binop.op) {
4177 case Iop_PwAddL8Ux16: size = 0; break;
4178 case Iop_PwAddL16Ux8: size = 1; break;
4179 case Iop_PwAddL32Ux4: size = 2; break;
4180 default: vassert(0);
4181 }
4182 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4183 res, arg, size, True));
4184 return res;
4185 }
4186 case Iop_Cnt8x16: {
4187 HReg res = newVRegV(env);
4188 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4189 UInt size = 0;
4190 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4191 return res;
4192 }
4193 case Iop_Clz8x16:
4194 case Iop_Clz16x8:
4195 case Iop_Clz32x4: {
4196 HReg res = newVRegV(env);
4197 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4198 UInt size = 0;
4199 switch(e->Iex.Binop.op) {
4200 case Iop_Clz8x16: size = 0; break;
4201 case Iop_Clz16x8: size = 1; break;
4202 case Iop_Clz32x4: size = 2; break;
4203 default: vassert(0);
4204 }
4205 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4206 return res;
4207 }
4208 case Iop_Cls8x16:
4209 case Iop_Cls16x8:
4210 case Iop_Cls32x4: {
4211 HReg res = newVRegV(env);
4212 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4213 UInt size = 0;
4214 switch(e->Iex.Binop.op) {
4215 case Iop_Cls8x16: size = 0; break;
4216 case Iop_Cls16x8: size = 1; break;
4217 case Iop_Cls32x4: size = 2; break;
4218 default: vassert(0);
4219 }
4220 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4221 return res;
4222 }
4223 case Iop_FtoI32Sx4_RZ: {
4224 HReg res = newVRegV(env);
4225 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4226 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4227 res, arg, 2, True));
4228 return res;
4229 }
4230 case Iop_FtoI32Ux4_RZ: {
4231 HReg res = newVRegV(env);
4232 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4233 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4234 res, arg, 2, True));
4235 return res;
4236 }
4237 case Iop_I32StoFx4: {
4238 HReg res = newVRegV(env);
4239 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4240 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4241 res, arg, 2, True));
4242 return res;
4243 }
4244 case Iop_I32UtoFx4: {
4245 HReg res = newVRegV(env);
4246 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4247 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4248 res, arg, 2, True));
4249 return res;
4250 }
4251 case Iop_F16toF32x4: {
4252 HReg res = newVRegV(env);
4253 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4254 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4255 res, arg, 2, True));
4256 return res;
4257 }
4258 case Iop_RecipEst32Fx4: {
4259 HReg res = newVRegV(env);
4260 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4261 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4262 res, argL, 0, True));
4263 return res;
4264 }
4265 case Iop_RecipEst32Ux4: {
4266 HReg res = newVRegV(env);
4267 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4268 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4269 res, argL, 0, True));
4270 return res;
4271 }
4272 case Iop_Abs32Fx4: {
4273 HReg res = newVRegV(env);
4274 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4275 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4276 res, argL, 0, True));
4277 return res;
4278 }
4279 case Iop_RSqrtEst32Fx4: {
4280 HReg res = newVRegV(env);
4281 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4282 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4283 res, argL, 0, True));
4284 return res;
4285 }
4286 case Iop_RSqrtEst32Ux4: {
4287 HReg res = newVRegV(env);
4288 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4289 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4290 res, argL, 0, True));
4291 return res;
4292 }
4293 case Iop_Neg32Fx4: {
4294 HReg res = newVRegV(env);
4295 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4296 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4297 res, arg, 0, True));
4298 return res;
4299 }
4300 /* ... */
4301 default:
4302 break;
4303 }
4304 }
4305
4306 if (e->tag == Iex_Binop) {
4307 switch (e->Iex.Binop.op) {
4308 case Iop_64HLtoV128:
4309 /* Try to match into single "VMOV reg, imm" instruction */
4310 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4311 e->Iex.Binop.arg2->tag == Iex_Const &&
4312 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4313 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4314 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4315 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4316 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4317 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4318 if (imm) {
4319 HReg res = newVRegV(env);
4320 addInstr(env, ARMInstr_NeonImm(res, imm));
4321 return res;
4322 }
4323 if ((imm64 >> 32) == 0LL &&
4324 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4325 HReg tmp1 = newVRegV(env);
4326 HReg tmp2 = newVRegV(env);
4327 HReg res = newVRegV(env);
4328 if (imm->type < 10) {
4329 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4330 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4331 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4332 res, tmp1, tmp2, 4, True));
4333 return res;
4334 }
4335 }
4336 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4337 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4338 HReg tmp1 = newVRegV(env);
4339 HReg tmp2 = newVRegV(env);
4340 HReg res = newVRegV(env);
4341 if (imm->type < 10) {
4342 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4343 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4344 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4345 res, tmp1, tmp2, 4, True));
4346 return res;
4347 }
4348 }
4349 }
4350 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4351 it the slow way. */
4352 {
4353 /* local scope */
4354 /* Done via the stack for ease of use. */
4355 /* FIXME: assumes little endian host */
4356 HReg w3, w2, w1, w0;
4357 HReg res = newVRegV(env);
4358 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4359 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4360 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4361 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4362 ARMRI84* c_16 = ARMRI84_I84(16,0);
4363 /* Make space for SP */
4364 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4365 hregARM_R13(), c_16));
4366
4367 /* Store the less significant 64 bits */
4368 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4369 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4370 w0, sp_0));
4371 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4372 w1, sp_4));
4373
4374 /* Store the more significant 64 bits */
4375 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4376 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4377 w2, sp_8));
4378 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4379 w3, sp_12));
4380
4381 /* Load result back from stack. */
4382 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4383 mkARMAModeN_R(hregARM_R13())));
4384
4385 /* Restore SP */
4386 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4387 hregARM_R13(), c_16));
4388 return res;
4389 } /* local scope */
4390 goto neon_expr_bad;
4391 case Iop_AndV128: {
4392 HReg res = newVRegV(env);
4393 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4394 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4395 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4396 res, argL, argR, 4, True));
4397 return res;
4398 }
4399 case Iop_OrV128: {
4400 HReg res = newVRegV(env);
4401 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4402 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4403 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4404 res, argL, argR, 4, True));
4405 return res;
4406 }
4407 case Iop_XorV128: {
4408 HReg res = newVRegV(env);
4409 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4410 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4411 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4412 res, argL, argR, 4, True));
4413 return res;
4414 }
4415 case Iop_Add8x16:
4416 case Iop_Add16x8:
4417 case Iop_Add32x4:
4418 case Iop_Add64x2: {
4419 /*
4420 FIXME: remove this if not used
4421 DECLARE_PATTERN(p_vrhadd_32sx4);
4422 ULong one = (1LL << 32) | 1LL;
4423 DEFINE_PATTERN(p_vrhadd_32sx4,
4424 binop(Iop_Add32x4,
4425 binop(Iop_Add32x4,
4426 binop(Iop_SarN32x4,
4427 bind(0),
4428 mkU8(1)),
4429 binop(Iop_SarN32x4,
4430 bind(1),
4431 mkU8(1))),
4432 binop(Iop_SarN32x4,
4433 binop(Iop_Add32x4,
4434 binop(Iop_Add32x4,
4435 binop(Iop_AndV128,
4436 bind(0),
4437 mkU128(one)),
4438 binop(Iop_AndV128,
4439 bind(1),
4440 mkU128(one))),
4441 mkU128(one)),
4442 mkU8(1))));
4443 */
4444 HReg res = newVRegV(env);
4445 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4446 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4447 UInt size;
4448 switch (e->Iex.Binop.op) {
4449 case Iop_Add8x16: size = 0; break;
4450 case Iop_Add16x8: size = 1; break;
4451 case Iop_Add32x4: size = 2; break;
4452 case Iop_Add64x2: size = 3; break;
4453 default:
4454 ppIROp(e->Iex.Binop.op);
4455 vpanic("Illegal element size in VADD");
4456 }
4457 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4458 res, argL, argR, size, True));
4459 return res;
4460 }
4461 case Iop_RecipStep32Fx4: {
4462 HReg res = newVRegV(env);
4463 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4464 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4465 UInt size = 0;
4466 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4467 res, argL, argR, size, True));
4468 return res;
4469 }
4470 case Iop_RSqrtStep32Fx4: {
4471 HReg res = newVRegV(env);
4472 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4473 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4474 UInt size = 0;
4475 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4476 res, argL, argR, size, True));
4477 return res;
4478 }
4479
4480 // These 6 verified 18 Apr 2013
4481 case Iop_InterleaveEvenLanes8x16:
4482 case Iop_InterleaveOddLanes8x16:
4483 case Iop_InterleaveEvenLanes16x8:
4484 case Iop_InterleaveOddLanes16x8:
4485 case Iop_InterleaveEvenLanes32x4:
4486 case Iop_InterleaveOddLanes32x4: {
4487 HReg rD = newVRegV(env);
4488 HReg rM = newVRegV(env);
4489 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4490 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4491 UInt size;
4492 Bool resRd; // is the result in rD or rM ?
4493 switch (e->Iex.Binop.op) {
4494 case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4495 case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4496 case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4497 case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4498 case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4499 case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4500 default: vassert(0);
4501 }
4502 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4503 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4504 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4505 return resRd ? rD : rM;
4506 }
4507
4508 // These 6 verified 18 Apr 2013
4509 case Iop_InterleaveHI8x16:
4510 case Iop_InterleaveLO8x16:
4511 case Iop_InterleaveHI16x8:
4512 case Iop_InterleaveLO16x8:
4513 case Iop_InterleaveHI32x4:
4514 case Iop_InterleaveLO32x4: {
4515 HReg rD = newVRegV(env);
4516 HReg rM = newVRegV(env);
4517 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4518 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4519 UInt size;
4520 Bool resRd; // is the result in rD or rM ?
4521 switch (e->Iex.Binop.op) {
4522 case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4523 case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4524 case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4525 case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4526 case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4527 case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4528 default: vassert(0);
4529 }
4530 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4531 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4532 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4533 return resRd ? rD : rM;
4534 }
4535
4536 // These 6 verified 18 Apr 2013
4537 case Iop_CatOddLanes8x16:
4538 case Iop_CatEvenLanes8x16:
4539 case Iop_CatOddLanes16x8:
4540 case Iop_CatEvenLanes16x8:
4541 case Iop_CatOddLanes32x4:
4542 case Iop_CatEvenLanes32x4: {
4543 HReg rD = newVRegV(env);
4544 HReg rM = newVRegV(env);
4545 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4546 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4547 UInt size;
4548 Bool resRd; // is the result in rD or rM ?
4549 switch (e->Iex.Binop.op) {
4550 case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
4551 case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
4552 case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
4553 case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
4554 case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
4555 case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
4556 default: vassert(0);
4557 }
4558 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4559 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4560 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4561 return resRd ? rD : rM;
4562 }
4563
4564 case Iop_QAdd8Ux16:
4565 case Iop_QAdd16Ux8:
4566 case Iop_QAdd32Ux4:
4567 case Iop_QAdd64Ux2: {
4568 HReg res = newVRegV(env);
4569 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4570 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4571 UInt size;
4572 switch (e->Iex.Binop.op) {
4573 case Iop_QAdd8Ux16: size = 0; break;
4574 case Iop_QAdd16Ux8: size = 1; break;
4575 case Iop_QAdd32Ux4: size = 2; break;
4576 case Iop_QAdd64Ux2: size = 3; break;
4577 default:
4578 ppIROp(e->Iex.Binop.op);
4579 vpanic("Illegal element size in VQADDU");
4580 }
4581 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4582 res, argL, argR, size, True));
4583 return res;
4584 }
4585 case Iop_QAdd8Sx16:
4586 case Iop_QAdd16Sx8:
4587 case Iop_QAdd32Sx4:
4588 case Iop_QAdd64Sx2: {
4589 HReg res = newVRegV(env);
4590 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4591 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4592 UInt size;
4593 switch (e->Iex.Binop.op) {
4594 case Iop_QAdd8Sx16: size = 0; break;
4595 case Iop_QAdd16Sx8: size = 1; break;
4596 case Iop_QAdd32Sx4: size = 2; break;
4597 case Iop_QAdd64Sx2: size = 3; break;
4598 default:
4599 ppIROp(e->Iex.Binop.op);
4600 vpanic("Illegal element size in VQADDS");
4601 }
4602 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4603 res, argL, argR, size, True));
4604 return res;
4605 }
4606 case Iop_Sub8x16:
4607 case Iop_Sub16x8:
4608 case Iop_Sub32x4:
4609 case Iop_Sub64x2: {
4610 HReg res = newVRegV(env);
4611 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4612 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4613 UInt size;
4614 switch (e->Iex.Binop.op) {
4615 case Iop_Sub8x16: size = 0; break;
4616 case Iop_Sub16x8: size = 1; break;
4617 case Iop_Sub32x4: size = 2; break;
4618 case Iop_Sub64x2: size = 3; break;
4619 default:
4620 ppIROp(e->Iex.Binop.op);
4621 vpanic("Illegal element size in VSUB");
4622 }
4623 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4624 res, argL, argR, size, True));
4625 return res;
4626 }
4627 case Iop_QSub8Ux16:
4628 case Iop_QSub16Ux8:
4629 case Iop_QSub32Ux4:
4630 case Iop_QSub64Ux2: {
4631 HReg res = newVRegV(env);
4632 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4633 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4634 UInt size;
4635 switch (e->Iex.Binop.op) {
4636 case Iop_QSub8Ux16: size = 0; break;
4637 case Iop_QSub16Ux8: size = 1; break;
4638 case Iop_QSub32Ux4: size = 2; break;
4639 case Iop_QSub64Ux2: size = 3; break;
4640 default:
4641 ppIROp(e->Iex.Binop.op);
4642 vpanic("Illegal element size in VQSUBU");
4643 }
4644 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4645 res, argL, argR, size, True));
4646 return res;
4647 }
4648 case Iop_QSub8Sx16:
4649 case Iop_QSub16Sx8:
4650 case Iop_QSub32Sx4:
4651 case Iop_QSub64Sx2: {
4652 HReg res = newVRegV(env);
4653 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4654 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4655 UInt size;
4656 switch (e->Iex.Binop.op) {
4657 case Iop_QSub8Sx16: size = 0; break;
4658 case Iop_QSub16Sx8: size = 1; break;
4659 case Iop_QSub32Sx4: size = 2; break;
4660 case Iop_QSub64Sx2: size = 3; break;
4661 default:
4662 ppIROp(e->Iex.Binop.op);
4663 vpanic("Illegal element size in VQSUBS");
4664 }
4665 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4666 res, argL, argR, size, True));
4667 return res;
4668 }
4669 case Iop_Max8Ux16:
4670 case Iop_Max16Ux8:
4671 case Iop_Max32Ux4: {
4672 HReg res = newVRegV(env);
4673 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4674 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4675 UInt size;
4676 switch (e->Iex.Binop.op) {
4677 case Iop_Max8Ux16: size = 0; break;
4678 case Iop_Max16Ux8: size = 1; break;
4679 case Iop_Max32Ux4: size = 2; break;
4680 default: vpanic("Illegal element size in VMAXU");
4681 }
4682 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4683 res, argL, argR, size, True));
4684 return res;
4685 }
4686 case Iop_Max8Sx16:
4687 case Iop_Max16Sx8:
4688 case Iop_Max32Sx4: {
4689 HReg res = newVRegV(env);
4690 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4691 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4692 UInt size;
4693 switch (e->Iex.Binop.op) {
4694 case Iop_Max8Sx16: size = 0; break;
4695 case Iop_Max16Sx8: size = 1; break;
4696 case Iop_Max32Sx4: size = 2; break;
4697 default: vpanic("Illegal element size in VMAXU");
4698 }
4699 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4700 res, argL, argR, size, True));
4701 return res;
4702 }
4703 case Iop_Min8Ux16:
4704 case Iop_Min16Ux8:
4705 case Iop_Min32Ux4: {
4706 HReg res = newVRegV(env);
4707 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4708 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4709 UInt size;
4710 switch (e->Iex.Binop.op) {
4711 case Iop_Min8Ux16: size = 0; break;
4712 case Iop_Min16Ux8: size = 1; break;
4713 case Iop_Min32Ux4: size = 2; break;
4714 default: vpanic("Illegal element size in VMAXU");
4715 }
4716 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4717 res, argL, argR, size, True));
4718 return res;
4719 }
4720 case Iop_Min8Sx16:
4721 case Iop_Min16Sx8:
4722 case Iop_Min32Sx4: {
4723 HReg res = newVRegV(env);
4724 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4725 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4726 UInt size;
4727 switch (e->Iex.Binop.op) {
4728 case Iop_Min8Sx16: size = 0; break;
4729 case Iop_Min16Sx8: size = 1; break;
4730 case Iop_Min32Sx4: size = 2; break;
4731 default: vpanic("Illegal element size in VMAXU");
4732 }
4733 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4734 res, argL, argR, size, True));
4735 return res;
4736 }
4737 case Iop_Sar8x16:
4738 case Iop_Sar16x8:
4739 case Iop_Sar32x4:
4740 case Iop_Sar64x2: {
4741 HReg res = newVRegV(env);
4742 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4743 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4744 HReg argR2 = newVRegV(env);
4745 HReg zero = newVRegV(env);
4746 UInt size;
4747 switch (e->Iex.Binop.op) {
4748 case Iop_Sar8x16: size = 0; break;
4749 case Iop_Sar16x8: size = 1; break;
4750 case Iop_Sar32x4: size = 2; break;
4751 case Iop_Sar64x2: size = 3; break;
4752 default: vassert(0);
4753 }
4754 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4755 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4756 argR2, zero, argR, size, True));
4757 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4758 res, argL, argR2, size, True));
4759 return res;
4760 }
4761 case Iop_Sal8x16:
4762 case Iop_Sal16x8:
4763 case Iop_Sal32x4:
4764 case Iop_Sal64x2: {
4765 HReg res = newVRegV(env);
4766 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768 UInt size;
4769 switch (e->Iex.Binop.op) {
4770 case Iop_Sal8x16: size = 0; break;
4771 case Iop_Sal16x8: size = 1; break;
4772 case Iop_Sal32x4: size = 2; break;
4773 case Iop_Sal64x2: size = 3; break;
4774 default: vassert(0);
4775 }
4776 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4777 res, argL, argR, size, True));
4778 return res;
4779 }
4780 case Iop_Shr8x16:
4781 case Iop_Shr16x8:
4782 case Iop_Shr32x4:
4783 case Iop_Shr64x2: {
4784 HReg res = newVRegV(env);
4785 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4787 HReg argR2 = newVRegV(env);
4788 HReg zero = newVRegV(env);
4789 UInt size;
4790 switch (e->Iex.Binop.op) {
4791 case Iop_Shr8x16: size = 0; break;
4792 case Iop_Shr16x8: size = 1; break;
4793 case Iop_Shr32x4: size = 2; break;
4794 case Iop_Shr64x2: size = 3; break;
4795 default: vassert(0);
4796 }
4797 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4798 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4799 argR2, zero, argR, size, True));
4800 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4801 res, argL, argR2, size, True));
4802 return res;
4803 }
4804 case Iop_Shl8x16:
4805 case Iop_Shl16x8:
4806 case Iop_Shl32x4:
4807 case Iop_Shl64x2: {
4808 HReg res = newVRegV(env);
4809 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4810 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4811 UInt size;
4812 switch (e->Iex.Binop.op) {
4813 case Iop_Shl8x16: size = 0; break;
4814 case Iop_Shl16x8: size = 1; break;
4815 case Iop_Shl32x4: size = 2; break;
4816 case Iop_Shl64x2: size = 3; break;
4817 default: vassert(0);
4818 }
4819 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4820 res, argL, argR, size, True));
4821 return res;
4822 }
4823 case Iop_QShl8x16:
4824 case Iop_QShl16x8:
4825 case Iop_QShl32x4:
4826 case Iop_QShl64x2: {
4827 HReg res = newVRegV(env);
4828 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4829 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4830 UInt size;
4831 switch (e->Iex.Binop.op) {
4832 case Iop_QShl8x16: size = 0; break;
4833 case Iop_QShl16x8: size = 1; break;
4834 case Iop_QShl32x4: size = 2; break;
4835 case Iop_QShl64x2: size = 3; break;
4836 default: vassert(0);
4837 }
4838 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4839 res, argL, argR, size, True));
4840 return res;
4841 }
4842 case Iop_QSal8x16:
4843 case Iop_QSal16x8:
4844 case Iop_QSal32x4:
4845 case Iop_QSal64x2: {
4846 HReg res = newVRegV(env);
4847 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4849 UInt size;
4850 switch (e->Iex.Binop.op) {
4851 case Iop_QSal8x16: size = 0; break;
4852 case Iop_QSal16x8: size = 1; break;
4853 case Iop_QSal32x4: size = 2; break;
4854 case Iop_QSal64x2: size = 3; break;
4855 default: vassert(0);
4856 }
4857 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4858 res, argL, argR, size, True));
4859 return res;
4860 }
4861 case Iop_QShlNsatUU8x16:
4862 case Iop_QShlNsatUU16x8:
4863 case Iop_QShlNsatUU32x4:
4864 case Iop_QShlNsatUU64x2: {
4865 HReg res = newVRegV(env);
4866 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4867 UInt size, imm;
4868 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4869 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4870 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
4871 "second argument only\n");
4872 }
4873 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4874 switch (e->Iex.Binop.op) {
4875 case Iop_QShlNsatUU8x16: size = 8 | imm; break;
4876 case Iop_QShlNsatUU16x8: size = 16 | imm; break;
4877 case Iop_QShlNsatUU32x4: size = 32 | imm; break;
4878 case Iop_QShlNsatUU64x2: size = 64 | imm; break;
4879 default: vassert(0);
4880 }
4881 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4882 res, argL, size, True));
4883 return res;
4884 }
4885 case Iop_QShlNsatSU8x16:
4886 case Iop_QShlNsatSU16x8:
4887 case Iop_QShlNsatSU32x4:
4888 case Iop_QShlNsatSU64x2: {
4889 HReg res = newVRegV(env);
4890 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4891 UInt size, imm;
4892 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4893 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4894 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
4895 "second argument only\n");
4896 }
4897 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4898 switch (e->Iex.Binop.op) {
4899 case Iop_QShlNsatSU8x16: size = 8 | imm; break;
4900 case Iop_QShlNsatSU16x8: size = 16 | imm; break;
4901 case Iop_QShlNsatSU32x4: size = 32 | imm; break;
4902 case Iop_QShlNsatSU64x2: size = 64 | imm; break;
4903 default: vassert(0);
4904 }
4905 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4906 res, argL, size, True));
4907 return res;
4908 }
4909 case Iop_QShlNsatSS8x16:
4910 case Iop_QShlNsatSS16x8:
4911 case Iop_QShlNsatSS32x4:
4912 case Iop_QShlNsatSS64x2: {
4913 HReg res = newVRegV(env);
4914 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4915 UInt size, imm;
4916 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4917 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4918 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
4919 "second argument only\n");
4920 }
4921 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4922 switch (e->Iex.Binop.op) {
4923 case Iop_QShlNsatSS8x16: size = 8 | imm; break;
4924 case Iop_QShlNsatSS16x8: size = 16 | imm; break;
4925 case Iop_QShlNsatSS32x4: size = 32 | imm; break;
4926 case Iop_QShlNsatSS64x2: size = 64 | imm; break;
4927 default: vassert(0);
4928 }
4929 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4930 res, argL, size, True));
4931 return res;
4932 }
4933 case Iop_ShrN8x16:
4934 case Iop_ShrN16x8:
4935 case Iop_ShrN32x4:
4936 case Iop_ShrN64x2: {
4937 HReg res = newVRegV(env);
4938 HReg tmp = newVRegV(env);
4939 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4940 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4941 HReg argR2 = newVRegI(env);
4942 UInt size;
4943 switch (e->Iex.Binop.op) {
4944 case Iop_ShrN8x16: size = 0; break;
4945 case Iop_ShrN16x8: size = 1; break;
4946 case Iop_ShrN32x4: size = 2; break;
4947 case Iop_ShrN64x2: size = 3; break;
4948 default: vassert(0);
4949 }
4950 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4951 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4952 tmp, argR2, 0, True));
4953 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4954 res, argL, tmp, size, True));
4955 return res;
4956 }
4957 case Iop_ShlN8x16:
4958 case Iop_ShlN16x8:
4959 case Iop_ShlN32x4:
4960 case Iop_ShlN64x2: {
4961 HReg res = newVRegV(env);
4962 HReg tmp = newVRegV(env);
4963 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4964 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4965 UInt size;
4966 switch (e->Iex.Binop.op) {
4967 case Iop_ShlN8x16: size = 0; break;
4968 case Iop_ShlN16x8: size = 1; break;
4969 case Iop_ShlN32x4: size = 2; break;
4970 case Iop_ShlN64x2: size = 3; break;
4971 default: vassert(0);
4972 }
4973 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4974 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4975 res, argL, tmp, size, True));
4976 return res;
4977 }
4978 case Iop_SarN8x16:
4979 case Iop_SarN16x8:
4980 case Iop_SarN32x4:
4981 case Iop_SarN64x2: {
4982 HReg res = newVRegV(env);
4983 HReg tmp = newVRegV(env);
4984 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4985 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4986 HReg argR2 = newVRegI(env);
4987 UInt size;
4988 switch (e->Iex.Binop.op) {
4989 case Iop_SarN8x16: size = 0; break;
4990 case Iop_SarN16x8: size = 1; break;
4991 case Iop_SarN32x4: size = 2; break;
4992 case Iop_SarN64x2: size = 3; break;
4993 default: vassert(0);
4994 }
4995 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4996 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4997 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4998 res, argL, tmp, size, True));
4999 return res;
5000 }
5001 case Iop_CmpGT8Ux16:
5002 case Iop_CmpGT16Ux8:
5003 case Iop_CmpGT32Ux4: {
5004 HReg res = newVRegV(env);
5005 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5006 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5007 UInt size;
5008 switch (e->Iex.Binop.op) {
5009 case Iop_CmpGT8Ux16: size = 0; break;
5010 case Iop_CmpGT16Ux8: size = 1; break;
5011 case Iop_CmpGT32Ux4: size = 2; break;
5012 default: vassert(0);
5013 }
5014 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5015 res, argL, argR, size, True));
5016 return res;
5017 }
5018 case Iop_CmpGT8Sx16:
5019 case Iop_CmpGT16Sx8:
5020 case Iop_CmpGT32Sx4: {
5021 HReg res = newVRegV(env);
5022 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5023 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5024 UInt size;
5025 switch (e->Iex.Binop.op) {
5026 case Iop_CmpGT8Sx16: size = 0; break;
5027 case Iop_CmpGT16Sx8: size = 1; break;
5028 case Iop_CmpGT32Sx4: size = 2; break;
5029 default: vassert(0);
5030 }
5031 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5032 res, argL, argR, size, True));
5033 return res;
5034 }
5035 case Iop_CmpEQ8x16:
5036 case Iop_CmpEQ16x8:
5037 case Iop_CmpEQ32x4: {
5038 HReg res = newVRegV(env);
5039 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5040 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5041 UInt size;
5042 switch (e->Iex.Binop.op) {
5043 case Iop_CmpEQ8x16: size = 0; break;
5044 case Iop_CmpEQ16x8: size = 1; break;
5045 case Iop_CmpEQ32x4: size = 2; break;
5046 default: vassert(0);
5047 }
5048 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5049 res, argL, argR, size, True));
5050 return res;
5051 }
5052 case Iop_Mul8x16:
5053 case Iop_Mul16x8:
5054 case Iop_Mul32x4: {
5055 HReg res = newVRegV(env);
5056 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058 UInt size = 0;
5059 switch(e->Iex.Binop.op) {
5060 case Iop_Mul8x16: size = 0; break;
5061 case Iop_Mul16x8: size = 1; break;
5062 case Iop_Mul32x4: size = 2; break;
5063 default: vassert(0);
5064 }
5065 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5066 res, argL, argR, size, True));
5067 return res;
5068 }
5069 case Iop_Mull8Ux8:
5070 case Iop_Mull16Ux4:
5071 case Iop_Mull32Ux2: {
5072 HReg res = newVRegV(env);
5073 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5074 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5075 UInt size = 0;
5076 switch(e->Iex.Binop.op) {
5077 case Iop_Mull8Ux8: size = 0; break;
5078 case Iop_Mull16Ux4: size = 1; break;
5079 case Iop_Mull32Ux2: size = 2; break;
5080 default: vassert(0);
5081 }
5082 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5083 res, argL, argR, size, True));
5084 return res;
5085 }
5086
5087 case Iop_Mull8Sx8:
5088 case Iop_Mull16Sx4:
5089 case Iop_Mull32Sx2: {
5090 HReg res = newVRegV(env);
5091 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5092 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5093 UInt size = 0;
5094 switch(e->Iex.Binop.op) {
5095 case Iop_Mull8Sx8: size = 0; break;
5096 case Iop_Mull16Sx4: size = 1; break;
5097 case Iop_Mull32Sx2: size = 2; break;
5098 default: vassert(0);
5099 }
5100 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5101 res, argL, argR, size, True));
5102 return res;
5103 }
5104
5105 case Iop_QDMulHi16Sx8:
5106 case Iop_QDMulHi32Sx4: {
5107 HReg res = newVRegV(env);
5108 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5109 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5110 UInt size = 0;
5111 switch(e->Iex.Binop.op) {
5112 case Iop_QDMulHi16Sx8: size = 1; break;
5113 case Iop_QDMulHi32Sx4: size = 2; break;
5114 default: vassert(0);
5115 }
5116 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5117 res, argL, argR, size, True));
5118 return res;
5119 }
5120
5121 case Iop_QRDMulHi16Sx8:
5122 case Iop_QRDMulHi32Sx4: {
5123 HReg res = newVRegV(env);
5124 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5125 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5126 UInt size = 0;
5127 switch(e->Iex.Binop.op) {
5128 case Iop_QRDMulHi16Sx8: size = 1; break;
5129 case Iop_QRDMulHi32Sx4: size = 2; break;
5130 default: vassert(0);
5131 }
5132 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5133 res, argL, argR, size, True));
5134 return res;
5135 }
5136
5137 case Iop_QDMull16Sx4:
5138 case Iop_QDMull32Sx2: {
5139 HReg res = newVRegV(env);
5140 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5141 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5142 UInt size = 0;
5143 switch(e->Iex.Binop.op) {
5144 case Iop_QDMull16Sx4: size = 1; break;
5145 case Iop_QDMull32Sx2: size = 2; break;
5146 default: vassert(0);
5147 }
5148 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5149 res, argL, argR, size, True));
5150 return res;
5151 }
5152 case Iop_PolynomialMul8x16: {
5153 HReg res = newVRegV(env);
5154 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5155 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5156 UInt size = 0;
5157 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5158 res, argL, argR, size, True));
5159 return res;
5160 }
5161 case Iop_Max32Fx4: {
5162 HReg res = newVRegV(env);
5163 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5164 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5165 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5166 res, argL, argR, 2, True));
5167 return res;
5168 }
5169 case Iop_Min32Fx4: {
5170 HReg res = newVRegV(env);
5171 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5172 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5173 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5174 res, argL, argR, 2, True));
5175 return res;
5176 }
5177 case Iop_PwMax32Fx4: {
5178 HReg res = newVRegV(env);
5179 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5180 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5181 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5182 res, argL, argR, 2, True));
5183 return res;
5184 }
5185 case Iop_PwMin32Fx4: {
5186 HReg res = newVRegV(env);
5187 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5188 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5189 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5190 res, argL, argR, 2, True));
5191 return res;
5192 }
5193 case Iop_CmpGT32Fx4: {
5194 HReg res = newVRegV(env);
5195 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5196 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5197 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5198 res, argL, argR, 2, True));
5199 return res;
5200 }
5201 case Iop_CmpGE32Fx4: {
5202 HReg res = newVRegV(env);
5203 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5204 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5205 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5206 res, argL, argR, 2, True));
5207 return res;
5208 }
5209 case Iop_CmpEQ32Fx4: {
5210 HReg res = newVRegV(env);
5211 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5212 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5213 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5214 res, argL, argR, 2, True));
5215 return res;
5216 }
5217
5218 case Iop_PolynomialMull8x8: {
5219 HReg res = newVRegV(env);
5220 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5221 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5222 UInt size = 0;
5223 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5224 res, argL, argR, size, True));
5225 return res;
5226 }
5227 case Iop_F32ToFixed32Ux4_RZ:
5228 case Iop_F32ToFixed32Sx4_RZ:
5229 case Iop_Fixed32UToF32x4_RN:
5230 case Iop_Fixed32SToF32x4_RN: {
5231 HReg res = newVRegV(env);
5232 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5233 ARMNeonUnOp op;
5234 UInt imm6;
5235 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5236 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5237 vpanic("ARM supports FP <-> Fixed conversion with constant "
5238 "second argument less than 33 only\n");
5239 }
5240 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5241 vassert(imm6 <= 32 && imm6 > 0);
5242 imm6 = 64 - imm6;
5243 switch(e->Iex.Binop.op) {
5244 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5245 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5246 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5247 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5248 default: vassert(0);
5249 }
5250 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5251 return res;
5252 }
5253 /*
5254 FIXME remove if not used
5255 case Iop_VDup8x16:
5256 case Iop_VDup16x8:
5257 case Iop_VDup32x4: {
5258 HReg res = newVRegV(env);
5259 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5260 UInt imm4;
5261 UInt index;
5262 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5263 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5264 vpanic("ARM supports Iop_VDup with constant "
5265 "second argument less than 16 only\n");
5266 }
5267 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5268 switch(e->Iex.Binop.op) {
5269 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5270 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5271 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5272 default: vassert(0);
5273 }
5274 if (imm4 >= 16) {
5275 vpanic("ARM supports Iop_VDup with constant "
5276 "second argument less than 16 only\n");
5277 }
5278 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5279 res, argL, imm4, True));
5280 return res;
5281 }
5282 */
5283 case Iop_PwAdd8x16:
5284 case Iop_PwAdd16x8:
5285 case Iop_PwAdd32x4: {
5286 HReg res = newVRegV(env);
5287 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5288 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5289 UInt size = 0;
5290 switch(e->Iex.Binop.op) {
5291 case Iop_PwAdd8x16: size = 0; break;
5292 case Iop_PwAdd16x8: size = 1; break;
5293 case Iop_PwAdd32x4: size = 2; break;
5294 default: vassert(0);
5295 }
5296 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5297 res, argL, argR, size, True));
5298 return res;
5299 }
5300 /* ... */
5301 default:
5302 break;
5303 }
5304 }
5305
5306 if (e->tag == Iex_Triop) {
5307 IRTriop *triop = e->Iex.Triop.details;
5308
5309 switch (triop->op) {
5310 case Iop_SliceV128: {
5311 HReg res = newVRegV(env);
5312 HReg argL = iselNeonExpr(env, triop->arg2);
5313 HReg argR = iselNeonExpr(env, triop->arg1);
5314 UInt imm4;
5315 if (triop->arg3->tag != Iex_Const ||
5316 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5317 vpanic("ARM target supports Iop_ExtractV128 with constant "
5318 "third argument less than 16 only\n");
5319 }
5320 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5321 if (imm4 >= 16) {
5322 vpanic("ARM target supports Iop_ExtractV128 with constant "
5323 "third argument less than 16 only\n");
5324 }
5325 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5326 res, argL, argR, imm4, True));
5327 return res;
5328 }
5329 case Iop_Mul32Fx4:
5330 case Iop_Sub32Fx4:
5331 case Iop_Add32Fx4: {
5332 HReg res = newVRegV(env);
5333 HReg argL = iselNeonExpr(env, triop->arg2);
5334 HReg argR = iselNeonExpr(env, triop->arg3);
5335 UInt size = 0;
5336 ARMNeonBinOp op = ARMneon_INVALID;
5337 switch (triop->op) {
5338 case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5339 case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5340 case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5341 default: vassert(0);
5342 }
5343 addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5344 return res;
5345 }
5346 default:
5347 break;
5348 }
5349 }
5350
5351 if (e->tag == Iex_ITE) { // VFD
5352 ARMCondCode cc;
5353 HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5354 HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5355 HReg dst = newVRegV(env);
5356 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5357 cc = iselCondCode(env, e->Iex.ITE.cond);
5358 addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5359 return dst;
5360 }
5361
5362 neon_expr_bad:
5363 ppIRExpr(e);
5364 vpanic("iselNeonExpr_wrk");
5365 }
5366
5367 /*---------------------------------------------------------*/
5368 /*--- ISEL: Floating point expressions (64 bit) ---*/
5369 /*---------------------------------------------------------*/
5370
5371 /* Compute a 64-bit floating point value into a register, the identity
5372 of which is returned. As with iselIntExpr_R, the reg may be either
5373 real or virtual; in any case it must not be changed by subsequent
5374 code emitted by the caller. */
5375
iselDblExpr(ISelEnv * env,IRExpr * e)5376 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5377 {
5378 HReg r = iselDblExpr_wrk( env, e );
5379 # if 0
5380 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5381 # endif
5382 vassert(hregClass(r) == HRcFlt64);
5383 vassert(hregIsVirtual(r));
5384 return r;
5385 }
5386
5387 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselDblExpr: emits ARM VFP (and, where available, NEON)
   instructions computing the F64 expression 'e', returning the result
   register.  Panics on IR shapes the ARM backend does not handle. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Temporary: just look up its assigned register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Just handle the zero case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
         /* Materialise integer 0 and transfer it to both halves of a
            D register. */
         HReg z32 = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARMInstr_Imm32(z32, 0));
         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
         return dst;
      }
   }

   /* Little-endian F64 load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegD(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   /* Read from the guest state (addressed off R8). */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg res = newVRegD(env);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI64asF64: {
            /* With NEON, the I64 already lives in a D register; otherwise
               evaluate as a 32-bit pair and transfer both halves. */
            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
               return iselNeon64Expr(env, e->Iex.Unop.arg);
            } else {
               HReg srcHi, srcLo;
               HReg dst = newVRegD(env);
               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
               return dst;
            }
         }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion is exact; no rounding mode needed. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Int -> F64: move the int into an S register first, then
               convert in place (FSITOD/FUITOD). */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg f32 = newVRegF(env);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            /* VMOV f32, src */
            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
            /* FSITOD dst, f32 */
            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
                                          dst, f32));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         /* arg1 is the rounding mode (ignored); arg2/arg3 are operands. */
         case Iop_DivF64:
         case Iop_MulF64:
         case Iop_AddF64:
         case Iop_SubF64: {
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselDblExpr(env, triop->arg2);
            HReg argR = iselDblExpr(env, triop->arg3);
            HReg dst = newVRegD(env);
            switch (triop->op) {
               case Iop_DivF64: op = ARMvfp_DIV; break;
               case Iop_MulF64: op = ARMvfp_MUL; break;
               case Iop_AddF64: op = ARMvfp_ADD; break;
               case Iop_SubF64: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_ITE) { // VFD
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         /* Copy the 'true' value, then conditionally overwrite with the
            'false' value under the inverted condition (cc ^ 1). */
         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegD(env);
         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
         ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
5534
5535
5536 /*---------------------------------------------------------*/
5537 /*--- ISEL: Floating point expressions (32 bit) ---*/
5538 /*---------------------------------------------------------*/
5539
5540 /* Compute a 32-bit floating point value into a register, the identity
5541 of which is returned. As with iselIntExpr_R, the reg may be either
5542 real or virtual; in any case it must not be changed by subsequent
5543 code emitted by the caller. */
5544
iselFltExpr(ISelEnv * env,IRExpr * e)5545 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5546 {
5547 HReg r = iselFltExpr_wrk( env, e );
5548 # if 0
5549 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5550 # endif
5551 vassert(hregClass(r) == HRcFlt32);
5552 vassert(hregIsVirtual(r));
5553 return r;
5554 }
5555
5556 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: emits ARM VFP instructions computing the F32
   expression 'e', returning the result register.  Panics on IR shapes
   the ARM backend does not handle. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Temporary: just look up its assigned register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* Little-endian F32 load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   /* Read from the guest state (addressed off R8). */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg res = newVRegF(env);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI32asF32: {
            /* Bit-for-bit move from an integer register to an S reg. */
            HReg dst = newVRegF(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
            return dst;
         }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF32: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion can round, so honour the requested
               rounding mode (arg1) around the convert, then restore the
               default mode expected by generated code. */
            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
            HReg valS = newVRegF(env);
            /* FCVTSD valS, valD */
            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
            set_VFP_rounding_default(env);
            return valS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         /* arg1 is the rounding mode (ignored); arg2/arg3 are operands. */
         case Iop_DivF32:
         case Iop_MulF32:
         case Iop_AddF32:
         case Iop_SubF32: {
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselFltExpr(env, triop->arg2);
            HReg argR = iselFltExpr(env, triop->arg3);
            HReg dst = newVRegF(env);
            switch (triop->op) {
               case Iop_DivF32: op = ARMvfp_DIV; break;
               case Iop_MulF32: op = ARMvfp_MUL; break;
               case Iop_AddF32: op = ARMvfp_ADD; break;
               case Iop_SubF32: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_ITE) { // VFD
      if (ty == Ity_F32
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         /* Copy the 'true' value, then conditionally overwrite with the
            'false' value under the inverted condition (cc ^ 1). */
         ARMCondCode cc;
         HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
         HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegF(env);
         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
         cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
5677
5678
5679 /*---------------------------------------------------------*/
5680 /*--- ISEL: Statements ---*/
5681 /*---------------------------------------------------------*/
5682
iselStmt(ISelEnv * env,IRStmt * stmt)5683 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5684 {
5685 if (vex_traceflags & VEX_TRACE_VCODE) {
5686 vex_printf("\n-- ");
5687 ppIRStmt(stmt);
5688 vex_printf("\n");
5689 }
5690 switch (stmt->tag) {
5691
5692 /* --------- STORE --------- */
5693 /* little-endian write to memory */
5694 case Ist_Store: {
5695 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5696 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5697 IREndness end = stmt->Ist.Store.end;
5698
5699 if (tya != Ity_I32 || end != Iend_LE)
5700 goto stmt_fail;
5701
5702 if (tyd == Ity_I32) {
5703 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5704 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5705 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5706 return;
5707 }
5708 if (tyd == Ity_I16) {
5709 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5710 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5711 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5712 False/*!isLoad*/,
5713 False/*!isSignedLoad*/, rD, am));
5714 return;
5715 }
5716 if (tyd == Ity_I8) {
5717 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5718 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5719 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5720 return;
5721 }
5722 if (tyd == Ity_I64) {
5723 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5724 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5725 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5726 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5727 } else {
5728 HReg rDhi, rDlo, rA;
5729 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5730 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5731 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5732 ARMAMode1_RI(rA,4)));
5733 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5734 ARMAMode1_RI(rA,0)));
5735 }
5736 return;
5737 }
5738 if (tyd == Ity_F64) {
5739 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5740 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5741 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5742 return;
5743 }
5744 if (tyd == Ity_F32) {
5745 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5746 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5747 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5748 return;
5749 }
5750 if (tyd == Ity_V128) {
5751 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5752 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5753 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5754 return;
5755 }
5756
5757 break;
5758 }
5759
5760 /* --------- CONDITIONAL STORE --------- */
5761 /* conditional little-endian write to memory */
5762 case Ist_StoreG: {
5763 IRStoreG* sg = stmt->Ist.StoreG.details;
5764 IRType tya = typeOfIRExpr(env->type_env, sg->addr);
5765 IRType tyd = typeOfIRExpr(env->type_env, sg->data);
5766 IREndness end = sg->end;
5767
5768 if (tya != Ity_I32 || end != Iend_LE)
5769 goto stmt_fail;
5770
5771 switch (tyd) {
5772 case Ity_I8:
5773 case Ity_I32: {
5774 HReg rD = iselIntExpr_R(env, sg->data);
5775 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
5776 ARMCondCode cc = iselCondCode(env, sg->guard);
5777 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5778 (cc, False/*!isLoad*/, rD, am));
5779 return;
5780 }
5781 case Ity_I16: {
5782 HReg rD = iselIntExpr_R(env, sg->data);
5783 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
5784 ARMCondCode cc = iselCondCode(env, sg->guard);
5785 addInstr(env, ARMInstr_LdSt16(cc,
5786 False/*!isLoad*/,
5787 False/*!isSignedLoad*/, rD, am));
5788 return;
5789 }
5790 default:
5791 break;
5792 }
5793 break;
5794 }
5795
5796 /* --------- CONDITIONAL LOAD --------- */
5797 /* conditional little-endian load from memory */
5798 case Ist_LoadG: {
5799 IRLoadG* lg = stmt->Ist.LoadG.details;
5800 IRType tya = typeOfIRExpr(env->type_env, lg->addr);
5801 IREndness end = lg->end;
5802
5803 if (tya != Ity_I32 || end != Iend_LE)
5804 goto stmt_fail;
5805
5806 switch (lg->cvt) {
5807 case ILGop_8Uto32:
5808 case ILGop_Ident32: {
5809 HReg rAlt = iselIntExpr_R(env, lg->alt);
5810 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
5811 HReg rD = lookupIRTemp(env, lg->dst);
5812 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5813 ARMCondCode cc = iselCondCode(env, lg->guard);
5814 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5815 : ARMInstr_LdSt8U)
5816 (cc, True/*isLoad*/, rD, am));
5817 return;
5818 }
5819 case ILGop_16Sto32:
5820 case ILGop_16Uto32:
5821 case ILGop_8Sto32: {
5822 HReg rAlt = iselIntExpr_R(env, lg->alt);
5823 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
5824 HReg rD = lookupIRTemp(env, lg->dst);
5825 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5826 ARMCondCode cc = iselCondCode(env, lg->guard);
5827 if (lg->cvt == ILGop_8Sto32) {
5828 addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5829 } else {
5830 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5831 Bool sx = lg->cvt == ILGop_16Sto32;
5832 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5833 }
5834 return;
5835 }
5836 default:
5837 break;
5838 }
5839 break;
5840 }
5841
5842 /* --------- PUT --------- */
5843 /* write guest state, fixed offset */
5844 case Ist_Put: {
5845 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5846
5847 if (tyd == Ity_I32) {
5848 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5849 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5850 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5851 return;
5852 }
5853 if (tyd == Ity_I64) {
5854 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5855 HReg addr = newVRegI(env);
5856 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5857 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5858 stmt->Ist.Put.offset));
5859 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5860 } else {
5861 HReg rDhi, rDlo;
5862 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5863 stmt->Ist.Put.offset + 0);
5864 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5865 stmt->Ist.Put.offset + 4);
5866 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5867 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5868 rDhi, am4));
5869 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5870 rDlo, am0));
5871 }
5872 return;
5873 }
5874 if (tyd == Ity_F64) {
5875 // XXX This won't work if offset > 1020 or is not 0 % 4.
5876 // In which case we'll have to generate more longwinded code.
5877 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5878 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5879 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5880 return;
5881 }
5882 if (tyd == Ity_F32) {
5883 // XXX This won't work if offset > 1020 or is not 0 % 4.
5884 // In which case we'll have to generate more longwinded code.
5885 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5886 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5887 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5888 return;
5889 }
5890 if (tyd == Ity_V128) {
5891 HReg addr = newVRegI(env);
5892 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5893 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5894 stmt->Ist.Put.offset));
5895 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5896 return;
5897 }
5898 break;
5899 }
5900
5901 /* --------- TMP --------- */
5902 /* assign value to temporary */
5903 case Ist_WrTmp: {
5904 IRTemp tmp = stmt->Ist.WrTmp.tmp;
5905 IRType ty = typeOfIRTemp(env->type_env, tmp);
5906
5907 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5908 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5909 env, stmt->Ist.WrTmp.data);
5910 HReg dst = lookupIRTemp(env, tmp);
5911 addInstr(env, ARMInstr_Mov(dst,ri84));
5912 return;
5913 }
5914 if (ty == Ity_I1) {
5915 /* Here, we are generating a I1 value into a 32 bit register.
5916 Make sure the value in the register is only zero or one,
5917 but no other. This allows optimisation of the
5918 1Uto32(tmp:I1) case, by making it simply a copy of the
5919 register holding 'tmp'. The point being that the value in
5920 the register holding 'tmp' can only have been created
5921 here. */
5922 HReg dst = lookupIRTemp(env, tmp);
5923 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5924 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5925 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5926 return;
5927 }
5928 if (ty == Ity_I64) {
5929 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5930 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5931 HReg dst = lookupIRTemp(env, tmp);
5932 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5933 } else {
5934 HReg rHi, rLo, dstHi, dstLo;
5935 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5936 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5937 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5938 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5939 }
5940 return;
5941 }
5942 if (ty == Ity_F64) {
5943 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5944 HReg dst = lookupIRTemp(env, tmp);
5945 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5946 return;
5947 }
5948 if (ty == Ity_F32) {
5949 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5950 HReg dst = lookupIRTemp(env, tmp);
5951 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5952 return;
5953 }
5954 if (ty == Ity_V128) {
5955 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5956 HReg dst = lookupIRTemp(env, tmp);
5957 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5958 return;
5959 }
5960 break;
5961 }
5962
5963 /* --------- Call to DIRTY helper --------- */
5964 /* call complex ("dirty") helper function */
5965 case Ist_Dirty: {
5966 IRDirty* d = stmt->Ist.Dirty.details;
5967
5968 /* Figure out the return type, if any. */
5969 IRType retty = Ity_INVALID;
5970 if (d->tmp != IRTemp_INVALID)
5971 retty = typeOfIRTemp(env->type_env, d->tmp);
5972
5973 Bool retty_ok = False;
5974 switch (retty) {
5975 case Ity_INVALID: /* function doesn't return anything */
5976 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5977 //case Ity_V128: //ATC
5978 retty_ok = True; break;
5979 default:
5980 break;
5981 }
5982 if (!retty_ok)
5983 break; /* will go to stmt_fail: */
5984
5985 /* Marshal args, do the call, and set the return value to 0x555..555
5986 if this is a conditional call that returns a value and the
5987 call is skipped. */
5988 UInt addToSp = 0;
5989 RetLoc rloc = mk_RetLoc_INVALID();
5990 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
5991 vassert(is_sane_RetLoc(rloc));
5992
5993 /* Now figure out what to do with the returned value, if any. */
5994 switch (retty) {
5995 case Ity_INVALID: {
5996 /* No return value. Nothing to do. */
5997 vassert(d->tmp == IRTemp_INVALID);
5998 vassert(rloc.pri == RLPri_None);
5999 vassert(addToSp == 0);
6000 return;
6001 }
6002 case Ity_I64: {
6003 vassert(rloc.pri == RLPri_2Int);
6004 vassert(addToSp == 0);
6005 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6006 HReg tmp = lookupIRTemp(env, d->tmp);
6007 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6008 hregARM_R0()));
6009 } else {
6010 HReg dstHi, dstLo;
6011 /* The returned value is in r1:r0. Park it in the
6012 register-pair associated with tmp. */
6013 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6014 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6015 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6016 }
6017 return;
6018 }
6019 case Ity_I32: case Ity_I16: case Ity_I8: {
6020 vassert(rloc.pri == RLPri_Int);
6021 vassert(addToSp == 0);
6022 /* The returned value is in r0. Park it in the register
6023 associated with tmp. */
6024 HReg dst = lookupIRTemp(env, d->tmp);
6025 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6026 return;
6027 }
6028 case Ity_V128: {
6029 vassert(0); // ATC. The code that this produces really
6030 // needs to be looked at, to verify correctness.
6031 // I don't think this can ever happen though, since the
6032 // ARM front end never produces 128-bit loads/stores.
6033 // Hence the following is mostly theoretical.
6034 /* The returned value is on the stack, and *retloc tells
6035 us where. Fish it off the stack and then move the
6036 stack pointer upwards to clear it, as directed by
6037 doHelperCall. */
6038 vassert(rloc.pri == RLPri_V128SpRel);
6039 vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6040 vassert(addToSp >= 16);
6041 vassert(addToSp < 256); // ditto reason as for rloc.spOff
6042 HReg dst = lookupIRTemp(env, d->tmp);
6043 HReg tmp = newVRegI(env);
6044 HReg r13 = hregARM_R13(); // sp
6045 addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6046 tmp, r13, ARMRI84_I84(rloc.spOff,0)));
6047 ARMAModeN* am = mkARMAModeN_R(tmp);
6048 addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6049 addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6050 r13, r13, ARMRI84_I84(addToSp,0)));
6051 return;
6052 }
6053 default:
6054 /*NOTREACHED*/
6055 vassert(0);
6056 }
6057 break;
6058 }
6059
6060 /* --------- Load Linked and Store Conditional --------- */
6061 case Ist_LLSC: {
6062 if (stmt->Ist.LLSC.storedata == NULL) {
6063 /* LL */
6064 IRTemp res = stmt->Ist.LLSC.result;
6065 IRType ty = typeOfIRTemp(env->type_env, res);
6066 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6067 Int szB = 0;
6068 HReg r_dst = lookupIRTemp(env, res);
6069 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6070 switch (ty) {
6071 case Ity_I8: szB = 1; break;
6072 case Ity_I16: szB = 2; break;
6073 case Ity_I32: szB = 4; break;
6074 default: vassert(0);
6075 }
6076 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6077 addInstr(env, ARMInstr_LdrEX(szB));
6078 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6079 return;
6080 }
6081 if (ty == Ity_I64) {
6082 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6083 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6084 addInstr(env, ARMInstr_LdrEX(8));
6085 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6086 move it into a result register pair. On a NEON capable
6087 CPU, the result register will be a 64 bit NEON
6088 register, so we must move it there instead. */
6089 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6090 HReg dst = lookupIRTemp(env, res);
6091 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6092 hregARM_R2()));
6093 } else {
6094 HReg r_dst_hi, r_dst_lo;
6095 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6096 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6097 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6098 }
6099 return;
6100 }
6101 /*NOTREACHED*/
6102 vassert(0);
6103 } else {
6104 /* SC */
6105 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6106 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6107 Int szB = 0;
6108 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6109 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6110 switch (tyd) {
6111 case Ity_I8: szB = 1; break;
6112 case Ity_I16: szB = 2; break;
6113 case Ity_I32: szB = 4; break;
6114 default: vassert(0);
6115 }
6116 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6117 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6118 addInstr(env, ARMInstr_StrEX(szB));
6119 } else {
6120 vassert(tyd == Ity_I64);
6121 /* This is really ugly. There is no is/is-not NEON
6122 decision akin to the case for LL, because iselInt64Expr
6123 fudges this for us, and always gets the result into two
6124 GPRs even if this means moving it from a NEON
6125 register. */
6126 HReg rDhi, rDlo;
6127 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6128 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6129 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6130 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6131 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6132 addInstr(env, ARMInstr_StrEX(8));
6133 }
6134 /* now r0 is 1 if failed, 0 if success. Change to IR
6135 conventions (0 is fail, 1 is success). Also transfer
6136 result to r_res. */
6137 IRTemp res = stmt->Ist.LLSC.result;
6138 IRType ty = typeOfIRTemp(env->type_env, res);
6139 HReg r_res = lookupIRTemp(env, res);
6140 ARMRI84* one = ARMRI84_I84(1,0);
6141 vassert(ty == Ity_I1);
6142 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6143 /* And be conservative -- mask off all but the lowest bit */
6144 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6145 return;
6146 }
6147 break;
6148 }
6149
6150 /* --------- MEM FENCE --------- */
6151 case Ist_MBE:
6152 switch (stmt->Ist.MBE.event) {
6153 case Imbe_Fence:
6154 addInstr(env, ARMInstr_MFence());
6155 return;
6156 case Imbe_CancelReservation:
6157 addInstr(env, ARMInstr_CLREX());
6158 return;
6159 default:
6160 break;
6161 }
6162 break;
6163
6164 /* --------- INSTR MARK --------- */
6165 /* Doesn't generate any executable code ... */
6166 case Ist_IMark:
6167 return;
6168
6169 /* --------- NO-OP --------- */
6170 case Ist_NoOp:
6171 return;
6172
6173 /* --------- EXIT --------- */
6174 case Ist_Exit: {
6175 if (stmt->Ist.Exit.dst->tag != Ico_U32)
6176 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6177
6178 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
6179 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
6180 stmt->Ist.Exit.offsIP);
6181
6182 /* Case: boring transfer to known address */
6183 if (stmt->Ist.Exit.jk == Ijk_Boring
6184 || stmt->Ist.Exit.jk == Ijk_Call
6185 || stmt->Ist.Exit.jk == Ijk_Ret) {
6186 if (env->chainingAllowed) {
6187 /* .. almost always true .. */
6188 /* Skip the event check at the dst if this is a forwards
6189 edge. */
6190 Bool toFastEP
6191 = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6192 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6193 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6194 amR15T, cc, toFastEP));
6195 } else {
6196 /* .. very occasionally .. */
6197 /* We can't use chaining, so ask for an assisted transfer,
6198 as that's the only alternative that is allowable. */
6199 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6200 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6201 }
6202 return;
6203 }
6204
6205 /* Case: assisted transfer to arbitrary address */
6206 switch (stmt->Ist.Exit.jk) {
6207 /* Keep this list in sync with that in iselNext below */
6208 case Ijk_ClientReq:
6209 case Ijk_NoDecode:
6210 case Ijk_NoRedir:
6211 case Ijk_Sys_syscall:
6212 case Ijk_InvalICache:
6213 case Ijk_Yield:
6214 {
6215 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6216 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6217 stmt->Ist.Exit.jk));
6218 return;
6219 }
6220 default:
6221 break;
6222 }
6223
6224 /* Do we ever expect to see any other kind? */
6225 goto stmt_fail;
6226 }
6227
6228 default: break;
6229 }
6230 stmt_fail:
6231 ppIRStmt(stmt);
6232 vpanic("iselStmt");
6233 }
6234
6235
6236 /*---------------------------------------------------------*/
6237 /*--- ISEL: Basic block terminators (Nexts) ---*/
6238 /*---------------------------------------------------------*/
6239
/* Generate code for the terminal transfer of a superblock: store the
   next guest IP into the guest state at offset 'offsIP', then transfer
   control as directed by jump kind 'jk'.  Emits a chained (XDirect/
   XIndir) or assisted (XAssisted) exit depending on 'jk', whether the
   destination is a known constant, and whether chaining is allowed. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = cdst->Ico.U32 > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
                                           amR15T, ARMcc_AL,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
         } else {
            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg r = iselIntExpr_R(env, next);
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
6323
6324
6325 /*---------------------------------------------------------*/
6326 /*--- Insn selector top-level ---*/
6327 /*---------------------------------------------------------*/
6328
6329 /* Translate an entire SB to arm code. */
6330
iselSB_ARM(const IRSB * bb,VexArch arch_host,const VexArchInfo * archinfo_host,const VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr max_ga)6331 HInstrArray* iselSB_ARM ( const IRSB* bb,
6332 VexArch arch_host,
6333 const VexArchInfo* archinfo_host,
6334 const VexAbiInfo* vbi/*UNUSED*/,
6335 Int offs_Host_EvC_Counter,
6336 Int offs_Host_EvC_FailAddr,
6337 Bool chainingAllowed,
6338 Bool addProfInc,
6339 Addr max_ga )
6340 {
6341 Int i, j;
6342 HReg hreg, hregHI;
6343 ISelEnv* env;
6344 UInt hwcaps_host = archinfo_host->hwcaps;
6345 ARMAMode1 *amCounter, *amFailAddr;
6346
6347 /* sanity ... */
6348 vassert(arch_host == VexArchARM);
6349
6350 /* Check that the host's endianness is as expected. */
6351 vassert(archinfo_host->endness == VexEndnessLE);
6352
6353 /* guard against unexpected space regressions */
6354 vassert(sizeof(ARMInstr) <= 28);
6355
6356 /* hwcaps should not change from one ISEL call to another. */
6357 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6358
6359 /* Make up an initial environment to use. */
6360 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6361 env->vreg_ctr = 0;
6362
6363 /* Set up output code array. */
6364 env->code = newHInstrArray();
6365
6366 /* Copy BB's type env. */
6367 env->type_env = bb->tyenv;
6368
6369 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6370 change as we go along. */
6371 env->n_vregmap = bb->tyenv->types_used;
6372 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6373 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6374
6375 /* and finally ... */
6376 env->chainingAllowed = chainingAllowed;
6377 env->hwcaps = hwcaps_host;
6378 env->max_ga = max_ga;
6379
6380 /* For each IR temporary, allocate a suitably-kinded virtual
6381 register. */
6382 j = 0;
6383 for (i = 0; i < env->n_vregmap; i++) {
6384 hregHI = hreg = INVALID_HREG;
6385 switch (bb->tyenv->types[i]) {
6386 case Ity_I1:
6387 case Ity_I8:
6388 case Ity_I16:
6389 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
6390 case Ity_I64:
6391 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6392 hreg = mkHReg(True, HRcFlt64, 0, j++);
6393 } else {
6394 hregHI = mkHReg(True, HRcInt32, 0, j++);
6395 hreg = mkHReg(True, HRcInt32, 0, j++);
6396 }
6397 break;
6398 case Ity_F32: hreg = mkHReg(True, HRcFlt32, 0, j++); break;
6399 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
6400 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
6401 default: ppIRType(bb->tyenv->types[i]);
6402 vpanic("iselBB: IRTemp type");
6403 }
6404 env->vregmap[i] = hreg;
6405 env->vregmapHI[i] = hregHI;
6406 }
6407 env->vreg_ctr = j;
6408
6409 /* The very first instruction must be an event check. */
6410 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6411 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6412 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6413
6414 /* Possibly a block counter increment (for profiling). At this
6415 point we don't know the address of the counter, so just pretend
6416 it is zero. It will have to be patched later, but before this
6417 translation is used, by a call to LibVEX_patchProfCtr. */
6418 if (addProfInc) {
6419 addInstr(env, ARMInstr_ProfInc());
6420 }
6421
6422 /* Ok, finally we can iterate over the statements. */
6423 for (i = 0; i < bb->stmts_used; i++)
6424 iselStmt(env, bb->stmts[i]);
6425
6426 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6427
6428 /* record the number of vregs we used. */
6429 env->code->n_vregs = env->vreg_ctr;
6430 return env->code;
6431 }
6432
6433
6434 /*---------------------------------------------------------------*/
6435 /*--- end host_arm_isel.c ---*/
6436 /*---------------------------------------------------------------*/
6437
6438