• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    NEON support is
14    Copyright (C) 2010-2015 Samsung Electronics
15    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16               and Kirill Batuzov <batuzovk@ispras.ru>
17 
18    This program is free software; you can redistribute it and/or
19    modify it under the terms of the GNU General Public License as
20    published by the Free Software Foundation; either version 2 of the
21    License, or (at your option) any later version.
22 
23    This program is distributed in the hope that it will be useful, but
24    WITHOUT ANY WARRANTY; without even the implied warranty of
25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26    General Public License for more details.
27 
28    You should have received a copy of the GNU General Public License
29    along with this program; if not, write to the Free Software
30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31    02110-1301, USA.
32 
33    The GNU General Public License is contained in the file COPYING.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "ir_match.h"
40 
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
46 
47 
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff                         ---*/
50 /*---------------------------------------------------------*/
51 
/* FPSCR value that Vex-generated code expects to run with: all
   exceptions masked, round-to-nearest, non-vector mode, NZCV flags
   cleared and FZ (flush to zero) disabled.  As it happens, all of
   that is denoted by an FPSCR value of zero.

   Hence fpscr should be zero on entry to Vex-generated code and
   unchanged at exit (or at least its bottom 28 bits should be zero).
*/
#define DEFAULT_FPSCR 0
63 
64 
65 /*---------------------------------------------------------*/
66 /*--- ISelEnv                                           ---*/
67 /*---------------------------------------------------------*/
68 
69 /* This carries around:
70 
71    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72      might encounter.  This is computed before insn selection starts,
73      and does not change.
74 
75    - A mapping from IRTemp to HReg.  This tells the insn selector
76      which virtual register(s) are associated with each IRTemp
77      temporary.  This is computed before insn selection starts, and
78      does not change.  We expect this mapping to map precisely the
79      same set of IRTemps as the type mapping does.
80 
81         - vregmap   holds the primary register for the IRTemp.
82         - vregmapHI is only used for 64-bit integer-typed
83              IRTemps.  It holds the identity of a second
84              32-bit virtual HReg, which holds the high half
85              of the value.
86 
87    - The code array, that is, the insns selected so far.
88 
89    - A counter, for generating new virtual registers.
90 
91    - The host hardware capabilities word.  This is set at the start
92      and does not change.
93 
94    - A Bool for indicating whether we may generate chain-me
95      instructions for control flow transfers, or whether we must use
96      XAssisted.
97 
98    - The maximum guest address of any guest insn in this block.
99      Actually, the address of the highest-addressed byte from any insn
100      in this block.  Is set at the start and does not change.  This is
101      used for detecting jumps which are definitely forward-edges from
102      this block, and therefore can be made (chained) to the fast entry
103      point of the destination, thereby avoiding the destination's
104      event check.
105 
106    Note, this is all (well, mostly) host-independent.
107 */
108 
109 typedef
110    struct {
111       /* Constant -- are set at the start and do not change. */
112       IRTypeEnv*   type_env;
113 
114       HReg*        vregmap;
115       HReg*        vregmapHI;
116       Int          n_vregmap;
117 
118       UInt         hwcaps;
119 
120       Bool         chainingAllowed;
121       Addr32       max_ga;
122 
123       /* These are modified as we go along. */
124       HInstrArray* code;
125       Int          vreg_ctr;
126    }
127    ISelEnv;
128 
lookupIRTemp(ISelEnv * env,IRTemp tmp)129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130 {
131    vassert(tmp >= 0);
132    vassert(tmp < env->n_vregmap);
133    return env->vregmap[tmp];
134 }
135 
/* Fetch the register pair associated with IRTemp |tmp|: the entry
   from vregmap is written to *vrLO and the entry from vregmapHI to
   *vrHI.  Asserts that |tmp| is in range and that it actually has a
   valid high-half register. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));

   HReg* loMap = env->vregmap;
   HReg* hiMap = env->vregmapHI;
   *vrLO = loMap[tmp];
   *vrHI = hiMap[tmp];
}
144 
/* Append |instr| to the code array in |env|.  If VEX_TRACE_VCODE is
   set in vex_traceflags, also print the instruction followed by a
   newline. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);

   if (!(vex_traceflags & VEX_TRACE_VCODE))
      return;

   ppARMInstr(instr);
   vex_printf("\n");
}
153 
newVRegI(ISelEnv * env)154 static HReg newVRegI ( ISelEnv* env )
155 {
156    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
157    env->vreg_ctr++;
158    return reg;
159 }
160 
newVRegD(ISelEnv * env)161 static HReg newVRegD ( ISelEnv* env )
162 {
163    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
164    env->vreg_ctr++;
165    return reg;
166 }
167 
newVRegF(ISelEnv * env)168 static HReg newVRegF ( ISelEnv* env )
169 {
170    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
171    env->vreg_ctr++;
172    return reg;
173 }
174 
newVRegV(ISelEnv * env)175 static HReg newVRegV ( ISelEnv* env )
176 {
177    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
178    env->vreg_ctr++;
179    return reg;
180 }
181 
182 /* These are duplicated in guest_arm_toIR.c */
unop(IROp op,IRExpr * a)183 static IRExpr* unop ( IROp op, IRExpr* a )
184 {
185    return IRExpr_Unop(op, a);
186 }
187 
/* Build the binary-operation expression op(a1, a2). */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   IRExpr* res = IRExpr_Binop(op, a1, a2);
   return res;
}
192 
bind(Int binder)193 static IRExpr* bind ( Int binder )
194 {
195    return IRExpr_Binder(binder);
196 }
197 
198 
199 /*---------------------------------------------------------*/
200 /*--- ISEL: Forward declarations                        ---*/
201 /*---------------------------------------------------------*/
202 
203 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
204    iselXXX_wrk do the real work, but are not to be called directly.
205    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
206    checks that all returned registers are virtual.  You should not
207    call the _wrk version directly.
208 */
209 static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
210 static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
211 
212 static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
213 static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
214 
215 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
216 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
217 
218 static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
219 static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
220 
221 static ARMRI84*    iselIntExpr_RI84_wrk
222         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
223 static ARMRI84*    iselIntExpr_RI84
224         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
225 
226 static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
227 static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
228 
229 static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
230 static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
231 
232 static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
233 static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
234 
235 static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
236                                             ISelEnv* env, IRExpr* e );
237 static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
238                                             ISelEnv* env, IRExpr* e );
239 
240 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
241 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
242 
243 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
244 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
245 
246 static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
247 static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
248 
249 static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
250 static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
251 
252 /*---------------------------------------------------------*/
253 /*--- ISEL: Misc helpers                                ---*/
254 /*---------------------------------------------------------*/
255 
/* Rotate |x| right by |sh| bit positions and return the result.
   |sh| must be in the range 0 .. 31; this is asserted. */
static UInt ROR32 ( UInt x, UInt sh ) {
   /* |sh| is unsigned, so a lower-bound check would be vacuous; only
      the upper bound needs to be enforced. */
   vassert(sh < 32);
   /* A rotate by zero is special-cased because the general formula
      below would then shift left by 32, which is undefined behaviour
      for a 32-bit operand (assuming UInt is 32 bits wide). */
   if (sh == 0)
      return x;
   return (x << (32-sh)) | (x >> sh);
}
263 
264 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
265    form, and if so return the components. */
fitsIn8x4(UInt * u8,UInt * u4,UInt u)266 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
267 {
268    UInt i;
269    for (i = 0; i < 16; i++) {
270       if (0 == (u & 0xFFFFFF00)) {
271          *u8 = u;
272          *u4 = i;
273          return True;
274       }
275       u = ROR32(u, 30);
276    }
277    vassert(i == 16);
278    return False;
279 }
280 
281 /* Make a int reg-reg move. */
mk_iMOVds_RR(HReg dst,HReg src)282 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
283 {
284    vassert(hregClass(src) == HRcInt32);
285    vassert(hregClass(dst) == HRcInt32);
286    return ARMInstr_Mov(dst, ARMRI84_R(src));
287 }
288 
289 /* Set the VFP unit's rounding mode to default (round to nearest). */
set_VFP_rounding_default(ISelEnv * env)290 static void set_VFP_rounding_default ( ISelEnv* env )
291 {
292    /* mov rTmp, #DEFAULT_FPSCR
293       fmxr fpscr, rTmp
294    */
295    HReg rTmp = newVRegI(env);
296    addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
297    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
298 }
299 
300 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
301    expression denoting a value in the range 0 .. 3, indicating a round
302    mode encoded as per type IRRoundingMode.  Set FPSCR to have the
303    same rounding.
304 */
305 static
set_VFP_rounding_mode(ISelEnv * env,IRExpr * mode)306 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
307 {
308    /* This isn't simple, because 'mode' carries an IR rounding
309       encoding, and we need to translate that to an ARMvfp one:
310       The IR encoding:
311          00  to nearest (the default)
312          10  to +infinity
313          01  to -infinity
314          11  to zero
315       The ARMvfp encoding:
316          00  to nearest
317          01  to +infinity
318          10  to -infinity
319          11  to zero
320       Easy enough to do; just swap the two bits.
321    */
322    HReg irrm = iselIntExpr_R(env, mode);
323    HReg tL   = newVRegI(env);
324    HReg tR   = newVRegI(env);
325    HReg t3   = newVRegI(env);
326    /* tL = irrm << 1;
327       tR = irrm >> 1;  if we're lucky, these will issue together
328       tL &= 2;
329       tR &= 1;         ditto
330       t3 = tL | tR;
331       t3 <<= 22;
332       fmxr fpscr, t3
333    */
334    addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
335    addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
336    addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
337    addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
338    addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
339    addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
340    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
341 }
342 
343 
344 /*---------------------------------------------------------*/
345 /*--- ISEL: Function call helpers                       ---*/
346 /*---------------------------------------------------------*/
347 
348 /* Used only in doHelperCall.  See big comment in doHelperCall re
349    handling of register-parameter args.  This function figures out
350    whether evaluation of an expression might require use of a fixed
351    register.  If in doubt return True (safe but suboptimal).
352 */
353 static
mightRequireFixedRegs(IRExpr * e)354 Bool mightRequireFixedRegs ( IRExpr* e )
355 {
356    if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
357       // These are always "safe" -- either a copy of r13(sp) in some
358       // arbitrary vreg, or a copy of r8, respectively.
359       return False;
360    }
361    /* Else it's a "normal" expression. */
362    switch (e->tag) {
363    case Iex_RdTmp: case Iex_Const: case Iex_Get:
364       return False;
365    default:
366       return True;
367    }
368 }
369 
370 
371 /* Do a complete function call.  |guard| is a Ity_Bit expression
372    indicating whether or not the call happens.  If guard==NULL, the
373    call is unconditional.  |retloc| is set to indicate where the
374    return value is after the call.  The caller (of this fn) must
375    generate code to add |stackAdjustAfterCall| to the stack pointer
376    after the call is done.  Returns True iff it managed to handle this
377    combination of arg/return types, else returns False. */
378 
379 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)380 Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
381                     /*OUT*/RetLoc* retloc,
382                     ISelEnv* env,
383                     IRExpr* guard,
384                     IRCallee* cee, IRType retTy, IRExpr** args )
385 {
386    ARMCondCode cc;
387    HReg        argregs[ARM_N_ARGREGS];
388    HReg        tmpregs[ARM_N_ARGREGS];
389    Bool        go_fast;
390    Int         n_args, i, nextArgReg;
391    Addr32      target;
392 
393    vassert(ARM_N_ARGREGS == 4);
394 
395    /* Set default returns.  We'll update them later if needed. */
396    *stackAdjustAfterCall = 0;
397    *retloc               = mk_RetLoc_INVALID();
398 
399    /* These are used for cross-checking that IR-level constraints on
400       the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
401    UInt nVECRETs = 0;
402    UInt nBBPTRs  = 0;
403 
404    /* Marshal args for a call and do the call.
405 
406       This function only deals with a tiny set of possibilities, which
407       cover all helpers in practice.  The restrictions are that only
408       arguments in registers are supported, hence only ARM_N_REGPARMS
409       x 32 integer bits in total can be passed.  In fact the only
410       supported arg types are I32 and I64.
411 
412       The return type can be I{64,32} or V128.  In the V128 case, it
413       is expected that |args| will contain the special node
414       IRExpr_VECRET(), in which case this routine generates code to
415       allocate space on the stack for the vector return value.  Since
416       we are not passing any scalars on the stack, it is enough to
417       preallocate the return space before marshalling any arguments,
418       in this case.
419 
420       |args| may also contain IRExpr_BBPTR(), in which case the
421       value in r8 is passed as the corresponding argument.
422 
423       Generating code which is both efficient and correct when
424       parameters are to be passed in registers is difficult, for the
425       reasons elaborated in detail in comments attached to
426       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
427       of the method described in those comments.
428 
429       The problem is split into two cases: the fast scheme and the
430       slow scheme.  In the fast scheme, arguments are computed
431       directly into the target (real) registers.  This is only safe
432       when we can be sure that computation of each argument will not
433       trash any real registers set by computation of any other
434       argument.
435 
436       In the slow scheme, all args are first computed into vregs, and
437       once they are all done, they are moved to the relevant real
438       regs.  This always gives correct code, but it also gives a bunch
439       of vreg-to-rreg moves which are usually redundant but are hard
440       for the register allocator to get rid of.
441 
442       To decide which scheme to use, all argument expressions are
443       first examined.  If they are all so simple that it is clear they
444       will be evaluated without use of any fixed registers, use the
445       fast scheme, else use the slow scheme.  Note also that only
446       unconditional calls may use the fast scheme, since having to
447       compute a condition expression could itself trash real
448       registers.
449 
450       Note this requires being able to examine an expression and
451       determine whether or not evaluation of it might use a fixed
452       register.  That requires knowledge of how the rest of this insn
453       selector works.  Currently just the following 3 are regarded as
454       safe -- hopefully they cover the majority of arguments in
455       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
456    */
457 
458    /* Note that the cee->regparms field is meaningless on ARM hosts
459       (since there is only one calling convention) and so we always
460       ignore it. */
461 
462    n_args = 0;
463    for (i = 0; args[i]; i++) {
464       IRExpr* arg = args[i];
465       if (UNLIKELY(arg->tag == Iex_VECRET)) {
466          nVECRETs++;
467       } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
468          nBBPTRs++;
469       }
470       n_args++;
471    }
472 
473    argregs[0] = hregARM_R0();
474    argregs[1] = hregARM_R1();
475    argregs[2] = hregARM_R2();
476    argregs[3] = hregARM_R3();
477 
478    tmpregs[0] = tmpregs[1] = tmpregs[2] =
479    tmpregs[3] = INVALID_HREG;
480 
481    /* First decide which scheme (slow or fast) is to be used.  First
482       assume the fast scheme, and select slow if any contraindications
483       (wow) appear. */
484 
485    go_fast = True;
486 
487    if (guard) {
488       if (guard->tag == Iex_Const
489           && guard->Iex.Const.con->tag == Ico_U1
490           && guard->Iex.Const.con->Ico.U1 == True) {
491          /* unconditional */
492       } else {
493          /* Not manifestly unconditional -- be conservative. */
494          go_fast = False;
495       }
496    }
497 
498    if (go_fast) {
499       for (i = 0; i < n_args; i++) {
500          if (mightRequireFixedRegs(args[i])) {
501             go_fast = False;
502             break;
503          }
504       }
505    }
506 
507    if (go_fast) {
508       if (retTy == Ity_V128 || retTy == Ity_V256)
509          go_fast = False;
510    }
511 
512    /* At this point the scheme to use has been established.  Generate
513       code to get the arg values into the argument rregs.  If we run
514       out of arg regs, give up. */
515 
516    if (go_fast) {
517 
518       /* FAST SCHEME */
519       nextArgReg = 0;
520 
521       for (i = 0; i < n_args; i++) {
522          IRExpr* arg = args[i];
523 
524          IRType  aTy = Ity_INVALID;
525          if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
526             aTy = typeOfIRExpr(env->type_env, arg);
527 
528          if (nextArgReg >= ARM_N_ARGREGS)
529             return False; /* out of argregs */
530 
531          if (aTy == Ity_I32) {
532             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
533                                         iselIntExpr_R(env, arg) ));
534             nextArgReg++;
535          }
536          else if (aTy == Ity_I64) {
537             /* 64-bit args must be passed in an a reg-pair of the form
538                n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
539                On a little-endian host, the less significant word is
540                passed in the lower-numbered register. */
541             if (nextArgReg & 1) {
542                if (nextArgReg >= ARM_N_ARGREGS)
543                   return False; /* out of argregs */
544                addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
545                nextArgReg++;
546             }
547             if (nextArgReg >= ARM_N_ARGREGS)
548                return False; /* out of argregs */
549             HReg raHi, raLo;
550             iselInt64Expr(&raHi, &raLo, env, arg);
551             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
552             nextArgReg++;
553             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
554             nextArgReg++;
555          }
556          else if (arg->tag == Iex_BBPTR) {
557             vassert(0); //ATC
558             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
559                                         hregARM_R8() ));
560             nextArgReg++;
561          }
562          else if (arg->tag == Iex_VECRET) {
563             // If this happens, it denotes ill-formed IR
564             vassert(0);
565          }
566          else
567             return False; /* unhandled arg type */
568       }
569 
570       /* Fast scheme only applies for unconditional calls.  Hence: */
571       cc = ARMcc_AL;
572 
573    } else {
574 
575       /* SLOW SCHEME; move via temporaries */
576       nextArgReg = 0;
577 
578       for (i = 0; i < n_args; i++) {
579          IRExpr* arg = args[i];
580 
581          IRType  aTy = Ity_INVALID;
582          if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
583             aTy  = typeOfIRExpr(env->type_env, arg);
584 
585          if (nextArgReg >= ARM_N_ARGREGS)
586             return False; /* out of argregs */
587 
588          if (aTy == Ity_I32) {
589             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
590             nextArgReg++;
591          }
592          else if (aTy == Ity_I64) {
593             /* Same comment applies as in the Fast-scheme case. */
594             if (nextArgReg & 1)
595                nextArgReg++;
596             if (nextArgReg + 1 >= ARM_N_ARGREGS)
597                return False; /* out of argregs */
598             HReg raHi, raLo;
599             iselInt64Expr(&raHi, &raLo, env, args[i]);
600             tmpregs[nextArgReg] = raLo;
601             nextArgReg++;
602             tmpregs[nextArgReg] = raHi;
603             nextArgReg++;
604          }
605          else if (arg->tag == Iex_BBPTR) {
606             vassert(0); //ATC
607             tmpregs[nextArgReg] = hregARM_R8();
608             nextArgReg++;
609          }
610          else if (arg->tag == Iex_VECRET) {
611             // If this happens, it denotes ill-formed IR
612             vassert(0);
613          }
614          else
615             return False; /* unhandled arg type */
616       }
617 
618       /* Now we can compute the condition.  We can't do it earlier
619          because the argument computations could trash the condition
620          codes.  Be a bit clever to handle the common case where the
621          guard is 1:Bit. */
622       cc = ARMcc_AL;
623       if (guard) {
624          if (guard->tag == Iex_Const
625              && guard->Iex.Const.con->tag == Ico_U1
626              && guard->Iex.Const.con->Ico.U1 == True) {
627             /* unconditional -- do nothing */
628          } else {
629             cc = iselCondCode( env, guard );
630          }
631       }
632 
633       /* Move the args to their final destinations. */
634       for (i = 0; i < nextArgReg; i++) {
635          if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
636             addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
637             continue;
638          }
639          /* None of these insns, including any spill code that might
640             be generated, may alter the condition codes. */
641          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
642       }
643 
644    }
645 
646    /* Should be assured by checks above */
647    vassert(nextArgReg <= ARM_N_ARGREGS);
648 
649    /* Do final checks, set the return values, and generate the call
650       instruction proper. */
651    vassert(nBBPTRs == 0 || nBBPTRs == 1);
652    vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
653    vassert(*stackAdjustAfterCall == 0);
654    vassert(is_RetLoc_INVALID(*retloc));
655    switch (retTy) {
656          case Ity_INVALID:
657             /* Function doesn't return a value. */
658             *retloc = mk_RetLoc_simple(RLPri_None);
659             break;
660          case Ity_I64:
661             *retloc = mk_RetLoc_simple(RLPri_2Int);
662             break;
663          case Ity_I32: case Ity_I16: case Ity_I8:
664             *retloc = mk_RetLoc_simple(RLPri_Int);
665             break;
666          case Ity_V128:
667             vassert(0); // ATC
668             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
669             *stackAdjustAfterCall = 16;
670             break;
671          case Ity_V256:
672             vassert(0); // ATC
673             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
674             *stackAdjustAfterCall = 32;
675             break;
676          default:
677             /* IR can denote other possible return types, but we don't
678                handle those here. */
679            vassert(0);
680    }
681 
682    /* Finally, generate the call itself.  This needs the *retloc value
683       set in the switch above, which is why it's at the end. */
684 
685    /* nextArgReg doles out argument registers.  Since these are
686       assigned in the order r0, r1, r2, r3, its numeric value at this
687       point, which must be between 0 and 4 inclusive, is going to be
688       equal to the number of arg regs in use for the call.  Hence bake
689       that number into the call (we'll need to know it when doing
690       register allocation, to know what regs the call reads.)
691 
692       There is a bit of a twist -- harmless but worth recording.
693       Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
694       the first arg in r0 and the second in r3:r2, but r1 isn't used.
695       We nevertheless have nextArgReg==4 and bake that into the call
696       instruction.  This will mean the register allocator wil believe
697       this insn reads r1 when in fact it doesn't.  But that's
698       harmless; it just artificially extends the live range of r1
699       unnecessarily.  The best fix would be to put into the
700       instruction, a bitmask indicating which of r0/1/2/3 carry live
701       values.  But that's too much hassle. */
702 
703    target = (Addr)cee->addr;
704    addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
705 
706    return True; /* success */
707 }
708 
709 
710 /*---------------------------------------------------------*/
711 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
712 /*---------------------------------------------------------*/
713 
714 /* Select insns for an integer-typed expression, and add them to the
715    code list.  Return a reg holding the result.  This reg will be a
716    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
717    want to modify it, ask for a new vreg, copy it in there, and modify
718    the copy.  The register allocator will do its best to map both
719    vregs to the same real register, so the copies will often disappear
720    later in the game.
721 
722    This should handle expressions of 32, 16 and 8-bit type.  All
723    results are returned in a 32-bit register.  For 16- and 8-bit
724    expressions, the upper 16/24 bits are arbitrary, so you should mask
725    or sign extend partial values if necessary.
726 */
727 
728 /* --------------------- AMode1 --------------------- */
729 
730 /* Return an AMode1 which computes the value of the specified
731    expression, possibly also adding insns to the code list as a
732    result.  The expression may only be a 32-bit one.
733 */
734 
sane_AMode1(ARMAMode1 * am)735 static Bool sane_AMode1 ( ARMAMode1* am )
736 {
737    switch (am->tag) {
738       case ARMam1_RI:
739          return
740             toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
741                     && (hregIsVirtual(am->ARMam1.RI.reg)
742                         || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
743                     && am->ARMam1.RI.simm13 >= -4095
744                     && am->ARMam1.RI.simm13 <= 4095 );
745       case ARMam1_RRS:
746          return
747             toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
748                     && hregIsVirtual(am->ARMam1.RRS.base)
749                     && hregClass(am->ARMam1.RRS.index) == HRcInt32
750                     && hregIsVirtual(am->ARMam1.RRS.index)
751                     && am->ARMam1.RRS.shift >= 0
752                     && am->ARMam1.RRS.shift <= 3 );
753       default:
754          vpanic("sane_AMode: unknown ARM AMode1 tag");
755    }
756 }
757 
iselIntExpr_AMode1(ISelEnv * env,IRExpr * e)758 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
759 {
760    ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
761    vassert(sane_AMode1(am));
762    return am;
763 }
764 
iselIntExpr_AMode1_wrk(ISelEnv * env,IRExpr * e)765 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
766 {
767    IRType ty = typeOfIRExpr(env->type_env,e);
768    vassert(ty == Ity_I32);
769 
770    /* FIXME: add RRS matching */
771 
772    /* {Add32,Sub32}(expr,simm13) */
773    if (e->tag == Iex_Binop
774        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
775        && e->Iex.Binop.arg2->tag == Iex_Const
776        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
777       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
778       if (simm >= -4095 && simm <= 4095) {
779          HReg reg;
780          if (e->Iex.Binop.op == Iop_Sub32)
781             simm = -simm;
782          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
783          return ARMAMode1_RI(reg, simm);
784       }
785    }
786 
787    /* Doesn't match anything in particular.  Generate it into
788       a register and use that. */
789    {
790       HReg reg = iselIntExpr_R(env, e);
791       return ARMAMode1_RI(reg, 0);
792    }
793 
794 }
795 
796 
797 /* --------------------- AMode2 --------------------- */
798 
799 /* Return an AMode2 which computes the value of the specified
800    expression, possibly also adding insns to the code list as a
801    result.  The expression may only be a 32-bit one.
802 */
803 
sane_AMode2(ARMAMode2 * am)804 static Bool sane_AMode2 ( ARMAMode2* am )
805 {
806    switch (am->tag) {
807       case ARMam2_RI:
808          return
809             toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
810                     && hregIsVirtual(am->ARMam2.RI.reg)
811                     && am->ARMam2.RI.simm9 >= -255
812                     && am->ARMam2.RI.simm9 <= 255 );
813       case ARMam2_RR:
814          return
815             toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
816                     && hregIsVirtual(am->ARMam2.RR.base)
817                     && hregClass(am->ARMam2.RR.index) == HRcInt32
818                     && hregIsVirtual(am->ARMam2.RR.index) );
819       default:
820          vpanic("sane_AMode: unknown ARM AMode2 tag");
821    }
822 }
823 
iselIntExpr_AMode2(ISelEnv * env,IRExpr * e)824 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
825 {
826    ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
827    vassert(sane_AMode2(am));
828    return am;
829 }
830 
iselIntExpr_AMode2_wrk(ISelEnv * env,IRExpr * e)831 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
832 {
833    IRType ty = typeOfIRExpr(env->type_env,e);
834    vassert(ty == Ity_I32);
835 
836    /* FIXME: add RR matching */
837 
838    /* {Add32,Sub32}(expr,simm8) */
839    if (e->tag == Iex_Binop
840        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
841        && e->Iex.Binop.arg2->tag == Iex_Const
842        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
843       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
844       if (simm >= -255 && simm <= 255) {
845          HReg reg;
846          if (e->Iex.Binop.op == Iop_Sub32)
847             simm = -simm;
848          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
849          return ARMAMode2_RI(reg, simm);
850       }
851    }
852 
853    /* Doesn't match anything in particular.  Generate it into
854       a register and use that. */
855    {
856       HReg reg = iselIntExpr_R(env, e);
857       return ARMAMode2_RI(reg, 0);
858    }
859 
860 }
861 
862 
863 /* --------------------- AModeV --------------------- */
864 
865 /* Return an AModeV which computes the value of the specified
866    expression, possibly also adding insns to the code list as a
867    result.  The expression may only be a 32-bit one.
868 */
869 
sane_AModeV(ARMAModeV * am)870 static Bool sane_AModeV ( ARMAModeV* am )
871 {
872   return toBool( hregClass(am->reg) == HRcInt32
873                  && hregIsVirtual(am->reg)
874                  && am->simm11 >= -1020 && am->simm11 <= 1020
875                  && 0 == (am->simm11 & 3) );
876 }
877 
iselIntExpr_AModeV(ISelEnv * env,IRExpr * e)878 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
879 {
880    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
881    vassert(sane_AModeV(am));
882    return am;
883 }
884 
iselIntExpr_AModeV_wrk(ISelEnv * env,IRExpr * e)885 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
886 {
887    IRType ty = typeOfIRExpr(env->type_env,e);
888    vassert(ty == Ity_I32);
889 
890    /* {Add32,Sub32}(expr, simm8 << 2) */
891    if (e->tag == Iex_Binop
892        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
893        && e->Iex.Binop.arg2->tag == Iex_Const
894        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
895       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
896       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
897          HReg reg;
898          if (e->Iex.Binop.op == Iop_Sub32)
899             simm = -simm;
900          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
901          return mkARMAModeV(reg, simm);
902       }
903    }
904 
905    /* Doesn't match anything in particular.  Generate it into
906       a register and use that. */
907    {
908       HReg reg = iselIntExpr_R(env, e);
909       return mkARMAModeV(reg, 0);
910    }
911 
912 }
913 
914 /* -------------------- AModeN -------------------- */
915 
iselIntExpr_AModeN(ISelEnv * env,IRExpr * e)916 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
917 {
918    return iselIntExpr_AModeN_wrk(env, e);
919 }
920 
iselIntExpr_AModeN_wrk(ISelEnv * env,IRExpr * e)921 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
922 {
923    HReg reg = iselIntExpr_R(env, e);
924    return mkARMAModeN_R(reg);
925 }
926 
927 
928 /* --------------------- RI84 --------------------- */
929 
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv must not be NULL; *didInv is set to
   True if the returned I84 denotes 'not e', and to False otherwise.
   This complication is so as to allow generation of an RI84 which is
   suitable for use in either an AND or BIC instruction, without
   knowing (before this call) which one.
*/
iselIntExpr_RI84(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)937 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
938                                    ISelEnv* env, IRExpr* e )
939 {
940    ARMRI84* ri;
941    if (mayInv)
942       vassert(didInv != NULL);
943    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
944    /* sanity checks ... */
945    switch (ri->tag) {
946       case ARMri84_I84:
947          return ri;
948       case ARMri84_R:
949          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
950          vassert(hregIsVirtual(ri->ARMri84.R.reg));
951          return ri;
952       default:
953          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
954    }
955 }
956 
957 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI84_wrk(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)958 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
959                                        ISelEnv* env, IRExpr* e )
960 {
961    IRType ty = typeOfIRExpr(env->type_env,e);
962    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
963 
964    if (didInv) *didInv = False;
965 
966    /* special case: immediate */
967    if (e->tag == Iex_Const) {
968       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
969       switch (e->Iex.Const.con->tag) {
970          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
971          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
972          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
973          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
974       }
975       if (fitsIn8x4(&u8, &u4, u)) {
976          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
977       }
978       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
979          vassert(didInv);
980          *didInv = True;
981          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
982       }
983       /* else fail, fall through to default case */
984    }
985 
986    /* default case: calculate into a register and return that */
987    {
988       HReg r = iselIntExpr_R ( env, e );
989       return ARMRI84_R(r);
990    }
991 }
992 
993 
994 /* --------------------- RI5 --------------------- */
995 
996 /* Select instructions to generate 'e' into a RI5. */
997 
iselIntExpr_RI5(ISelEnv * env,IRExpr * e)998 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
999 {
1000    ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1001    /* sanity checks ... */
1002    switch (ri->tag) {
1003       case ARMri5_I5:
1004          return ri;
1005       case ARMri5_R:
1006          vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1007          vassert(hregIsVirtual(ri->ARMri5.R.reg));
1008          return ri;
1009       default:
1010          vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1011    }
1012 }
1013 
1014 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI5_wrk(ISelEnv * env,IRExpr * e)1015 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1016 {
1017    IRType ty = typeOfIRExpr(env->type_env,e);
1018    vassert(ty == Ity_I32 || ty == Ity_I8);
1019 
1020    /* special case: immediate */
1021    if (e->tag == Iex_Const) {
1022       UInt u; /* both invalid */
1023       switch (e->Iex.Const.con->tag) {
1024          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1025          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1026          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1027          default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1028       }
1029       if (u >= 1 && u <= 31) {
1030          return ARMRI5_I5(u);
1031       }
1032       /* else fail, fall through to default case */
1033    }
1034 
1035    /* default case: calculate into a register and return that */
1036    {
1037       HReg r = iselIntExpr_R ( env, e );
1038       return ARMRI5_R(r);
1039    }
1040 }
1041 
1042 
1043 /* ------------------- CondCode ------------------- */
1044 
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
1048 
iselCondCode(ISelEnv * env,IRExpr * e)1049 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1050 {
1051    ARMCondCode cc = iselCondCode_wrk(env,e);
1052    vassert(cc != ARMcc_NV);
1053    return cc;
1054 }
1055 
iselCondCode_wrk(ISelEnv * env,IRExpr * e)1056 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1057 {
1058    vassert(e);
1059    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1060 
1061    /* var */
1062    if (e->tag == Iex_RdTmp) {
1063       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1064       /* CmpOrTst doesn't modify rTmp; so this is OK. */
1065       ARMRI84* one  = ARMRI84_I84(1,0);
1066       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1067       return ARMcc_NE;
1068    }
1069 
1070    /* Not1(e) */
1071    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1072       /* Generate code for the arg, and negate the test condition */
1073       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1074    }
1075 
1076    /* --- patterns rooted at: 32to1 --- */
1077 
1078    if (e->tag == Iex_Unop
1079        && e->Iex.Unop.op == Iop_32to1) {
1080       HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1081       ARMRI84* one  = ARMRI84_I84(1,0);
1082       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1083       return ARMcc_NE;
1084    }
1085 
1086    /* --- patterns rooted at: CmpNEZ8 --- */
1087 
1088    if (e->tag == Iex_Unop
1089        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1090       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1091       ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
1092       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1093       return ARMcc_NE;
1094    }
1095 
1096    /* --- patterns rooted at: CmpNEZ32 --- */
1097 
1098    if (e->tag == Iex_Unop
1099        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1100       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1101       ARMRI84* zero = ARMRI84_I84(0,0);
1102       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1103       return ARMcc_NE;
1104    }
1105 
1106    /* --- patterns rooted at: CmpNEZ64 --- */
1107 
1108    if (e->tag == Iex_Unop
1109        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1110       HReg     tHi, tLo;
1111       HReg     tmp  = newVRegI(env);
1112       ARMRI84* zero = ARMRI84_I84(0,0);
1113       iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1114       addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1115       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1116       return ARMcc_NE;
1117    }
1118 
1119    /* --- Cmp*32*(x,y) --- */
1120    if (e->tag == Iex_Binop
1121        && (e->Iex.Binop.op == Iop_CmpEQ32
1122            || e->Iex.Binop.op == Iop_CmpNE32
1123            || e->Iex.Binop.op == Iop_CmpLT32S
1124            || e->Iex.Binop.op == Iop_CmpLT32U
1125            || e->Iex.Binop.op == Iop_CmpLE32S
1126            || e->Iex.Binop.op == Iop_CmpLE32U)) {
1127       HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1128       ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1129                                        env, e->Iex.Binop.arg2);
1130       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1131       switch (e->Iex.Binop.op) {
1132          case Iop_CmpEQ32:  return ARMcc_EQ;
1133          case Iop_CmpNE32:  return ARMcc_NE;
1134          case Iop_CmpLT32S: return ARMcc_LT;
1135          case Iop_CmpLT32U: return ARMcc_LO;
1136          case Iop_CmpLE32S: return ARMcc_LE;
1137          case Iop_CmpLE32U: return ARMcc_LS;
1138          default: vpanic("iselCondCode(arm): CmpXX32");
1139       }
1140    }
1141 
1142    /* const */
1143    /* Constant 1:Bit */
1144    if (e->tag == Iex_Const) {
1145       HReg r;
1146       vassert(e->Iex.Const.con->tag == Ico_U1);
1147       vassert(e->Iex.Const.con->Ico.U1 == True
1148               || e->Iex.Const.con->Ico.U1 == False);
1149       r = newVRegI(env);
1150       addInstr(env, ARMInstr_Imm32(r, 0));
1151       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1152       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1153    }
1154 
1155    // JRS 2013-Jan-03: this seems completely nonsensical
1156    /* --- CasCmpEQ* --- */
1157    /* Ist_Cas has a dummy argument to compare with, so comparison is
1158       always true. */
1159    //if (e->tag == Iex_Binop
1160    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
1161    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
1162    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1163    //   return ARMcc_AL;
1164    //}
1165 
1166    ppIRExpr(e);
1167    vpanic("iselCondCode");
1168 }
1169 
1170 
1171 /* --------------------- Reg --------------------- */
1172 
iselIntExpr_R(ISelEnv * env,IRExpr * e)1173 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1174 {
1175    HReg r = iselIntExpr_R_wrk(env, e);
1176    /* sanity checks ... */
1177 #  if 0
1178    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1179 #  endif
1180    vassert(hregClass(r) == HRcInt32);
1181    vassert(hregIsVirtual(r));
1182    return r;
1183 }
1184 
1185 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,IRExpr * e)1186 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1187 {
1188    IRType ty = typeOfIRExpr(env->type_env,e);
1189    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1190 
1191    switch (e->tag) {
1192 
1193    /* --------- TEMP --------- */
1194    case Iex_RdTmp: {
1195       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1196    }
1197 
1198    /* --------- LOAD --------- */
1199    case Iex_Load: {
1200       HReg dst  = newVRegI(env);
1201 
1202       if (e->Iex.Load.end != Iend_LE)
1203          goto irreducible;
1204 
1205       if (ty == Ity_I32) {
1206          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1207          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1208          return dst;
1209       }
1210       if (ty == Ity_I16) {
1211          ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1212          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1213                                        True/*isLoad*/, False/*!signedLoad*/,
1214                                        dst, amode));
1215          return dst;
1216       }
1217       if (ty == Ity_I8) {
1218          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1219          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1220          return dst;
1221       }
1222       break;
1223    }
1224 
1225 //zz   /* --------- TERNARY OP --------- */
1226 //zz   case Iex_Triop: {
1227 //zz      IRTriop *triop = e->Iex.Triop.details;
1228 //zz      /* C3210 flags following FPU partial remainder (fprem), both
1229 //zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1230 //zz      if (triop->op == Iop_PRemC3210F64
1231 //zz          || triop->op == Iop_PRem1C3210F64) {
1232 //zz         HReg junk = newVRegF(env);
1233 //zz         HReg dst  = newVRegI(env);
1234 //zz         HReg srcL = iselDblExpr(env, triop->arg2);
1235 //zz         HReg srcR = iselDblExpr(env, triop->arg3);
1236 //zz         /* XXXROUNDINGFIXME */
1237 //zz         /* set roundingmode here */
1238 //zz         addInstr(env, X86Instr_FpBinary(
1239 //zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1240 //zz                              ? Xfp_PREM : Xfp_PREM1,
1241 //zz                           srcL,srcR,junk
1242 //zz                 ));
1243 //zz         /* The previous pseudo-insn will have left the FPU's C3210
1244 //zz            flags set correctly.  So bag them. */
1245 //zz         addInstr(env, X86Instr_FpStSW_AX());
1246 //zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1247 //zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1248 //zz         return dst;
1249 //zz      }
1250 //zz
1251 //zz      break;
1252 //zz   }
1253 
1254    /* --------- BINARY OP --------- */
1255    case Iex_Binop: {
1256 
1257       ARMAluOp   aop = 0; /* invalid */
1258       ARMShiftOp sop = 0; /* invalid */
1259 
1260       /* ADD/SUB/AND/OR/XOR */
1261       switch (e->Iex.Binop.op) {
1262          case Iop_And32: {
1263             Bool     didInv = False;
1264             HReg     dst    = newVRegI(env);
1265             HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1266             ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1267                                                env, e->Iex.Binop.arg2);
1268             addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1269                                        dst, argL, argR));
1270             return dst;
1271          }
1272          case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1273          case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1274          case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1275          case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1276          std_binop: {
1277             HReg     dst  = newVRegI(env);
1278             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1279             ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1280                                              env, e->Iex.Binop.arg2);
1281             addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1282             return dst;
1283          }
1284          default: break;
1285       }
1286 
1287       /* SHL/SHR/SAR */
1288       switch (e->Iex.Binop.op) {
1289          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1290          case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1291          case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1292          sh_binop: {
1293             HReg    dst  = newVRegI(env);
1294             HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1295             ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1296             addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1297             vassert(ty == Ity_I32); /* else the IR is ill-typed */
1298             return dst;
1299          }
1300          default: break;
1301       }
1302 
1303       /* MUL */
1304       if (e->Iex.Binop.op == Iop_Mul32) {
1305          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1306          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1307          HReg dst  = newVRegI(env);
1308          addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1309          addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1310          addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1311          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1312          return dst;
1313       }
1314 
1315       /* Handle misc other ops. */
1316 
1317       if (e->Iex.Binop.op == Iop_Max32U) {
1318          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1319          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1320          HReg dst  = newVRegI(env);
1321          addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1322                                          ARMRI84_R(argR)));
1323          addInstr(env, mk_iMOVds_RR(dst, argL));
1324          addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1325          return dst;
1326       }
1327 
1328       if (e->Iex.Binop.op == Iop_CmpF64) {
1329          HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1330          HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1331          HReg dst = newVRegI(env);
1332          /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1333             FMSTAT, so we can examine the results directly. */
1334          addInstr(env, ARMInstr_VCmpD(dL, dR));
1335          /* Create in dst, the IRCmpF64Result encoded result. */
1336          addInstr(env, ARMInstr_Imm32(dst, 0));
1337          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1338          addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1339          addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1340          addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1341          return dst;
1342       }
1343 
1344       if (e->Iex.Binop.op == Iop_F64toI32S
1345           || e->Iex.Binop.op == Iop_F64toI32U) {
1346          /* Wretched uglyness all round, due to having to deal
1347             with rounding modes.  Oh well. */
1348          /* FIXME: if arg1 is a constant indicating round-to-zero,
1349             then we could skip all this arsing around with FPSCR and
1350             simply emit FTO{S,U}IZD. */
1351          Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1352          HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1353          set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1354          /* FTO{S,U}ID valF, valD */
1355          HReg valF = newVRegF(env);
1356          addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1357                                        valF, valD));
1358          set_VFP_rounding_default(env);
1359          /* VMOV dst, valF */
1360          HReg dst = newVRegI(env);
1361          addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1362          return dst;
1363       }
1364 
1365       if (e->Iex.Binop.op == Iop_GetElem8x8
1366           || e->Iex.Binop.op == Iop_GetElem16x4
1367           || e->Iex.Binop.op == Iop_GetElem32x2) {
1368          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1369             HReg res = newVRegI(env);
1370             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1371             UInt index, size;
1372             if (e->Iex.Binop.arg2->tag != Iex_Const ||
1373                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1374                vpanic("ARM target supports GetElem with constant "
1375                       "second argument only (neon)\n");
1376             }
1377             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1378             switch (e->Iex.Binop.op) {
1379                case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1380                case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1381                case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1382                default: vassert(0);
1383             }
1384             addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1385                                            mkARMNRS(ARMNRS_Reg, res, 0),
1386                                            mkARMNRS(ARMNRS_Scalar, arg, index),
1387                                            size, False));
1388             return res;
1389          }
1390       }
1391 
1392       if (e->Iex.Binop.op == Iop_GetElem32x2
1393           && e->Iex.Binop.arg2->tag == Iex_Const
1394           && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1395          /* We may have to do GetElem32x2 on a non-NEON capable
1396             target. */
1397          IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1398          vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1399          UInt index = con->Ico.U8;
1400          if (index >= 0 && index <= 1) {
1401             HReg rHi, rLo;
1402             iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1403             return index == 0 ? rLo : rHi;
1404          }
1405       }
1406 
1407       if (e->Iex.Binop.op == Iop_GetElem8x16
1408           || e->Iex.Binop.op == Iop_GetElem16x8
1409           || e->Iex.Binop.op == Iop_GetElem32x4) {
1410          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1411             HReg res = newVRegI(env);
1412             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1413             UInt index, size;
1414             if (e->Iex.Binop.arg2->tag != Iex_Const ||
1415                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1416                vpanic("ARM target supports GetElem with constant "
1417                       "second argument only (neon)\n");
1418             }
1419             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1420             switch (e->Iex.Binop.op) {
1421                case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1422                case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1423                case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1424                default: vassert(0);
1425             }
1426             addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1427                                            mkARMNRS(ARMNRS_Reg, res, 0),
1428                                            mkARMNRS(ARMNRS_Scalar, arg, index),
1429                                            size, True));
1430             return res;
1431          }
1432       }
1433 
1434       /* All cases involving host-side helper calls. */
1435       void* fn = NULL;
1436       switch (e->Iex.Binop.op) {
1437          case Iop_Add16x2:
1438             fn = &h_generic_calc_Add16x2; break;
1439          case Iop_Sub16x2:
1440             fn = &h_generic_calc_Sub16x2; break;
1441          case Iop_HAdd16Ux2:
1442             fn = &h_generic_calc_HAdd16Ux2; break;
1443          case Iop_HAdd16Sx2:
1444             fn = &h_generic_calc_HAdd16Sx2; break;
1445          case Iop_HSub16Ux2:
1446             fn = &h_generic_calc_HSub16Ux2; break;
1447          case Iop_HSub16Sx2:
1448             fn = &h_generic_calc_HSub16Sx2; break;
1449          case Iop_QAdd16Sx2:
1450             fn = &h_generic_calc_QAdd16Sx2; break;
1451          case Iop_QAdd16Ux2:
1452             fn = &h_generic_calc_QAdd16Ux2; break;
1453          case Iop_QSub16Sx2:
1454             fn = &h_generic_calc_QSub16Sx2; break;
1455          case Iop_Add8x4:
1456             fn = &h_generic_calc_Add8x4; break;
1457          case Iop_Sub8x4:
1458             fn = &h_generic_calc_Sub8x4; break;
1459          case Iop_HAdd8Ux4:
1460             fn = &h_generic_calc_HAdd8Ux4; break;
1461          case Iop_HAdd8Sx4:
1462             fn = &h_generic_calc_HAdd8Sx4; break;
1463          case Iop_HSub8Ux4:
1464             fn = &h_generic_calc_HSub8Ux4; break;
1465          case Iop_HSub8Sx4:
1466             fn = &h_generic_calc_HSub8Sx4; break;
1467          case Iop_QAdd8Sx4:
1468             fn = &h_generic_calc_QAdd8Sx4; break;
1469          case Iop_QAdd8Ux4:
1470             fn = &h_generic_calc_QAdd8Ux4; break;
1471          case Iop_QSub8Sx4:
1472             fn = &h_generic_calc_QSub8Sx4; break;
1473          case Iop_QSub8Ux4:
1474             fn = &h_generic_calc_QSub8Ux4; break;
1475          case Iop_Sad8Ux4:
1476             fn = &h_generic_calc_Sad8Ux4; break;
1477          case Iop_QAdd32S:
1478             fn = &h_generic_calc_QAdd32S; break;
1479          case Iop_QSub32S:
1480             fn = &h_generic_calc_QSub32S; break;
1481          case Iop_QSub16Ux2:
1482             fn = &h_generic_calc_QSub16Ux2; break;
1483          case Iop_DivU32:
1484             fn = &h_calc_udiv32_w_arm_semantics; break;
1485          case Iop_DivS32:
1486             fn = &h_calc_sdiv32_w_arm_semantics; break;
1487          default:
1488             break;
1489       }
1490 
1491       if (fn) {
1492          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1493          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1494          HReg res  = newVRegI(env);
1495          addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1496          addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1497          addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1498                                       2, mk_RetLoc_simple(RLPri_Int) ));
1499          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1500          return res;
1501       }
1502 
1503       break;
1504    }
1505 
1506    /* --------- UNARY OP --------- */
1507    case Iex_Unop: {
1508 
1509 //zz      /* 1Uto8(32to1(expr32)) */
1510 //zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1511 //zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1512 //zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1513 //zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1514 //zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1515 //zz            IRExpr* expr32 = mi.bindee[0];
1516 //zz            HReg dst = newVRegI(env);
1517 //zz            HReg src = iselIntExpr_R(env, expr32);
1518 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1519 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1520 //zz                                          X86RMI_Imm(1), dst));
1521 //zz            return dst;
1522 //zz         }
1523 //zz      }
1524 //zz
1525 //zz      /* 8Uto32(LDle(expr32)) */
1526 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1527 //zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1528 //zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1529 //zz                        unop(Iop_8Uto32,
1530 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1531 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1532 //zz            HReg dst = newVRegI(env);
1533 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1534 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1535 //zz            return dst;
1536 //zz         }
1537 //zz      }
1538 //zz
1539 //zz      /* 8Sto32(LDle(expr32)) */
1540 //zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1541 //zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1542 //zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1543 //zz                        unop(Iop_8Sto32,
1544 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1545 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1546 //zz            HReg dst = newVRegI(env);
1547 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1548 //zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1549 //zz            return dst;
1550 //zz         }
1551 //zz      }
1552 //zz
1553 //zz      /* 16Uto32(LDle(expr32)) */
1554 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1555 //zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1556 //zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1557 //zz                        unop(Iop_16Uto32,
1558 //zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1559 //zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1560 //zz            HReg dst = newVRegI(env);
1561 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1562 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1563 //zz            return dst;
1564 //zz         }
1565 //zz      }
1566 //zz
1567 //zz      /* 8Uto32(GET:I8) */
1568 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1569 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1570 //zz            HReg      dst;
1571 //zz            X86AMode* amode;
1572 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1573 //zz            dst = newVRegI(env);
1574 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1575 //zz                                hregX86_EBP());
1576 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1577 //zz            return dst;
1578 //zz         }
1579 //zz      }
1580 //zz
1581 //zz      /* 16to32(GET:I16) */
1582 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1583 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1584 //zz            HReg      dst;
1585 //zz            X86AMode* amode;
1586 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1587 //zz            dst = newVRegI(env);
1588 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1589 //zz                                hregX86_EBP());
1590 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1591 //zz            return dst;
1592 //zz         }
1593 //zz      }
1594 
1595       switch (e->Iex.Unop.op) {
1596          case Iop_8Uto32: {
1597             HReg dst = newVRegI(env);
1598             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1599             addInstr(env, ARMInstr_Alu(ARMalu_AND,
1600                                        dst, src, ARMRI84_I84(0xFF,0)));
1601             return dst;
1602          }
1603 //zz         case Iop_8Uto16:
1604 //zz         case Iop_8Uto32:
1605 //zz         case Iop_16Uto32: {
1606 //zz            HReg dst = newVRegI(env);
1607 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1608 //zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1609 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1610 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1611 //zz                                          X86RMI_Imm(mask), dst));
1612 //zz            return dst;
1613 //zz         }
1614 //zz         case Iop_8Sto16:
1615 //zz         case Iop_8Sto32:
1616          case Iop_16Uto32: {
1617             HReg dst = newVRegI(env);
1618             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1619             ARMRI5* amt = ARMRI5_I5(16);
1620             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1621             addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1622             return dst;
1623          }
1624          case Iop_8Sto32:
1625          case Iop_16Sto32: {
1626             HReg dst = newVRegI(env);
1627             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1628             ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1629             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1630             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1631             return dst;
1632          }
1633 //zz         case Iop_Not8:
1634 //zz         case Iop_Not16:
1635          case Iop_Not32: {
1636             HReg dst = newVRegI(env);
1637             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1638             addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1639             return dst;
1640          }
1641          case Iop_64HIto32: {
1642             HReg rHi, rLo;
1643             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1644             return rHi; /* and abandon rLo .. poor wee thing :-) */
1645          }
1646          case Iop_64to32: {
1647             HReg rHi, rLo;
1648             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1649             return rLo; /* similar stupid comment to the above ... */
1650          }
1651          case Iop_64to8: {
1652             HReg rHi, rLo;
1653             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1654                HReg tHi = newVRegI(env);
1655                HReg tLo = newVRegI(env);
1656                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1657                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1658                rHi = tHi;
1659                rLo = tLo;
1660             } else {
1661                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1662             }
1663             return rLo;
1664          }
1665 
1666          case Iop_1Uto32:
1667             /* 1Uto32(tmp).  Since I1 values generated into registers
1668                are guaranteed to have value either only zero or one,
1669                we can simply return the value of the register in this
1670                case. */
1671             if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1672                HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1673                return dst;
1674             }
1675             /* else fall through */
1676          case Iop_1Uto8: {
1677             HReg        dst  = newVRegI(env);
1678             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1679             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1680             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1681             return dst;
1682          }
1683 
1684          case Iop_1Sto32: {
1685             HReg        dst  = newVRegI(env);
1686             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1687             ARMRI5*     amt  = ARMRI5_I5(31);
1688             /* This is really rough.  We could do much better here;
1689                perhaps mvn{cond} dst, #0 as the second insn?
1690                (same applies to 1Sto64) */
1691             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1692             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1693             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1694             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1695             return dst;
1696          }
1697 
1698 
1699 //zz         case Iop_1Sto8:
1700 //zz         case Iop_1Sto16:
1701 //zz         case Iop_1Sto32: {
1702 //zz            /* could do better than this, but for now ... */
1703 //zz            HReg dst         = newVRegI(env);
1704 //zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1705 //zz            addInstr(env, X86Instr_Set32(cond,dst));
1706 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1707 //zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1708 //zz            return dst;
1709 //zz         }
1710 //zz         case Iop_Ctz32: {
1711 //zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1712 //zz            HReg dst = newVRegI(env);
1713 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1714 //zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1715 //zz            return dst;
1716 //zz         }
1717          case Iop_Clz32: {
1718             /* Count leading zeroes; easy on ARM. */
1719             HReg dst = newVRegI(env);
1720             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1721             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1722             return dst;
1723          }
1724 
1725          case Iop_CmpwNEZ32: {
1726             HReg dst = newVRegI(env);
1727             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1728             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1729             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1730             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1731             return dst;
1732          }
1733 
1734          case Iop_Left32: {
1735             HReg dst = newVRegI(env);
1736             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1737             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1738             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1739             return dst;
1740          }
1741 
1742 //zz         case Iop_V128to32: {
1743 //zz            HReg      dst  = newVRegI(env);
1744 //zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1745 //zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1746 //zz            sub_from_esp(env, 16);
1747 //zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1748 //zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1749 //zz            add_to_esp(env, 16);
1750 //zz            return dst;
1751 //zz         }
1752 //zz
1753          case Iop_ReinterpF32asI32: {
1754             HReg dst = newVRegI(env);
1755             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1756             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1757             return dst;
1758          }
1759 
1760 //zz
1761 //zz         case Iop_16to8:
1762          case Iop_32to8:
1763          case Iop_32to16:
1764             /* These are no-ops. */
1765             return iselIntExpr_R(env, e->Iex.Unop.arg);
1766 
1767          default:
1768             break;
1769       }
1770 
1771       /* All Unop cases involving host-side helper calls. */
1772       void* fn = NULL;
1773       switch (e->Iex.Unop.op) {
1774          case Iop_CmpNEZ16x2:
1775             fn = &h_generic_calc_CmpNEZ16x2; break;
1776          case Iop_CmpNEZ8x4:
1777             fn = &h_generic_calc_CmpNEZ8x4; break;
1778          default:
1779             break;
1780       }
1781 
1782       if (fn) {
1783          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1784          HReg res = newVRegI(env);
1785          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1786          addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1787                                       1, mk_RetLoc_simple(RLPri_Int) ));
1788          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1789          return res;
1790       }
1791 
1792       break;
1793    }
1794 
1795    /* --------- GET --------- */
1796    case Iex_Get: {
1797       if (ty == Ity_I32
1798           && 0 == (e->Iex.Get.offset & 3)
1799           && e->Iex.Get.offset < 4096-4) {
1800          HReg dst = newVRegI(env);
1801          addInstr(env, ARMInstr_LdSt32(
1802                           ARMcc_AL, True/*isLoad*/,
1803                           dst,
1804                           ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1805          return dst;
1806       }
1807 //zz      if (ty == Ity_I8 || ty == Ity_I16) {
1808 //zz         HReg dst = newVRegI(env);
1809 //zz         addInstr(env, X86Instr_LoadEX(
1810 //zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1811 //zz                          False,
1812 //zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1813 //zz                          dst));
1814 //zz         return dst;
1815 //zz      }
1816       break;
1817    }
1818 
1819 //zz   case Iex_GetI: {
1820 //zz      X86AMode* am
1821 //zz         = genGuestArrayOffset(
1822 //zz              env, e->Iex.GetI.descr,
1823 //zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1824 //zz      HReg dst = newVRegI(env);
1825 //zz      if (ty == Ity_I8) {
1826 //zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1827 //zz         return dst;
1828 //zz      }
1829 //zz      if (ty == Ity_I32) {
1830 //zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1831 //zz         return dst;
1832 //zz      }
1833 //zz      break;
1834 //zz   }
1835 
1836    /* --------- CCALL --------- */
1837    case Iex_CCall: {
1838       HReg    dst = newVRegI(env);
1839       vassert(ty == e->Iex.CCall.retty);
1840 
1841       /* be very restrictive for now.  Only 32/64-bit ints allowed for
1842          args, and 32 bits for return type.  Don't forget to change
1843          the RetLoc if more types are allowed in future. */
1844       if (e->Iex.CCall.retty != Ity_I32)
1845          goto irreducible;
1846 
1847       /* Marshal args, do the call, clear stack. */
1848       UInt   addToSp = 0;
1849       RetLoc rloc    = mk_RetLoc_INVALID();
1850       Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1851                                      e->Iex.CCall.cee, e->Iex.CCall.retty,
1852                                      e->Iex.CCall.args );
1853       /* */
1854       if (ok) {
1855          vassert(is_sane_RetLoc(rloc));
1856          vassert(rloc.pri == RLPri_Int);
1857          vassert(addToSp == 0);
1858          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1859          return dst;
1860       }
1861       /* else fall through; will hit the irreducible: label */
1862    }
1863 
1864    /* --------- LITERAL --------- */
1865    /* 32 literals */
1866    case Iex_Const: {
1867       UInt u   = 0;
1868       HReg dst = newVRegI(env);
1869       switch (e->Iex.Const.con->tag) {
1870          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1871          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1872          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1873          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1874       }
1875       addInstr(env, ARMInstr_Imm32(dst, u));
1876       return dst;
1877    }
1878 
1879    /* --------- MULTIPLEX --------- */
1880    case Iex_ITE: { // VFD
1881       /* ITE(ccexpr, iftrue, iffalse) */
1882       if (ty == Ity_I32) {
1883          ARMCondCode cc;
1884          HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1885          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
1886          HReg     dst = newVRegI(env);
1887          addInstr(env, mk_iMOVds_RR(dst, r1));
1888          cc = iselCondCode(env, e->Iex.ITE.cond);
1889          addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1890          return dst;
1891       }
1892       break;
1893    }
1894 
1895    default:
1896    break;
1897    } /* switch (e->tag) */
1898 
1899    /* We get here if no pattern matched. */
1900   irreducible:
1901    ppIRExpr(e);
1902    vpanic("iselIntExpr_R: cannot reduce tree");
1903 }
1904 
1905 
1906 /* -------------------- 64-bit -------------------- */
1907 
1908 /* Compute a 64-bit value into a register pair, which is returned as
1909    the first two parameters.  As with iselIntExpr_R, these may be
1910    either real or virtual regs; in any case they must not be changed
1911    by subsequent code emitted by the caller.  */
1912 
/* Checked entry point for 64-bit integer instruction selection.
   Delegates the actual work to iselInt64Expr_wrk and then verifies
   that both halves of the returned pair (*rHi and *rLo) are virtual
   registers of class HRcInt32. */
static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);

   /* Disabled debugging aid: dump the expression just selected. */
#  if 0
   vex_printf("\n");
   ppIRExpr(e);
   vex_printf("\n");
#  endif

   /* Sanity-check the high half ... */
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));

   /* ... and then the low half. */
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}
1924 
1925 /* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt64Expr.  Emits code that computes the Ity_I64
   expression 'e' into two 32-bit integer registers, handed back
   through *rHi (upper word) and *rLo (lower word).

   The expression is dispatched on its tag.  Any form that is not
   handled directly drops out of the switch; if the host has NEON
   the value is then computed with iselNeon64Expr and transferred to
   an integer register pair, otherwise we panic. */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   switch (e->tag) {

      /* --------- 64-bit literal --------- */
      case Iex_Const: {
         ULong imm64 = e->Iex.Const.con->Ico.U64;
         UInt  immHi = toUInt(imm64 >> 32);
         UInt  immLo = toUInt(imm64);
         HReg  hi    = newVRegI(env);
         HReg  lo    = newVRegI(env);
         vassert(e->Iex.Const.con->tag == Ico_U64);
         addInstr(env, ARMInstr_Imm32(hi, immHi));
         addInstr(env, ARMInstr_Imm32(lo, immLo));
         *rHi = hi;
         *rLo = lo;
         return;
      }

      /* --------- read 64-bit IRTemp --------- */
      case Iex_RdTmp: {
         HReg hi, lo, dreg;
         if (!(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
            /* No NEON: the temp is looked up directly as a pair of
               integer registers. */
            lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
            return;
         }
         /* With NEON: obtain the value via iselNeon64Expr and copy
            it out into a fresh integer pair. */
         hi   = newVRegI(env);
         lo   = newVRegI(env);
         dreg = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, dreg, hi, lo));
         *rHi = hi;
         *rLo = lo;
         return;
      }

      /* --------- 64-bit load (little-endian only) --------- */
      case Iex_Load: {
         HReg base, hi, lo;
         if (e->Iex.Load.end != Iend_LE)
            break;
         vassert(e->Iex.Load.ty == Ity_I64);
         base = iselIntExpr_R(env, e->Iex.Load.addr);
         hi   = newVRegI(env);
         lo   = newVRegI(env);
         /* Upper word lives at base+4, lower word at base+0. */
         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                       hi, ARMAMode1_RI(base, 4)));
         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                       lo, ARMAMode1_RI(base, 0)));
         *rHi = hi;
         *rLo = lo;
         return;
      }

      /* --------- 64-bit GET --------- */
      case Iex_Get: {
         /* Two 32-bit loads relative to the register given by
            hregARM_R8(): lower word at offset+0, upper at offset+4. */
         ARMAMode1* amLo = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
         ARMAMode1* amHi = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
         HReg hi = newVRegI(env);
         HReg lo = newVRegI(env);
         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, hi, amHi));
         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, lo, amLo));
         *rHi = hi;
         *rLo = lo;
         return;
      }

      /* --------- BINARY ops --------- */
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {

            /* 32 x 32 -> 64 multiply.  Operands are moved into r2
               and r3; the product is read back from r1 (upper) and
               r0 (lower). */
            case Iop_MullS32:
            case Iop_MullU32: {
               HReg     left  = iselIntExpr_R(env, e->Iex.Binop.arg1);
               HReg     right = iselIntExpr_R(env, e->Iex.Binop.arg2);
               HReg     hi    = newVRegI(env);
               HReg     lo    = newVRegI(env);
               ARMMulOp mulOp;
               if (e->Iex.Binop.op == Iop_MullS32)
                  mulOp = ARMmul_SX;
               else
                  mulOp = ARMmul_ZX;
               addInstr(env, mk_iMOVds_RR(hregARM_R2(), left));
               addInstr(env, mk_iMOVds_RR(hregARM_R3(), right));
               addInstr(env, ARMInstr_Mul(mulOp));
               addInstr(env, mk_iMOVds_RR(hi, hregARM_R1()));
               addInstr(env, mk_iMOVds_RR(lo, hregARM_R0()));
               *rHi = hi;
               *rLo = lo;
               return;
            }

            /* Bitwise OR, done independently on each half. */
            case Iop_Or64: {
               HReg aHi, aLo, bHi, bLo;
               HReg hi = newVRegI(env);
               HReg lo = newVRegI(env);
               iselInt64Expr(&aHi, &aLo, env, e->Iex.Binop.arg1);
               iselInt64Expr(&bHi, &bLo, env, e->Iex.Binop.arg2);
               addInstr(env, ARMInstr_Alu(ARMalu_OR, hi, aHi, ARMRI84_R(bHi)));
               addInstr(env, ARMInstr_Alu(ARMalu_OR, lo, aLo, ARMRI84_R(bLo)));
               *rHi = hi;
               *rLo = lo;
               return;
            }

            /* 64-bit add: ADDS on the lower words followed
               immediately by ADC on the upper words.  The two
               instructions must stay adjacent and in this order. */
            case Iop_Add64: {
               HReg aHi, aLo, bHi, bLo;
               HReg hi = newVRegI(env);
               HReg lo = newVRegI(env);
               iselInt64Expr(&aHi, &aLo, env, e->Iex.Binop.arg1);
               iselInt64Expr(&bHi, &bLo, env, e->Iex.Binop.arg2);
               addInstr(env, ARMInstr_Alu(ARMalu_ADDS, lo, aLo, ARMRI84_R(bLo)));
               addInstr(env, ARMInstr_Alu(ARMalu_ADC,  hi, aHi, ARMRI84_R(bHi)));
               *rHi = hi;
               *rLo = lo;
               return;
            }

            /* 32HLto64(e1,e2): the halves are simply the two
               32-bit operands. */
            case Iop_32HLto64: {
               *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
               *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
               return;
            }

            default:
               break;
         }
         break;
      }

      /* --------- UNARY ops --------- */
      case Iex_Unop: {
         switch (e->Iex.Unop.op) {

            /* ReinterpF64asI64: transfer a double register out to
               an integer pair. */
            case Iop_ReinterpF64asI64: {
               HReg hi   = newVRegI(env);
               HReg lo   = newVRegI(env);
               HReg dreg = iselDblExpr(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_VXferD(False/*!toD*/, dreg, hi, lo));
               *rHi = hi;
               *rLo = lo;
               return;
            }

            /* Left64(e) = e | -e */
            case Iop_Left64: {
               HReg argLo, argHi;
               HReg hi   = newVRegI(env);
               HReg lo   = newVRegI(env);
               HReg zreg = newVRegI(env);
               /* argHi:argLo = arg */
               iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
               /* zreg = 0 */
               addInstr(env, ARMInstr_Imm32(zreg, 0));
               /* lo = 0 - argLo, setting carry */
               addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
                                          lo, zreg, ARMRI84_R(argLo)));
               /* hi = 0 - argHi - carry */
               addInstr(env, ARMInstr_Alu(ARMalu_SBC,
                                          hi, zreg, ARMRI84_R(argHi)));
               /* hi:lo now holds -arg.  OR the original argument
                  back in to produce arg | -arg. */
               addInstr(env, ARMInstr_Alu(ARMalu_OR, hi, hi, ARMRI84_R(argHi)));
               addInstr(env, ARMInstr_Alu(ARMalu_OR, lo, lo, ARMRI84_R(argLo)));
               *rHi = hi;
               *rLo = lo;
               return;
            }

            /* CmpwNEZ64(e) */
            case Iop_CmpwNEZ64: {
               HReg argLo, argHi;
               HReg merged = newVRegI(env);
               HReg result = newVRegI(env);
               /* argHi:argLo = arg */
               iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
               /* merged = argHi | argLo */
               addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                          merged, argHi, ARMRI84_R(argLo)));
               /* result = (merged | -merged) >>s 31 */
               addInstr(env, ARMInstr_Unary(ARMun_NEG, result, merged));
               addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                          result, result, ARMRI84_R(merged)));
               addInstr(env, ARMInstr_Shift(ARMsh_SAR,
                                            result, result, ARMRI5_I5(31)));
               /* Both halves are the same register. */
               *rHi = result;
               *rLo = result;
               return;
            }

            case Iop_1Sto64: {
               HReg        val  = newVRegI(env);
               ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               ARMRI5*     sh31 = ARMRI5_I5(31);
               /* Rather crude: materialise 0 or 1, then smear bit 0
                  across the word with shl 31 / sar 31.  Something
                  like mvn{cond} val, #0 as the second insn might be
                  better (the same remark applies to 1Sto32). */
               addInstr(env, ARMInstr_Mov(val, ARMRI84_I84(0,0)));
               addInstr(env, ARMInstr_CMov(cond, val, ARMRI84_I84(1,0)));
               addInstr(env, ARMInstr_Shift(ARMsh_SHL, val, val, sh31));
               addInstr(env, ARMInstr_Shift(ARMsh_SAR, val, val, sh31));
               /* Both halves are the same register. */
               *rHi = val;
               *rLo = val;
               return;
            }

            default:
               break;
         }
         break;
      }

      /* --------- MULTIPLEX --------- */
      case Iex_ITE: { // VFD
         HReg        tHi, tLo, fHi, fLo, outHi, outLo;
         ARMCondCode cc;
         IRType      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
         vassert(tyC == Ity_I1);
         iselInt64Expr(&tHi, &tLo, env, e->Iex.ITE.iftrue);
         iselInt64Expr(&fHi, &fLo, env, e->Iex.ITE.iffalse);
         outHi = newVRegI(env);
         outLo = newVRegI(env);
         /* Start with the 'iftrue' value ... */
         addInstr(env, mk_iMOVds_RR(outHi, tHi));
         addInstr(env, mk_iMOVds_RR(outLo, tLo));
         /* ... then overwrite it with the 'iffalse' value under the
            condition code cc ^ 1. */
         cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARMInstr_CMov(cc ^ 1, outHi, ARMRI84_R(fHi)));
         addInstr(env, ARMInstr_CMov(cc ^ 1, outLo, ARMRI84_R(fLo)));
         *rHi = outHi;
         *rLo = outLo;
         return;
      }

      default:
         break;
   } /* switch (e->tag) */

   /* Nothing above matched.  It is sometimes convenient to call
      iselInt64Expr even when NEON is available (e.g. in
      do_helper_call, where 64-bit arguments are needed as 2 x 32
      regs), so in that case compute the value on the NEON side and
      transfer it to an integer pair. */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      HReg hi   = newVRegI(env);
      HReg lo   = newVRegI(env);
      HReg dreg = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, dreg, hi, lo));
      *rHi = hi;
      *rLo = lo;
      return;
   }

   /* No way to select this expression. */
   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
2171 
2172 
2173 /*---------------------------------------------------------*/
2174 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2175 /*---------------------------------------------------------*/
2176 
iselNeon64Expr(ISelEnv * env,IRExpr * e)2177 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2178 {
2179    HReg r;
2180    vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2181    r = iselNeon64Expr_wrk( env, e );
2182    vassert(hregClass(r) == HRcFlt64);
2183    vassert(hregIsVirtual(r));
2184    return r;
2185 }
2186 
2187 /* DO NOT CALL THIS DIRECTLY */
iselNeon64Expr_wrk(ISelEnv * env,IRExpr * e)2188 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2189 {
2190    IRType ty = typeOfIRExpr(env->type_env, e);
2191    MatchInfo mi;
2192    vassert(e);
2193    vassert(ty == Ity_I64);
2194 
2195    if (e->tag == Iex_RdTmp) {
2196       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2197    }
2198 
2199    if (e->tag == Iex_Const) {
2200       HReg rLo, rHi;
2201       HReg res = newVRegD(env);
2202       iselInt64Expr(&rHi, &rLo, env, e);
2203       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2204       return res;
2205    }
2206 
2207    /* 64-bit load */
2208    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2209       HReg res = newVRegD(env);
2210       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2211       vassert(ty == Ity_I64);
2212       addInstr(env, ARMInstr_NLdStD(True, res, am));
2213       return res;
2214    }
2215 
2216    /* 64-bit GET */
2217    if (e->tag == Iex_Get) {
2218       HReg addr = newVRegI(env);
2219       HReg res = newVRegD(env);
2220       vassert(ty == Ity_I64);
2221       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2222       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2223       return res;
2224    }
2225 
2226    /* --------- BINARY ops --------- */
2227    if (e->tag == Iex_Binop) {
2228       switch (e->Iex.Binop.op) {
2229 
2230          /* 32 x 32 -> 64 multiply */
2231          case Iop_MullS32:
2232          case Iop_MullU32: {
2233             HReg rLo, rHi;
2234             HReg res = newVRegD(env);
2235             iselInt64Expr(&rHi, &rLo, env, e);
2236             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2237             return res;
2238          }
2239 
2240          case Iop_And64: {
2241             HReg res = newVRegD(env);
2242             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2243             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2244             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2245                                            res, argL, argR, 4, False));
2246             return res;
2247          }
2248          case Iop_Or64: {
2249             HReg res = newVRegD(env);
2250             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2251             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2252             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2253                                            res, argL, argR, 4, False));
2254             return res;
2255          }
2256          case Iop_Xor64: {
2257             HReg res = newVRegD(env);
2258             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2259             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2260             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2261                                            res, argL, argR, 4, False));
2262             return res;
2263          }
2264 
2265          /* 32HLto64(e1,e2) */
2266          case Iop_32HLto64: {
2267             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2268             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2269             HReg res = newVRegD(env);
2270             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2271             return res;
2272          }
2273 
2274          case Iop_Add8x8:
2275          case Iop_Add16x4:
2276          case Iop_Add32x2:
2277          case Iop_Add64: {
2278             HReg res = newVRegD(env);
2279             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2280             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2281             UInt size;
2282             switch (e->Iex.Binop.op) {
2283                case Iop_Add8x8: size = 0; break;
2284                case Iop_Add16x4: size = 1; break;
2285                case Iop_Add32x2: size = 2; break;
2286                case Iop_Add64: size = 3; break;
2287                default: vassert(0);
2288             }
2289             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2290                                            res, argL, argR, size, False));
2291             return res;
2292          }
2293          case Iop_Add32Fx2: {
2294             HReg res = newVRegD(env);
2295             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2296             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2297             UInt size = 0;
2298             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2299                                            res, argL, argR, size, False));
2300             return res;
2301          }
2302          case Iop_RecipStep32Fx2: {
2303             HReg res = newVRegD(env);
2304             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2305             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2306             UInt size = 0;
2307             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2308                                            res, argL, argR, size, False));
2309             return res;
2310          }
2311          case Iop_RSqrtStep32Fx2: {
2312             HReg res = newVRegD(env);
2313             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2314             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2315             UInt size = 0;
2316             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2317                                            res, argL, argR, size, False));
2318             return res;
2319          }
2320 
2321          // These 6 verified 18 Apr 2013
2322          case Iop_InterleaveHI32x2:
2323          case Iop_InterleaveLO32x2:
2324          case Iop_InterleaveOddLanes8x8:
2325          case Iop_InterleaveEvenLanes8x8:
2326          case Iop_InterleaveOddLanes16x4:
2327          case Iop_InterleaveEvenLanes16x4: {
2328             /* Copy both operands, run one ARMneon_TRN, return one copy. */
2329             HReg regD = newVRegD(env);
2330             HReg regM = newVRegD(env);
2331             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2332             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2333             IROp op   = e->Iex.Binop.op;
2334             UInt sz;
2335             Bool inD;  /* True: result is regD; False: result is regM */
2336             if      (op == Iop_InterleaveOddLanes8x8)   { inD = False; sz = 0; }
2337             else if (op == Iop_InterleaveEvenLanes8x8)  { inD = True;  sz = 0; }
2338             else if (op == Iop_InterleaveOddLanes16x4)  { inD = False; sz = 1; }
2339             else if (op == Iop_InterleaveEvenLanes16x4) { inD = True;  sz = 1; }
2340             else if (op == Iop_InterleaveHI32x2)        { inD = False; sz = 2; }
2341             else if (op == Iop_InterleaveLO32x2)        { inD = True;  sz = 2; }
2342             else vassert(0);
2343             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, srcL, 4, False));
2344             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, srcR, 4, False));
2345             addInstr(env, ARMInstr_NDual(ARMneon_TRN, regD, regM, sz, False));
2346             return inD ? regD : regM;
2347          }
2348 
2349          // These 4 verified 18 Apr 2013
2350          case Iop_InterleaveHI8x8:
2351          case Iop_InterleaveLO8x8:
2352          case Iop_InterleaveHI16x4:
2353          case Iop_InterleaveLO16x4: {
2354             /* Copy both operands, run one ARMneon_ZIP, return one copy. */
2355             HReg regD = newVRegD(env);
2356             HReg regM = newVRegD(env);
2357             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2358             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2359             IROp op   = e->Iex.Binop.op;
2360             UInt sz;
2361             Bool inD;  /* True: result is regD; False: result is regM */
2362             if      (op == Iop_InterleaveHI8x8)  { inD = False; sz = 0; }
2363             else if (op == Iop_InterleaveLO8x8)  { inD = True;  sz = 0; }
2364             else if (op == Iop_InterleaveHI16x4) { inD = False; sz = 1; }
2365             else if (op == Iop_InterleaveLO16x4) { inD = True;  sz = 1; }
2366             else vassert(0);
2367             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, srcL, 4, False));
2368             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, srcR, 4, False));
2369             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, regD, regM, sz, False));
2370             return inD ? regD : regM;
2371          }
2372 
2373          // These 4 verified 18 Apr 2013
2374          case Iop_CatOddLanes8x8:
2375          case Iop_CatEvenLanes8x8:
2376          case Iop_CatOddLanes16x4:
2377          case Iop_CatEvenLanes16x4: {
2378             /* Copy both operands, run one ARMneon_UZP, return one copy. */
2379             HReg regD = newVRegD(env);
2380             HReg regM = newVRegD(env);
2381             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2382             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2383             IROp op   = e->Iex.Binop.op;
2384             UInt sz;
2385             Bool inD;  /* True: result is regD; False: result is regM */
2386             if      (op == Iop_CatOddLanes8x8)   { inD = False; sz = 0; }
2387             else if (op == Iop_CatEvenLanes8x8)  { inD = True;  sz = 0; }
2388             else if (op == Iop_CatOddLanes16x4)  { inD = False; sz = 1; }
2389             else if (op == Iop_CatEvenLanes16x4) { inD = True;  sz = 1; }
2390             else vassert(0);
2391             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, srcL, 4, False));
2392             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, srcR, 4, False));
2393             addInstr(env, ARMInstr_NDual(ARMneon_UZP, regD, regM, sz, False));
2394             return inD ? regD : regM;
2395          }
2396 
2397          case Iop_QAdd8Ux8:
2398          case Iop_QAdd16Ux4:
2399          case Iop_QAdd32Ux2:
2400          case Iop_QAdd64Ux1: {
2401             /* One ARMneon_VQADDU; sz is 0..3 for the 8/16/32/64 variants. */
2402             HReg dst  = newVRegD(env);
2403             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2404             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2405             IROp op   = e->Iex.Binop.op;
2406             UInt sz;
2407             if      (op == Iop_QAdd8Ux8)  sz = 0;
2408             else if (op == Iop_QAdd16Ux4) sz = 1;
2409             else if (op == Iop_QAdd32Ux2) sz = 2;
2410             else if (op == Iop_QAdd64Ux1) sz = 3;
2411             else vassert(0);
2412             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2413                                            dst, srcL, srcR, sz, False));
2414             return dst;
2415          }
2416          case Iop_QAdd8Sx8:
2417          case Iop_QAdd16Sx4:
2418          case Iop_QAdd32Sx2:
2419          case Iop_QAdd64Sx1: {
2420             /* One ARMneon_VQADDS; sz is 0..3 for the 8/16/32/64 variants. */
2421             HReg dst  = newVRegD(env);
2422             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424             IROp op   = e->Iex.Binop.op;
2425             UInt sz;
2426             if      (op == Iop_QAdd8Sx8)  sz = 0;
2427             else if (op == Iop_QAdd16Sx4) sz = 1;
2428             else if (op == Iop_QAdd32Sx2) sz = 2;
2429             else if (op == Iop_QAdd64Sx1) sz = 3;
2430             else vassert(0);
2431             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2432                                            dst, srcL, srcR, sz, False));
2433             return dst;
2434          }
2435          case Iop_Sub8x8:
2436          case Iop_Sub16x4:
2437          case Iop_Sub32x2:
2438          case Iop_Sub64: {
2439             /* One ARMneon_VSUB; sz is 0..3 for the 8/16/32/64 variants. */
2440             HReg dst  = newVRegD(env);
2441             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2442             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2443             IROp op   = e->Iex.Binop.op;
2444             UInt sz;
2445             if      (op == Iop_Sub8x8)  sz = 0;
2446             else if (op == Iop_Sub16x4) sz = 1;
2447             else if (op == Iop_Sub32x2) sz = 2;
2448             else if (op == Iop_Sub64)   sz = 3;
2449             else vassert(0);
2450             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2451                                            dst, srcL, srcR, sz, False));
2452             return dst;
2453          }
2454          case Iop_Sub32Fx2: {
2455             /* Emits a single ARMneon_VSUBFP with its size field set to 0. */
2456             HReg dst  = newVRegD(env);
2457             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2458             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2459             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2460                                            dst, srcL, srcR, 0, False));
2461             return dst;
2462          }
2463          case Iop_QSub8Ux8:
2464          case Iop_QSub16Ux4:
2465          case Iop_QSub32Ux2:
2466          case Iop_QSub64Ux1: {
2467             /* One ARMneon_VQSUBU; sz is 0..3 for the 8/16/32/64 variants. */
2468             HReg dst  = newVRegD(env);
2469             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2470             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2471             IROp op   = e->Iex.Binop.op;
2472             UInt sz;
2473             if      (op == Iop_QSub8Ux8)  sz = 0;
2474             else if (op == Iop_QSub16Ux4) sz = 1;
2475             else if (op == Iop_QSub32Ux2) sz = 2;
2476             else if (op == Iop_QSub64Ux1) sz = 3;
2477             else vassert(0);
2478             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2479                                            dst, srcL, srcR, sz, False));
2480             return dst;
2481          }
2482          case Iop_QSub8Sx8:
2483          case Iop_QSub16Sx4:
2484          case Iop_QSub32Sx2:
2485          case Iop_QSub64Sx1: {
2486             /* One ARMneon_VQSUBS; sz is 0..3 for the 8/16/32/64 variants. */
2487             HReg dst  = newVRegD(env);
2488             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2489             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2490             IROp op   = e->Iex.Binop.op;
2491             UInt sz;
2492             if      (op == Iop_QSub8Sx8)  sz = 0;
2493             else if (op == Iop_QSub16Sx4) sz = 1;
2494             else if (op == Iop_QSub32Sx2) sz = 2;
2495             else if (op == Iop_QSub64Sx1) sz = 3;
2496             else vassert(0);
2497             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2498                                            dst, srcL, srcR, sz, False));
2499             return dst;
2500          }
2501          case Iop_Max8Ux8:
2502          case Iop_Max16Ux4:
2503          case Iop_Max32Ux2: {
2504             /* One ARMneon_VMAXU; sz is 0..2 for the 8/16/32 variants. */
2505             HReg dst  = newVRegD(env);
2506             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2507             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2508             IROp op   = e->Iex.Binop.op;
2509             UInt sz;
2510             if      (op == Iop_Max8Ux8)  sz = 0;
2511             else if (op == Iop_Max16Ux4) sz = 1;
2512             else if (op == Iop_Max32Ux2) sz = 2;
2513             else vassert(0);
2514             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2515                                            dst, srcL, srcR, sz, False));
2516             return dst;
2517          }
2518          case Iop_Max8Sx8:
2519          case Iop_Max16Sx4:
2520          case Iop_Max32Sx2: {
2521             /* One ARMneon_VMAXS; sz is 0..2 for the 8/16/32 variants. */
2522             HReg dst  = newVRegD(env);
2523             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2524             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2525             IROp op   = e->Iex.Binop.op;
2526             UInt sz;
2527             if      (op == Iop_Max8Sx8)  sz = 0;
2528             else if (op == Iop_Max16Sx4) sz = 1;
2529             else if (op == Iop_Max32Sx2) sz = 2;
2530             else vassert(0);
2531             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2532                                            dst, srcL, srcR, sz, False));
2533             return dst;
2534          }
2535          case Iop_Min8Ux8:
2536          case Iop_Min16Ux4:
2537          case Iop_Min32Ux2: {
2538             /* One ARMneon_VMINU; sz is 0..2 for the 8/16/32 variants. */
2539             HReg dst  = newVRegD(env);
2540             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2541             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2542             IROp op   = e->Iex.Binop.op;
2543             UInt sz;
2544             if      (op == Iop_Min8Ux8)  sz = 0;
2545             else if (op == Iop_Min16Ux4) sz = 1;
2546             else if (op == Iop_Min32Ux2) sz = 2;
2547             else vassert(0);
2548             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2549                                            dst, srcL, srcR, sz, False));
2550             return dst;
2551          }
2552          case Iop_Min8Sx8:
2553          case Iop_Min16Sx4:
2554          case Iop_Min32Sx2: {
2555             /* One ARMneon_VMINS; sz is 0..2 for the 8/16/32 variants. */
2556             HReg dst  = newVRegD(env);
2557             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2558             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2559             IROp op   = e->Iex.Binop.op;
2560             UInt sz;
2561             if      (op == Iop_Min8Sx8)  sz = 0;
2562             else if (op == Iop_Min16Sx4) sz = 1;
2563             else if (op == Iop_Min32Sx2) sz = 2;
2564             else vassert(0);
2565             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2566                                            dst, srcL, srcR, sz, False));
2567             return dst;
2568          }
2569          case Iop_Sar8x8:
2570          case Iop_Sar16x4:
2571          case Iop_Sar32x2: {
2572             /* Only the three ops listed above reach this arm (Iop_Sar64 is
2573                dispatched elsewhere).  The counts in argR are negated (zero -
2574                argR, via ARMneon_VSUB) before being given to ARMneon_VSAL. */
2575             HReg res = newVRegD(env);
2576             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2577             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2578             HReg argR2 = newVRegD(env);
2579             HReg zero = newVRegD(env);
2580             UInt size;
2581             if      (e->Iex.Binop.op == Iop_Sar8x8)  size = 0;
2582             else if (e->Iex.Binop.op == Iop_Sar16x4) size = 1;
2583             else if (e->Iex.Binop.op == Iop_Sar32x2) size = 2;
2584             else vassert(0);
2585             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2586             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2587                                            argR2, zero, argR, size, False));
2588             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2589                                           res, argL, argR2, size, False));
2590             return res;
2591          }
2592          case Iop_Sal8x8:
2593          case Iop_Sal16x4:
2594          case Iop_Sal32x2:
2595          case Iop_Sal64x1: {
2596             /* One ARMneon_VSAL shift; sz is 0..3 for the 8/16/32/64 variants. */
2597             HReg dst = newVRegD(env);
2598             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2599             HReg cnt = iselNeon64Expr(env, e->Iex.Binop.arg2);
2600             IROp op  = e->Iex.Binop.op;
2601             UInt sz;
2602             if      (op == Iop_Sal8x8)  sz = 0;
2603             else if (op == Iop_Sal16x4) sz = 1;
2604             else if (op == Iop_Sal32x2) sz = 2;
2605             else if (op == Iop_Sal64x1) sz = 3;
2606             else vassert(0);
2607             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2608                                           dst, src, cnt, sz, False));
2609             return dst;
2610          }
2611          case Iop_Shr8x8:
2612          case Iop_Shr16x4:
2613          case Iop_Shr32x2: {
2614             /* cnt is negated (zeroReg - cnt) and then fed to ARMneon_VSHL. */
2615             HReg dst     = newVRegD(env);
2616             HReg src     = iselNeon64Expr(env, e->Iex.Binop.arg1);
2617             HReg cnt     = iselNeon64Expr(env, e->Iex.Binop.arg2);
2618             HReg negCnt  = newVRegD(env);
2619             HReg zeroReg = newVRegD(env);
2620             IROp op      = e->Iex.Binop.op;
2621             UInt sz;
2622             if      (op == Iop_Shr8x8)  sz = 0;
2623             else if (op == Iop_Shr16x4) sz = 1;
2624             else if (op == Iop_Shr32x2) sz = 2;
2625             else vassert(0);
2626             addInstr(env, ARMInstr_NeonImm(zeroReg, ARMNImm_TI(0,0)));
2627             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2628                                            negCnt, zeroReg, cnt, sz, False));
2629             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2630                                           dst, src, negCnt, sz, False));
2631             return dst;
2632          }
2633          case Iop_Shl8x8:
2634          case Iop_Shl16x4:
2635          case Iop_Shl32x2: {
2636             /* One ARMneon_VSHL shift; sz is 0..2 for the 8/16/32 variants. */
2637             HReg dst = newVRegD(env);
2638             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2639             HReg cnt = iselNeon64Expr(env, e->Iex.Binop.arg2);
2640             IROp op  = e->Iex.Binop.op;
2641             UInt sz;
2642             if      (op == Iop_Shl8x8)  sz = 0;
2643             else if (op == Iop_Shl16x4) sz = 1;
2644             else if (op == Iop_Shl32x2) sz = 2;
2645             else vassert(0);
2646             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2647                                           dst, src, cnt, sz, False));
2648             return dst;
2649          }
2650          case Iop_QShl8x8:
2651          case Iop_QShl16x4:
2652          case Iop_QShl32x2:
2653          case Iop_QShl64x1: {
2654             /* One ARMneon_VQSHL shift; sz is 0..3 for the 8/16/32/64 variants. */
2655             HReg dst = newVRegD(env);
2656             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2657             HReg cnt = iselNeon64Expr(env, e->Iex.Binop.arg2);
2658             IROp op  = e->Iex.Binop.op;
2659             UInt sz;
2660             if      (op == Iop_QShl8x8)  sz = 0;
2661             else if (op == Iop_QShl16x4) sz = 1;
2662             else if (op == Iop_QShl32x2) sz = 2;
2663             else if (op == Iop_QShl64x1) sz = 3;
2664             else vassert(0);
2665             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2666                                           dst, src, cnt, sz, False));
2667             return dst;
2668          }
2669          case Iop_QSal8x8:
2670          case Iop_QSal16x4:
2671          case Iop_QSal32x2:
2672          case Iop_QSal64x1: {
2673             /* One ARMneon_VQSAL shift; sz is 0..3 for the 8/16/32/64 variants. */
2674             HReg dst = newVRegD(env);
2675             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2676             HReg cnt = iselNeon64Expr(env, e->Iex.Binop.arg2);
2677             IROp op  = e->Iex.Binop.op;
2678             UInt sz;
2679             if      (op == Iop_QSal8x8)  sz = 0;
2680             else if (op == Iop_QSal16x4) sz = 1;
2681             else if (op == Iop_QSal32x2) sz = 2;
2682             else if (op == Iop_QSal64x1) sz = 3;
2683             else vassert(0);
2684             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2685                                           dst, src, cnt, sz, False));
2686             return dst;
2687          }
2688          case Iop_QShlNsatUU8x8:
2689          case Iop_QShlNsatUU16x4:
2690          case Iop_QShlNsatUU32x2:
2691          case Iop_QShlNsatUU64x1: {
2692             /* arg2 must be an Ity_I8 constant; it is OR-ed into szImm. */
2693             HReg dst = newVRegD(env);
2694             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2695             IRExpr* shift = e->Iex.Binop.arg2;
2696             UInt szImm, amt;
2697             if (!(shift->tag == Iex_Const
2698                   && typeOfIRExpr(env->type_env, shift) == Ity_I8)) {
2699                vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2700                       "second argument only\n");
2701             }
2702             amt = shift->Iex.Const.con->Ico.U8;
2703             if      (e->Iex.Binop.op == Iop_QShlNsatUU8x8)  szImm = 8 | amt;
2704             else if (e->Iex.Binop.op == Iop_QShlNsatUU16x4) szImm = 16 | amt;
2705             else if (e->Iex.Binop.op == Iop_QShlNsatUU32x2) szImm = 32 | amt;
2706             else if (e->Iex.Binop.op == Iop_QShlNsatUU64x1) szImm = 64 | amt;
2707             else vassert(0);
2708             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2709                                           dst, src, szImm, False));
2710             return dst;
2711          }
2712          case Iop_QShlNsatSU8x8:
2713          case Iop_QShlNsatSU16x4:
2714          case Iop_QShlNsatSU32x2:
2715          case Iop_QShlNsatSU64x1: {
2716             /* arg2 must be an Ity_I8 constant; it is OR-ed into szImm. */
2717             HReg dst = newVRegD(env);
2718             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2719             IRExpr* shift = e->Iex.Binop.arg2;
2720             UInt szImm, amt;
2721             if (!(shift->tag == Iex_Const
2722                   && typeOfIRExpr(env->type_env, shift) == Ity_I8)) {
2723                vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2724                       "second argument only\n");
2725             }
2726             amt = shift->Iex.Const.con->Ico.U8;
2727             if      (e->Iex.Binop.op == Iop_QShlNsatSU8x8)  szImm = 8 | amt;
2728             else if (e->Iex.Binop.op == Iop_QShlNsatSU16x4) szImm = 16 | amt;
2729             else if (e->Iex.Binop.op == Iop_QShlNsatSU32x2) szImm = 32 | amt;
2730             else if (e->Iex.Binop.op == Iop_QShlNsatSU64x1) szImm = 64 | amt;
2731             else vassert(0);
2732             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2733                                           dst, src, szImm, False));
2734             return dst;
2735          }
2736          case Iop_QShlNsatSS8x8:
2737          case Iop_QShlNsatSS16x4:
2738          case Iop_QShlNsatSS32x2:
2739          case Iop_QShlNsatSS64x1: {
2740             /* arg2 must be an Ity_I8 constant; it is OR-ed into szImm. */
2741             HReg dst = newVRegD(env);
2742             HReg src = iselNeon64Expr(env, e->Iex.Binop.arg1);
2743             IRExpr* shift = e->Iex.Binop.arg2;
2744             UInt szImm, amt;
2745             if (!(shift->tag == Iex_Const
2746                   && typeOfIRExpr(env->type_env, shift) == Ity_I8)) {
2747                vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2748                       "second argument only\n");
2749             }
2750             amt = shift->Iex.Const.con->Ico.U8;
2751             if      (e->Iex.Binop.op == Iop_QShlNsatSS8x8)  szImm = 8 | amt;
2752             else if (e->Iex.Binop.op == Iop_QShlNsatSS16x4) szImm = 16 | amt;
2753             else if (e->Iex.Binop.op == Iop_QShlNsatSS32x2) szImm = 32 | amt;
2754             else if (e->Iex.Binop.op == Iop_QShlNsatSS64x1) szImm = 64 | amt;
2755             else vassert(0);
2756             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2757                                           dst, src, szImm, False));
2758             return dst;
2759          }
2760          case Iop_ShrN8x8:
2761          case Iop_ShrN16x4:
2762          case Iop_ShrN32x2:
2763          case Iop_Shr64: {
2764             /* Integer count is negated, DUP'd, then used by ARMneon_VSHL. */
2765             HReg dst    = newVRegD(env);
2766             HReg cntVec = newVRegD(env);
2767             HReg src    = iselNeon64Expr(env, e->Iex.Binop.arg1);
2768             HReg cnt    = iselIntExpr_R(env, e->Iex.Binop.arg2);
2769             HReg negCnt = newVRegI(env);
2770             IROp op     = e->Iex.Binop.op;
2771             UInt sz;
2772             if      (op == Iop_ShrN8x8)  sz = 0;
2773             else if (op == Iop_ShrN16x4) sz = 1;
2774             else if (op == Iop_ShrN32x2) sz = 2;
2775             else if (op == Iop_Shr64)    sz = 3;
2776             else vassert(0);
2777             addInstr(env, ARMInstr_Unary(ARMun_NEG, negCnt, cnt));
2778             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, cntVec, negCnt, 0, False));
2779             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2780                                           dst, src, cntVec, sz, False));
2781             return dst;
2782          }
2783          case Iop_ShlN8x8:
2784          case Iop_ShlN16x4:
2785          case Iop_ShlN32x2:
2786          case Iop_Shl64: {
2787             HReg dst    = newVRegD(env);
2788             HReg cntVec = newVRegD(env);
2789             HReg src    = iselNeon64Expr(env, e->Iex.Binop.arg1);
2790             IROp op     = e->Iex.Binop.op;
2791             /* Shl64 by a constant in 1..63 is emitted as a single
2792                ARMInstr_NShl64; the original note says the Neon front end
2793                produces a lot of those for V{LD,ST}{1,2,3,4}. */
2794             if (op == Iop_Shl64 && e->Iex.Binop.arg2->tag == Iex_Const) {
2795                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2796                Int amt = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2797                if (1 <= amt && amt <= 63) {
2798                   addInstr(env, ARMInstr_NShl64(dst, src, amt));
2799                   return dst;
2800                }
2801                /* any other constant takes the general route below */
2802             }
2803             HReg cnt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2804             UInt sz;
2805             if      (op == Iop_ShlN8x8)  sz = 0;
2806             else if (op == Iop_ShlN16x4) sz = 1;
2807             else if (op == Iop_ShlN32x2) sz = 2;
2808             else if (op == Iop_Shl64)    sz = 3;
2809             else vassert(0);
2810             /* General route: DUP the integer count, then ARMneon_VSHL. */
2811             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2812                                           cntVec, cnt, 0, False));
2813             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2814                                           dst, src, cntVec, sz, False));
2815             return dst;
2816          }
2817          case Iop_SarN8x8:
2818          case Iop_SarN16x4:
2819          case Iop_SarN32x2:
2820          case Iop_Sar64: {
2821             /* Integer count is negated, DUP'd, then used by ARMneon_VSAL. */
2822             HReg dst    = newVRegD(env);
2823             HReg cntVec = newVRegD(env);
2824             HReg src    = iselNeon64Expr(env, e->Iex.Binop.arg1);
2825             HReg cnt    = iselIntExpr_R(env, e->Iex.Binop.arg2);
2826             HReg negCnt = newVRegI(env);
2827             IROp op     = e->Iex.Binop.op;
2828             UInt sz;
2829             if      (op == Iop_SarN8x8)  sz = 0;
2830             else if (op == Iop_SarN16x4) sz = 1;
2831             else if (op == Iop_SarN32x2) sz = 2;
2832             else if (op == Iop_Sar64)    sz = 3;
2833             else vassert(0);
2834             addInstr(env, ARMInstr_Unary(ARMun_NEG, negCnt, cnt));
2835             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, cntVec, negCnt, 0, False));
2836             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2837                                           dst, src, cntVec, sz, False));
2838             return dst;
2839          }
2840          case Iop_CmpGT8Ux8:
2841          case Iop_CmpGT16Ux4:
2842          case Iop_CmpGT32Ux2: {
2843             /* One ARMneon_VCGTU; sz is 0..2 for the 8/16/32 variants. */
2844             HReg dst  = newVRegD(env);
2845             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2846             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2847             IROp op   = e->Iex.Binop.op;
2848             UInt sz;
2849             if      (op == Iop_CmpGT8Ux8)  sz = 0;
2850             else if (op == Iop_CmpGT16Ux4) sz = 1;
2851             else if (op == Iop_CmpGT32Ux2) sz = 2;
2852             else vassert(0);
2853             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2854                                            dst, srcL, srcR, sz, False));
2855             return dst;
2856          }
2857          case Iop_CmpGT8Sx8:
2858          case Iop_CmpGT16Sx4:
2859          case Iop_CmpGT32Sx2: {
2860             /* One ARMneon_VCGTS; sz is 0..2 for the 8/16/32 variants. */
2861             HReg dst  = newVRegD(env);
2862             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2863             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2864             IROp op   = e->Iex.Binop.op;
2865             UInt sz;
2866             if      (op == Iop_CmpGT8Sx8)  sz = 0;
2867             else if (op == Iop_CmpGT16Sx4) sz = 1;
2868             else if (op == Iop_CmpGT32Sx2) sz = 2;
2869             else vassert(0);
2870             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2871                                            dst, srcL, srcR, sz, False));
2872             return dst;
2873          }
2874          case Iop_CmpEQ8x8:
2875          case Iop_CmpEQ16x4:
2876          case Iop_CmpEQ32x2: {
2877             /* One ARMneon_VCEQ; sz is 0..2 for the 8/16/32 variants. */
2878             HReg dst  = newVRegD(env);
2879             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2880             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2881             IROp op   = e->Iex.Binop.op;
2882             UInt sz;
2883             if      (op == Iop_CmpEQ8x8)  sz = 0;
2884             else if (op == Iop_CmpEQ16x4) sz = 1;
2885             else if (op == Iop_CmpEQ32x2) sz = 2;
2886             else vassert(0);
2887             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2888                                            dst, srcL, srcR, sz, False));
2889             return dst;
2890          }
2891          case Iop_Mul8x8:
2892          case Iop_Mul16x4:
2893          case Iop_Mul32x2: {
2894             /* One ARMneon_VMUL; sz is 0..2 for the 8/16/32 variants. */
2895             HReg dst  = newVRegD(env);
2896             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2897             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2898             IROp op   = e->Iex.Binop.op;
2899             UInt sz   = 0;
2900             if      (op == Iop_Mul8x8)  sz = 0;
2901             else if (op == Iop_Mul16x4) sz = 1;
2902             else if (op == Iop_Mul32x2) sz = 2;
2903             else vassert(0);
2904             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2905                                            dst, srcL, srcR, sz, False));
2906             return dst;
2907          }
2908          case Iop_Mul32Fx2: {
2909             /* Emits a single ARMneon_VMULFP with its size field set to 0. */
2910             HReg dst  = newVRegD(env);
2911             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2912             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2913             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2914                                            dst, srcL, srcR, 0, False));
2915             return dst;
2916          }
2917          case Iop_QDMulHi16Sx4:
2918          case Iop_QDMulHi32Sx2: {
2919             /* One ARMneon_VQDMULH; sz is 1 for 16Sx4 and 2 for 32Sx2. */
2920             HReg dst  = newVRegD(env);
2921             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2922             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2923             IROp op   = e->Iex.Binop.op;
2924             UInt sz   = 0;
2925             if      (op == Iop_QDMulHi16Sx4) sz = 1;
2926             else if (op == Iop_QDMulHi32Sx2) sz = 2;
2927             else vassert(0);
2928             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2929                                            dst, srcL, srcR, sz, False));
2930             return dst;
2931          }
2932 
2933          case Iop_QRDMulHi16Sx4:
2934          case Iop_QRDMulHi32Sx2: {
2935             /* One ARMneon_VQRDMULH; sz is 1 for 16Sx4 and 2 for 32Sx2. */
2936             HReg dst  = newVRegD(env);
2937             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2938             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2939             IROp op   = e->Iex.Binop.op;
2940             UInt sz   = 0;
2941             if      (op == Iop_QRDMulHi16Sx4) sz = 1;
2942             else if (op == Iop_QRDMulHi32Sx2) sz = 2;
2943             else vassert(0);
2944             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2945                                            dst, srcL, srcR, sz, False));
2946             return dst;
2947          }
2948 
2949          case Iop_PwAdd8x8:
2950          case Iop_PwAdd16x4:
2951          case Iop_PwAdd32x2: {
2952             /* One ARMneon_VPADD; sz is 0..2 for the 8/16/32 variants. */
2953             HReg dst  = newVRegD(env);
2954             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2955             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2956             IROp op   = e->Iex.Binop.op;
2957             UInt sz   = 0;
2958             if      (op == Iop_PwAdd8x8)  sz = 0;
2959             else if (op == Iop_PwAdd16x4) sz = 1;
2960             else if (op == Iop_PwAdd32x2) sz = 2;
2961             else vassert(0);
2962             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2963                                            dst, srcL, srcR, sz, False));
2964             return dst;
2965          }
2966          case Iop_PwAdd32Fx2: {
2967             /* Emits a single ARMneon_VPADDFP with its size field set to 0. */
2968             HReg dst  = newVRegD(env);
2969             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2970             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2971             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2972                                            dst, srcL, srcR, 0, False));
2973             return dst;
2974          }
2975          case Iop_PwMin8Ux8:
2976          case Iop_PwMin16Ux4:
2977          case Iop_PwMin32Ux2: {
2978             /* One ARMneon_VPMINU; sz is 0..2 for the 8/16/32 variants. */
2979             HReg dst  = newVRegD(env);
2980             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2981             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2982             IROp op   = e->Iex.Binop.op;
2983             UInt sz   = 0;
2984             if      (op == Iop_PwMin8Ux8)  sz = 0;
2985             else if (op == Iop_PwMin16Ux4) sz = 1;
2986             else if (op == Iop_PwMin32Ux2) sz = 2;
2987             else vassert(0);
2988             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2989                                            dst, srcL, srcR, sz, False));
2990             return dst;
2991          }
2992          case Iop_PwMin8Sx8:
2993          case Iop_PwMin16Sx4:
2994          case Iop_PwMin32Sx2: {
2995             /* One ARMneon_VPMINS; sz is 0..2 for the 8/16/32 variants. */
2996             HReg dst  = newVRegD(env);
2997             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999             IROp op   = e->Iex.Binop.op;
3000             UInt sz   = 0;
3001             if      (op == Iop_PwMin8Sx8)  sz = 0;
3002             else if (op == Iop_PwMin16Sx4) sz = 1;
3003             else if (op == Iop_PwMin32Sx2) sz = 2;
3004             else vassert(0);
3005             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3006                                            dst, srcL, srcR, sz, False));
3007             return dst;
3008          }
3009          case Iop_PwMax8Ux8:
3010          case Iop_PwMax16Ux4:
3011          case Iop_PwMax32Ux2: {
3012             /* One ARMneon_VPMAXU; sz is 0..2 for the 8/16/32 variants. */
3013             HReg dst  = newVRegD(env);
3014             HReg srcL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3015             HReg srcR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3016             IROp op   = e->Iex.Binop.op;
3017             UInt sz   = 0;
3018             if      (op == Iop_PwMax8Ux8)  sz = 0;
3019             else if (op == Iop_PwMax16Ux4) sz = 1;
3020             else if (op == Iop_PwMax32Ux2) sz = 2;
3021             else vassert(0);
3022             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3023                                            dst, srcL, srcR, sz, False));
3024             return dst;
3025          }
3026          case Iop_PwMax8Sx8:
3027          case Iop_PwMax16Sx4:
3028          case Iop_PwMax32Sx2: {
3029             HReg res = newVRegD(env);
3030             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3031             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3032             UInt size = 0;
3033             switch(e->Iex.Binop.op) {
3034                case Iop_PwMax8Sx8: size = 0; break;
3035                case Iop_PwMax16Sx4: size = 1; break;
3036                case Iop_PwMax32Sx2: size = 2; break;
3037                default: vassert(0);
3038             }
3039             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3040                                            res, argL, argR, size, False));
3041             return res;
3042          }
3043          case Iop_Perm8x8: {
3044             HReg res = newVRegD(env);
3045             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3046             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3047             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3048                                            res, argL, argR, 0, False));
3049             return res;
3050          }
3051          case Iop_PolynomialMul8x8: {
3052             HReg res = newVRegD(env);
3053             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3054             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3055             UInt size = 0;
3056             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3057                                            res, argL, argR, size, False));
3058             return res;
3059          }
3060          case Iop_Max32Fx2: {
3061             HReg res = newVRegD(env);
3062             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3063             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3064             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3065                                            res, argL, argR, 2, False));
3066             return res;
3067          }
3068          case Iop_Min32Fx2: {
3069             HReg res = newVRegD(env);
3070             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3071             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3072             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3073                                            res, argL, argR, 2, False));
3074             return res;
3075          }
3076          case Iop_PwMax32Fx2: {
3077             HReg res = newVRegD(env);
3078             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3079             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3080             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3081                                            res, argL, argR, 2, False));
3082             return res;
3083          }
3084          case Iop_PwMin32Fx2: {
3085             HReg res = newVRegD(env);
3086             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3087             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3088             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3089                                            res, argL, argR, 2, False));
3090             return res;
3091          }
3092          case Iop_CmpGT32Fx2: {
3093             HReg res = newVRegD(env);
3094             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3095             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3096             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3097                                            res, argL, argR, 2, False));
3098             return res;
3099          }
3100          case Iop_CmpGE32Fx2: {
3101             HReg res = newVRegD(env);
3102             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3103             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3104             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3105                                            res, argL, argR, 2, False));
3106             return res;
3107          }
3108          case Iop_CmpEQ32Fx2: {
3109             HReg res = newVRegD(env);
3110             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3111             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3112             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3113                                            res, argL, argR, 2, False));
3114             return res;
3115          }
3116          case Iop_F32ToFixed32Ux2_RZ:
3117          case Iop_F32ToFixed32Sx2_RZ:
3118          case Iop_Fixed32UToF32x2_RN:
3119          case Iop_Fixed32SToF32x2_RN: {
3120             HReg res = newVRegD(env);
3121             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3122             ARMNeonUnOp op;
3123             UInt imm6;
3124             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3125                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3126                   vpanic("ARM supports FP <-> Fixed conversion with constant "
3127                          "second argument less than 33 only\n");
3128             }
3129             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3130             vassert(imm6 <= 32 && imm6 > 0);
3131             imm6 = 64 - imm6;
3132             switch(e->Iex.Binop.op) {
3133                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3134                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3135                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3136                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3137                default: vassert(0);
3138             }
3139             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3140             return res;
3141          }
3142          /*
3143          FIXME: is this here or not?
3144          case Iop_VDup8x8:
3145          case Iop_VDup16x4:
3146          case Iop_VDup32x2: {
3147             HReg res = newVRegD(env);
3148             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3149             UInt index;
3150             UInt imm4;
3151             UInt size = 0;
3152             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3153                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3154                   vpanic("ARM supports Iop_VDup with constant "
3155                          "second argument less than 16 only\n");
3156             }
3157             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3158             switch(e->Iex.Binop.op) {
3159                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3160                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3161                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3162                default: vassert(0);
3163             }
3164             if (imm4 >= 16) {
3165                vpanic("ARM supports Iop_VDup with constant "
3166                       "second argument less than 16 only\n");
3167             }
3168             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3169                                           res, argL, imm4, False));
3170             return res;
3171          }
3172          */
3173          default:
3174             break;
3175       }
3176    }
3177 
3178    /* --------- UNARY ops --------- */
3179    if (e->tag == Iex_Unop) {
3180       switch (e->Iex.Unop.op) {
3181 
3182          /* 32Uto64 */
3183          case Iop_32Uto64: {
3184             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3185             HReg rHi = newVRegI(env);
3186             HReg res = newVRegD(env);
3187             addInstr(env, ARMInstr_Imm32(rHi, 0));
3188             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3189             return res;
3190          }
3191 
3192          /* 32Sto64 */
3193          case Iop_32Sto64: {
3194             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3195             HReg rHi = newVRegI(env);
3196             addInstr(env, mk_iMOVds_RR(rHi, rLo));
3197             addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3198             HReg res = newVRegD(env);
3199             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3200             return res;
3201          }
3202 
3203          /* The next 3 are pass-throughs */
3204          /* ReinterpF64asI64 */
3205          case Iop_ReinterpF64asI64:
3206          /* Left64(e) */
3207          case Iop_Left64:
3208          /* 1Sto64(e) */
3209          case Iop_1Sto64: {
3210             HReg rLo, rHi;
3211             HReg res = newVRegD(env);
3212             iselInt64Expr(&rHi, &rLo, env, e);
3213             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3214             return res;
3215          }
3216 
3217          case Iop_Not64: {
3218             DECLARE_PATTERN(p_veqz_8x8);
3219             DECLARE_PATTERN(p_veqz_16x4);
3220             DECLARE_PATTERN(p_veqz_32x2);
3221             DECLARE_PATTERN(p_vcge_8sx8);
3222             DECLARE_PATTERN(p_vcge_16sx4);
3223             DECLARE_PATTERN(p_vcge_32sx2);
3224             DECLARE_PATTERN(p_vcge_8ux8);
3225             DECLARE_PATTERN(p_vcge_16ux4);
3226             DECLARE_PATTERN(p_vcge_32ux2);
3227             DEFINE_PATTERN(p_veqz_8x8,
3228                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3229             DEFINE_PATTERN(p_veqz_16x4,
3230                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3231             DEFINE_PATTERN(p_veqz_32x2,
3232                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3233             DEFINE_PATTERN(p_vcge_8sx8,
3234                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3235             DEFINE_PATTERN(p_vcge_16sx4,
3236                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3237             DEFINE_PATTERN(p_vcge_32sx2,
3238                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3239             DEFINE_PATTERN(p_vcge_8ux8,
3240                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3241             DEFINE_PATTERN(p_vcge_16ux4,
3242                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3243             DEFINE_PATTERN(p_vcge_32ux2,
3244                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3245             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3246                HReg res = newVRegD(env);
3247                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3248                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3249                return res;
3250             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3251                HReg res = newVRegD(env);
3252                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3253                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3254                return res;
3255             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3256                HReg res = newVRegD(env);
3257                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3258                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3259                return res;
3260             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3261                HReg res = newVRegD(env);
3262                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3263                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3264                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3265                                               res, argL, argR, 0, False));
3266                return res;
3267             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3268                HReg res = newVRegD(env);
3269                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3270                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3271                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3272                                               res, argL, argR, 1, False));
3273                return res;
3274             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3275                HReg res = newVRegD(env);
3276                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3277                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3278                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3279                                               res, argL, argR, 2, False));
3280                return res;
3281             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3282                HReg res = newVRegD(env);
3283                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3284                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3285                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3286                                               res, argL, argR, 0, False));
3287                return res;
3288             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3289                HReg res = newVRegD(env);
3290                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3291                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3292                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3293                                               res, argL, argR, 1, False));
3294                return res;
3295             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3296                HReg res = newVRegD(env);
3297                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3298                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3299                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3300                                               res, argL, argR, 2, False));
3301                return res;
3302             } else {
3303                HReg res = newVRegD(env);
3304                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3306                return res;
3307             }
3308          }
3309          case Iop_Dup8x8:
3310          case Iop_Dup16x4:
3311          case Iop_Dup32x2: {
3312             HReg res, arg;
3313             UInt size;
3314             DECLARE_PATTERN(p_vdup_8x8);
3315             DECLARE_PATTERN(p_vdup_16x4);
3316             DECLARE_PATTERN(p_vdup_32x2);
3317             DEFINE_PATTERN(p_vdup_8x8,
3318                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3319             DEFINE_PATTERN(p_vdup_16x4,
3320                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3321             DEFINE_PATTERN(p_vdup_32x2,
3322                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3323             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3324                UInt index;
3325                UInt imm4;
3326                if (mi.bindee[1]->tag == Iex_Const &&
3327                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3328                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3329                   imm4 = (index << 1) + 1;
3330                   if (index < 8) {
3331                      res = newVRegD(env);
3332                      arg = iselNeon64Expr(env, mi.bindee[0]);
3333                      addInstr(env, ARMInstr_NUnaryS(
3334                                       ARMneon_VDUP,
3335                                       mkARMNRS(ARMNRS_Reg, res, 0),
3336                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3337                                       imm4, False
3338                              ));
3339                      return res;
3340                   }
3341                }
3342             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3343                UInt index;
3344                UInt imm4;
3345                if (mi.bindee[1]->tag == Iex_Const &&
3346                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3347                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3348                   imm4 = (index << 2) + 2;
3349                   if (index < 4) {
3350                      res = newVRegD(env);
3351                      arg = iselNeon64Expr(env, mi.bindee[0]);
3352                      addInstr(env, ARMInstr_NUnaryS(
3353                                       ARMneon_VDUP,
3354                                       mkARMNRS(ARMNRS_Reg, res, 0),
3355                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3356                                       imm4, False
3357                              ));
3358                      return res;
3359                   }
3360                }
3361             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3362                UInt index;
3363                UInt imm4;
3364                if (mi.bindee[1]->tag == Iex_Const &&
3365                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3366                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3367                   imm4 = (index << 3) + 4;
3368                   if (index < 2) {
3369                      res = newVRegD(env);
3370                      arg = iselNeon64Expr(env, mi.bindee[0]);
3371                      addInstr(env, ARMInstr_NUnaryS(
3372                                       ARMneon_VDUP,
3373                                       mkARMNRS(ARMNRS_Reg, res, 0),
3374                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3375                                       imm4, False
3376                              ));
3377                      return res;
3378                   }
3379                }
3380             }
3381             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3382             res = newVRegD(env);
3383             switch (e->Iex.Unop.op) {
3384                case Iop_Dup8x8: size = 0; break;
3385                case Iop_Dup16x4: size = 1; break;
3386                case Iop_Dup32x2: size = 2; break;
3387                default: vassert(0);
3388             }
3389             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3390             return res;
3391          }
3392          case Iop_Abs8x8:
3393          case Iop_Abs16x4:
3394          case Iop_Abs32x2: {
3395             HReg res = newVRegD(env);
3396             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3397             UInt size = 0;
3398             switch(e->Iex.Binop.op) {
3399                case Iop_Abs8x8: size = 0; break;
3400                case Iop_Abs16x4: size = 1; break;
3401                case Iop_Abs32x2: size = 2; break;
3402                default: vassert(0);
3403             }
3404             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3405             return res;
3406          }
3407          case Iop_Reverse8sIn64_x1:
3408          case Iop_Reverse16sIn64_x1:
3409          case Iop_Reverse32sIn64_x1: {
3410             HReg res = newVRegD(env);
3411             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3412             UInt size = 0;
3413             switch(e->Iex.Binop.op) {
3414                case Iop_Reverse8sIn64_x1: size = 0; break;
3415                case Iop_Reverse16sIn64_x1: size = 1; break;
3416                case Iop_Reverse32sIn64_x1: size = 2; break;
3417                default: vassert(0);
3418             }
3419             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3420                                           res, arg, size, False));
3421             return res;
3422          }
3423          case Iop_Reverse8sIn32_x2:
3424          case Iop_Reverse16sIn32_x2: {
3425             HReg res = newVRegD(env);
3426             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3427             UInt size = 0;
3428             switch(e->Iex.Binop.op) {
3429                case Iop_Reverse8sIn32_x2: size = 0; break;
3430                case Iop_Reverse16sIn32_x2: size = 1; break;
3431                default: vassert(0);
3432             }
3433             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3434                                           res, arg, size, False));
3435             return res;
3436          }
3437          case Iop_Reverse8sIn16_x4: {
3438             HReg res = newVRegD(env);
3439             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3440             UInt size = 0;
3441             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3442                                           res, arg, size, False));
3443             return res;
3444          }
3445          case Iop_CmpwNEZ64: {
3446             HReg x_lsh = newVRegD(env);
3447             HReg x_rsh = newVRegD(env);
3448             HReg lsh_amt = newVRegD(env);
3449             HReg rsh_amt = newVRegD(env);
3450             HReg zero = newVRegD(env);
3451             HReg tmp = newVRegD(env);
3452             HReg tmp2 = newVRegD(env);
3453             HReg res = newVRegD(env);
3454             HReg x = newVRegD(env);
3455             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3456             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3457             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3458             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3459             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3460             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3461                                            rsh_amt, zero, lsh_amt, 2, False));
3462             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3463                                           x_lsh, x, lsh_amt, 3, False));
3464             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3465                                           x_rsh, x, rsh_amt, 3, False));
3466             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3467                                            tmp, x_lsh, x_rsh, 0, False));
3468             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3469                                            res, tmp, x, 0, False));
3470             return res;
3471          }
3472          case Iop_CmpNEZ8x8:
3473          case Iop_CmpNEZ16x4:
3474          case Iop_CmpNEZ32x2: {
3475             HReg res = newVRegD(env);
3476             HReg tmp = newVRegD(env);
3477             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3478             UInt size;
3479             switch (e->Iex.Unop.op) {
3480                case Iop_CmpNEZ8x8: size = 0; break;
3481                case Iop_CmpNEZ16x4: size = 1; break;
3482                case Iop_CmpNEZ32x2: size = 2; break;
3483                default: vassert(0);
3484             }
3485             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3486             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3487             return res;
3488          }
3489          case Iop_NarrowUn16to8x8:
3490          case Iop_NarrowUn32to16x4:
3491          case Iop_NarrowUn64to32x2: {
3492             HReg res = newVRegD(env);
3493             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3494             UInt size = 0;
3495             switch(e->Iex.Binop.op) {
3496                case Iop_NarrowUn16to8x8:  size = 0; break;
3497                case Iop_NarrowUn32to16x4: size = 1; break;
3498                case Iop_NarrowUn64to32x2: size = 2; break;
3499                default: vassert(0);
3500             }
3501             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3502                                           res, arg, size, False));
3503             return res;
3504          }
3505          case Iop_QNarrowUn16Sto8Sx8:
3506          case Iop_QNarrowUn32Sto16Sx4:
3507          case Iop_QNarrowUn64Sto32Sx2: {
3508             HReg res = newVRegD(env);
3509             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3510             UInt size = 0;
3511             switch(e->Iex.Binop.op) {
3512                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3513                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3514                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3515                default: vassert(0);
3516             }
3517             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3518                                           res, arg, size, False));
3519             return res;
3520          }
3521          case Iop_QNarrowUn16Sto8Ux8:
3522          case Iop_QNarrowUn32Sto16Ux4:
3523          case Iop_QNarrowUn64Sto32Ux2: {
3524             HReg res = newVRegD(env);
3525             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3526             UInt size = 0;
3527             switch(e->Iex.Binop.op) {
3528                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3529                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3530                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3531                default: vassert(0);
3532             }
3533             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3534                                           res, arg, size, False));
3535             return res;
3536          }
3537          case Iop_QNarrowUn16Uto8Ux8:
3538          case Iop_QNarrowUn32Uto16Ux4:
3539          case Iop_QNarrowUn64Uto32Ux2: {
3540             HReg res = newVRegD(env);
3541             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3542             UInt size = 0;
3543             switch(e->Iex.Binop.op) {
3544                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3545                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3546                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3547                default: vassert(0);
3548             }
3549             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3550                                           res, arg, size, False));
3551             return res;
3552          }
3553          case Iop_PwAddL8Sx8:
3554          case Iop_PwAddL16Sx4:
3555          case Iop_PwAddL32Sx2: {
3556             HReg res = newVRegD(env);
3557             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3558             UInt size = 0;
3559             switch(e->Iex.Binop.op) {
3560                case Iop_PwAddL8Sx8: size = 0; break;
3561                case Iop_PwAddL16Sx4: size = 1; break;
3562                case Iop_PwAddL32Sx2: size = 2; break;
3563                default: vassert(0);
3564             }
3565             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3566                                           res, arg, size, False));
3567             return res;
3568          }
3569          case Iop_PwAddL8Ux8:
3570          case Iop_PwAddL16Ux4:
3571          case Iop_PwAddL32Ux2: {
3572             HReg res = newVRegD(env);
3573             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3574             UInt size = 0;
3575             switch(e->Iex.Binop.op) {
3576                case Iop_PwAddL8Ux8: size = 0; break;
3577                case Iop_PwAddL16Ux4: size = 1; break;
3578                case Iop_PwAddL32Ux2: size = 2; break;
3579                default: vassert(0);
3580             }
3581             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3582                                           res, arg, size, False));
3583             return res;
3584          }
3585          case Iop_Cnt8x8: {
3586             HReg res = newVRegD(env);
3587             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3588             UInt size = 0;
3589             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3590                                           res, arg, size, False));
3591             return res;
3592          }
3593          case Iop_Clz8x8:
3594          case Iop_Clz16x4:
3595          case Iop_Clz32x2: {
3596             HReg res = newVRegD(env);
3597             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3598             UInt size = 0;
3599             switch(e->Iex.Binop.op) {
3600                case Iop_Clz8x8: size = 0; break;
3601                case Iop_Clz16x4: size = 1; break;
3602                case Iop_Clz32x2: size = 2; break;
3603                default: vassert(0);
3604             }
3605             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3606                                           res, arg, size, False));
3607             return res;
3608          }
3609          case Iop_Cls8x8:
3610          case Iop_Cls16x4:
3611          case Iop_Cls32x2: {
3612             HReg res = newVRegD(env);
3613             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3614             UInt size = 0;
3615             switch(e->Iex.Binop.op) {
3616                case Iop_Cls8x8: size = 0; break;
3617                case Iop_Cls16x4: size = 1; break;
3618                case Iop_Cls32x2: size = 2; break;
3619                default: vassert(0);
3620             }
3621             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3622                                           res, arg, size, False));
3623             return res;
3624          }
3625          case Iop_FtoI32Sx2_RZ: {
3626             HReg res = newVRegD(env);
3627             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3628             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3629                                           res, arg, 2, False));
3630             return res;
3631          }
3632          case Iop_FtoI32Ux2_RZ: {
3633             HReg res = newVRegD(env);
3634             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3635             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3636                                           res, arg, 2, False));
3637             return res;
3638          }
3639          case Iop_I32StoFx2: {
3640             HReg res = newVRegD(env);
3641             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3642             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3643                                           res, arg, 2, False));
3644             return res;
3645          }
3646          case Iop_I32UtoFx2: {
3647             HReg res = newVRegD(env);
3648             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3649             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3650                                           res, arg, 2, False));
3651             return res;
3652          }
3653          case Iop_F32toF16x4: {
3654             HReg res = newVRegD(env);
3655             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3656             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3657                                           res, arg, 2, False));
3658             return res;
3659          }
3660          case Iop_RecipEst32Fx2: {
3661             HReg res = newVRegD(env);
3662             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3663             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3664                                           res, argL, 0, False));
3665             return res;
3666          }
3667          case Iop_RecipEst32Ux2: {
3668             HReg res = newVRegD(env);
3669             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3670             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3671                                           res, argL, 0, False));
3672             return res;
3673          }
3674          case Iop_Abs32Fx2: {
3675             DECLARE_PATTERN(p_vabd_32fx2);
3676             DEFINE_PATTERN(p_vabd_32fx2,
3677                            unop(Iop_Abs32Fx2,
3678                                 binop(Iop_Sub32Fx2,
3679                                       bind(0),
3680                                       bind(1))));
3681             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3682                HReg res = newVRegD(env);
3683                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3684                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3685                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3686                                               res, argL, argR, 0, False));
3687                return res;
3688             } else {
3689                HReg res = newVRegD(env);
3690                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3691                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3692                                              res, arg, 0, False));
3693                return res;
3694             }
3695          }
3696          case Iop_RSqrtEst32Fx2: {
3697             HReg res = newVRegD(env);
3698             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3699             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3700                                           res, arg, 0, False));
3701             return res;
3702          }
3703          case Iop_RSqrtEst32Ux2: {
3704             HReg res = newVRegD(env);
3705             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3706             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3707                                           res, arg, 0, False));
3708             return res;
3709          }
3710          case Iop_Neg32Fx2: {
3711             HReg res = newVRegD(env);
3712             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3713             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3714                                           res, arg, 0, False));
3715             return res;
3716          }
3717          default:
3718             break;
3719       }
3720    } /* if (e->tag == Iex_Unop) */
3721 
3722    if (e->tag == Iex_Triop) {
3723       IRTriop *triop = e->Iex.Triop.details;
3724 
3725       switch (triop->op) {
3726          case Iop_Slice64: {
3727             HReg res = newVRegD(env);
3728             HReg argL = iselNeon64Expr(env, triop->arg2);
3729             HReg argR = iselNeon64Expr(env, triop->arg1);
3730             UInt imm4;
3731             if (triop->arg3->tag != Iex_Const ||
3732                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3733                vpanic("ARM target supports Iop_Extract64 with constant "
3734                       "third argument less than 16 only\n");
3735             }
3736             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3737             if (imm4 >= 8) {
3738                vpanic("ARM target supports Iop_Extract64 with constant "
3739                       "third argument less than 16 only\n");
3740             }
3741             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3742                                            res, argL, argR, imm4, False));
3743             return res;
3744          }
3745          case Iop_SetElem8x8:
3746          case Iop_SetElem16x4:
3747          case Iop_SetElem32x2: {
3748             HReg res = newVRegD(env);
3749             HReg dreg = iselNeon64Expr(env, triop->arg1);
3750             HReg arg = iselIntExpr_R(env, triop->arg3);
3751             UInt index, size;
3752             if (triop->arg2->tag != Iex_Const ||
3753                 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3754                vpanic("ARM target supports SetElem with constant "
3755                       "second argument only\n");
3756             }
3757             index = triop->arg2->Iex.Const.con->Ico.U8;
3758             switch (triop->op) {
3759                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3760                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3761                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3762                default: vassert(0);
3763             }
3764             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3765             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3766                                            mkARMNRS(ARMNRS_Scalar, res, index),
3767                                            mkARMNRS(ARMNRS_Reg, arg, 0),
3768                                            size, False));
3769             return res;
3770          }
3771          default:
3772             break;
3773       }
3774    }
3775 
3776    /* --------- MULTIPLEX --------- */
3777    if (e->tag == Iex_ITE) { // VFD
3778       HReg rLo, rHi;
3779       HReg res = newVRegD(env);
3780       iselInt64Expr(&rHi, &rLo, env, e);
3781       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3782       return res;
3783    }
3784 
3785    ppIRExpr(e);
3786    vpanic("iselNeon64Expr");
3787 }
3788 
3789 
iselNeonExpr(ISelEnv * env,IRExpr * e)3790 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3791 {
3792    HReg r;
3793    vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3794    r = iselNeonExpr_wrk( env, e );
3795    vassert(hregClass(r) == HRcVec128);
3796    vassert(hregIsVirtual(r));
3797    return r;
3798 }
3799 
3800 /* DO NOT CALL THIS DIRECTLY */
iselNeonExpr_wrk(ISelEnv * env,IRExpr * e)3801 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3802 {
3803    IRType ty = typeOfIRExpr(env->type_env, e);
3804    MatchInfo mi;
3805    vassert(e);
3806    vassert(ty == Ity_V128);
3807 
3808    if (e->tag == Iex_RdTmp) {
3809       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3810    }
3811 
3812    if (e->tag == Iex_Const) {
3813       /* At the moment there should be no 128-bit constants in IR for ARM
3814          generated during disassemble. They are represented as Iop_64HLtoV128
3815          binary operation and are handled among binary ops. */
3816       /* But zero can be created by valgrind internal optimizer */
3817       if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3818          HReg res = newVRegV(env);
3819          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3820          return res;
3821       }
3822       if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3823          HReg res = newVRegV(env);
3824          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3825          return res;
3826       }
3827       ppIRExpr(e);
3828       vpanic("128-bit constant is not implemented");
3829    }
3830 
3831    if (e->tag == Iex_Load) {
3832       HReg res = newVRegV(env);
3833       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3834       vassert(ty == Ity_V128);
3835       addInstr(env, ARMInstr_NLdStQ(True, res, am));
3836       return res;
3837    }
3838 
3839    if (e->tag == Iex_Get) {
3840       HReg addr = newVRegI(env);
3841       HReg res = newVRegV(env);
3842       vassert(ty == Ity_V128);
3843       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3844       addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3845       return res;
3846    }
3847 
3848    if (e->tag == Iex_Unop) {
3849       switch (e->Iex.Unop.op) {
3850          case Iop_NotV128: {
3851             DECLARE_PATTERN(p_veqz_8x16);
3852             DECLARE_PATTERN(p_veqz_16x8);
3853             DECLARE_PATTERN(p_veqz_32x4);
3854             DECLARE_PATTERN(p_vcge_8sx16);
3855             DECLARE_PATTERN(p_vcge_16sx8);
3856             DECLARE_PATTERN(p_vcge_32sx4);
3857             DECLARE_PATTERN(p_vcge_8ux16);
3858             DECLARE_PATTERN(p_vcge_16ux8);
3859             DECLARE_PATTERN(p_vcge_32ux4);
3860             DEFINE_PATTERN(p_veqz_8x16,
3861                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3862             DEFINE_PATTERN(p_veqz_16x8,
3863                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3864             DEFINE_PATTERN(p_veqz_32x4,
3865                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3866             DEFINE_PATTERN(p_vcge_8sx16,
3867                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3868             DEFINE_PATTERN(p_vcge_16sx8,
3869                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3870             DEFINE_PATTERN(p_vcge_32sx4,
3871                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3872             DEFINE_PATTERN(p_vcge_8ux16,
3873                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3874             DEFINE_PATTERN(p_vcge_16ux8,
3875                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3876             DEFINE_PATTERN(p_vcge_32ux4,
3877                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3878             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3879                HReg res = newVRegV(env);
3880                HReg arg = iselNeonExpr(env, mi.bindee[0]);
3881                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3882                return res;
3883             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3884                HReg res = newVRegV(env);
3885                HReg arg = iselNeonExpr(env, mi.bindee[0]);
3886                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3887                return res;
3888             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3889                HReg res = newVRegV(env);
3890                HReg arg = iselNeonExpr(env, mi.bindee[0]);
3891                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3892                return res;
3893             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3894                HReg res = newVRegV(env);
3895                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3896                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3897                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3898                                               res, argL, argR, 0, True));
3899                return res;
3900             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3901                HReg res = newVRegV(env);
3902                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3903                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3904                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3905                                               res, argL, argR, 1, True));
3906                return res;
3907             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3908                HReg res = newVRegV(env);
3909                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3910                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3911                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3912                                               res, argL, argR, 2, True));
3913                return res;
3914             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3915                HReg res = newVRegV(env);
3916                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3917                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3918                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3919                                               res, argL, argR, 0, True));
3920                return res;
3921             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3922                HReg res = newVRegV(env);
3923                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3924                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3925                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3926                                               res, argL, argR, 1, True));
3927                return res;
3928             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3929                HReg res = newVRegV(env);
3930                HReg argL = iselNeonExpr(env, mi.bindee[0]);
3931                HReg argR = iselNeonExpr(env, mi.bindee[1]);
3932                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3933                                               res, argL, argR, 2, True));
3934                return res;
3935             } else {
3936                HReg res = newVRegV(env);
3937                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3938                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3939                return res;
3940             }
3941          }
3942          case Iop_Dup8x16:
3943          case Iop_Dup16x8:
3944          case Iop_Dup32x4: {
3945             HReg res, arg;
3946             UInt size;
3947             DECLARE_PATTERN(p_vdup_8x16);
3948             DECLARE_PATTERN(p_vdup_16x8);
3949             DECLARE_PATTERN(p_vdup_32x4);
3950             DEFINE_PATTERN(p_vdup_8x16,
3951                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3952             DEFINE_PATTERN(p_vdup_16x8,
3953                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3954             DEFINE_PATTERN(p_vdup_32x4,
3955                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3956             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3957                UInt index;
3958                UInt imm4;
3959                if (mi.bindee[1]->tag == Iex_Const &&
3960                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3961                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3962                   imm4 = (index << 1) + 1;
3963                   if (index < 8) {
3964                      res = newVRegV(env);
3965                      arg = iselNeon64Expr(env, mi.bindee[0]);
3966                      addInstr(env, ARMInstr_NUnaryS(
3967                                       ARMneon_VDUP,
3968                                       mkARMNRS(ARMNRS_Reg, res, 0),
3969                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3970                                       imm4, True
3971                              ));
3972                      return res;
3973                   }
3974                }
3975             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3976                UInt index;
3977                UInt imm4;
3978                if (mi.bindee[1]->tag == Iex_Const &&
3979                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3980                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3981                   imm4 = (index << 2) + 2;
3982                   if (index < 4) {
3983                      res = newVRegV(env);
3984                      arg = iselNeon64Expr(env, mi.bindee[0]);
3985                      addInstr(env, ARMInstr_NUnaryS(
3986                                       ARMneon_VDUP,
3987                                       mkARMNRS(ARMNRS_Reg, res, 0),
3988                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3989                                       imm4, True
3990                              ));
3991                      return res;
3992                   }
3993                }
3994             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3995                UInt index;
3996                UInt imm4;
3997                if (mi.bindee[1]->tag == Iex_Const &&
3998                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3999                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4000                   imm4 = (index << 3) + 4;
4001                   if (index < 2) {
4002                      res = newVRegV(env);
4003                      arg = iselNeon64Expr(env, mi.bindee[0]);
4004                      addInstr(env, ARMInstr_NUnaryS(
4005                                       ARMneon_VDUP,
4006                                       mkARMNRS(ARMNRS_Reg, res, 0),
4007                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4008                                       imm4, True
4009                              ));
4010                      return res;
4011                   }
4012                }
4013             }
4014             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4015             res = newVRegV(env);
4016             switch (e->Iex.Unop.op) {
4017                case Iop_Dup8x16: size = 0; break;
4018                case Iop_Dup16x8: size = 1; break;
4019                case Iop_Dup32x4: size = 2; break;
4020                default: vassert(0);
4021             }
4022             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4023             return res;
4024          }
4025          case Iop_Abs8x16:
4026          case Iop_Abs16x8:
4027          case Iop_Abs32x4: {
4028             HReg res = newVRegV(env);
4029             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030             UInt size = 0;
4031             switch(e->Iex.Binop.op) {
4032                case Iop_Abs8x16: size = 0; break;
4033                case Iop_Abs16x8: size = 1; break;
4034                case Iop_Abs32x4: size = 2; break;
4035                default: vassert(0);
4036             }
4037             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4038             return res;
4039          }
4040          case Iop_Reverse8sIn64_x2:
4041          case Iop_Reverse16sIn64_x2:
4042          case Iop_Reverse32sIn64_x2: {
4043             HReg res = newVRegV(env);
4044             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4045             UInt size = 0;
4046             switch(e->Iex.Binop.op) {
4047                case Iop_Reverse8sIn64_x2: size = 0; break;
4048                case Iop_Reverse16sIn64_x2: size = 1; break;
4049                case Iop_Reverse32sIn64_x2: size = 2; break;
4050                default: vassert(0);
4051             }
4052             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4053                                           res, arg, size, True));
4054             return res;
4055          }
4056          case Iop_Reverse8sIn32_x4:
4057          case Iop_Reverse16sIn32_x4: {
4058             HReg res = newVRegV(env);
4059             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4060             UInt size = 0;
4061             switch(e->Iex.Binop.op) {
4062                case Iop_Reverse8sIn32_x4: size = 0; break;
4063                case Iop_Reverse16sIn32_x4: size = 1; break;
4064                default: vassert(0);
4065             }
4066             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4067                                           res, arg, size, True));
4068             return res;
4069          }
4070          case Iop_Reverse8sIn16_x8: {
4071             HReg res = newVRegV(env);
4072             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4073             UInt size = 0;
4074             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4075                                           res, arg, size, True));
4076             return res;
4077          }
4078          case Iop_CmpNEZ64x2: {
4079             HReg x_lsh = newVRegV(env);
4080             HReg x_rsh = newVRegV(env);
4081             HReg lsh_amt = newVRegV(env);
4082             HReg rsh_amt = newVRegV(env);
4083             HReg zero = newVRegV(env);
4084             HReg tmp = newVRegV(env);
4085             HReg tmp2 = newVRegV(env);
4086             HReg res = newVRegV(env);
4087             HReg x = newVRegV(env);
4088             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4089             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4090             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4091             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4092             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4093             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4094                                            rsh_amt, zero, lsh_amt, 2, True));
4095             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4096                                           x_lsh, x, lsh_amt, 3, True));
4097             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4098                                           x_rsh, x, rsh_amt, 3, True));
4099             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4100                                            tmp, x_lsh, x_rsh, 0, True));
4101             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4102                                            res, tmp, x, 0, True));
4103             return res;
4104          }
4105          case Iop_CmpNEZ8x16:
4106          case Iop_CmpNEZ16x8:
4107          case Iop_CmpNEZ32x4: {
4108             HReg res = newVRegV(env);
4109             HReg tmp = newVRegV(env);
4110             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4111             UInt size;
4112             switch (e->Iex.Unop.op) {
4113                case Iop_CmpNEZ8x16: size = 0; break;
4114                case Iop_CmpNEZ16x8: size = 1; break;
4115                case Iop_CmpNEZ32x4: size = 2; break;
4116                default: vassert(0);
4117             }
4118             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4119             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4120             return res;
4121          }
4122          case Iop_Widen8Uto16x8:
4123          case Iop_Widen16Uto32x4:
4124          case Iop_Widen32Uto64x2: {
4125             HReg res = newVRegV(env);
4126             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4127             UInt size;
4128             switch (e->Iex.Unop.op) {
4129                case Iop_Widen8Uto16x8:  size = 0; break;
4130                case Iop_Widen16Uto32x4: size = 1; break;
4131                case Iop_Widen32Uto64x2: size = 2; break;
4132                default: vassert(0);
4133             }
4134             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4135                                           res, arg, size, True));
4136             return res;
4137          }
4138          case Iop_Widen8Sto16x8:
4139          case Iop_Widen16Sto32x4:
4140          case Iop_Widen32Sto64x2: {
4141             HReg res = newVRegV(env);
4142             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4143             UInt size;
4144             switch (e->Iex.Unop.op) {
4145                case Iop_Widen8Sto16x8:  size = 0; break;
4146                case Iop_Widen16Sto32x4: size = 1; break;
4147                case Iop_Widen32Sto64x2: size = 2; break;
4148                default: vassert(0);
4149             }
4150             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4151                                           res, arg, size, True));
4152             return res;
4153          }
4154          case Iop_PwAddL8Sx16:
4155          case Iop_PwAddL16Sx8:
4156          case Iop_PwAddL32Sx4: {
4157             HReg res = newVRegV(env);
4158             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4159             UInt size = 0;
4160             switch(e->Iex.Binop.op) {
4161                case Iop_PwAddL8Sx16: size = 0; break;
4162                case Iop_PwAddL16Sx8: size = 1; break;
4163                case Iop_PwAddL32Sx4: size = 2; break;
4164                default: vassert(0);
4165             }
4166             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4167                                           res, arg, size, True));
4168             return res;
4169          }
4170          case Iop_PwAddL8Ux16:
4171          case Iop_PwAddL16Ux8:
4172          case Iop_PwAddL32Ux4: {
4173             HReg res = newVRegV(env);
4174             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4175             UInt size = 0;
4176             switch(e->Iex.Binop.op) {
4177                case Iop_PwAddL8Ux16: size = 0; break;
4178                case Iop_PwAddL16Ux8: size = 1; break;
4179                case Iop_PwAddL32Ux4: size = 2; break;
4180                default: vassert(0);
4181             }
4182             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4183                                           res, arg, size, True));
4184             return res;
4185          }
4186          case Iop_Cnt8x16: {
4187             HReg res = newVRegV(env);
4188             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4189             UInt size = 0;
4190             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4191             return res;
4192          }
4193          case Iop_Clz8x16:
4194          case Iop_Clz16x8:
4195          case Iop_Clz32x4: {
4196             HReg res = newVRegV(env);
4197             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4198             UInt size = 0;
4199             switch(e->Iex.Binop.op) {
4200                case Iop_Clz8x16: size = 0; break;
4201                case Iop_Clz16x8: size = 1; break;
4202                case Iop_Clz32x4: size = 2; break;
4203                default: vassert(0);
4204             }
4205             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4206             return res;
4207          }
4208          case Iop_Cls8x16:
4209          case Iop_Cls16x8:
4210          case Iop_Cls32x4: {
4211             HReg res = newVRegV(env);
4212             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4213             UInt size = 0;
4214             switch(e->Iex.Binop.op) {
4215                case Iop_Cls8x16: size = 0; break;
4216                case Iop_Cls16x8: size = 1; break;
4217                case Iop_Cls32x4: size = 2; break;
4218                default: vassert(0);
4219             }
4220             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4221             return res;
4222          }
4223          case Iop_FtoI32Sx4_RZ: {
4224             HReg res = newVRegV(env);
4225             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4226             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4227                                           res, arg, 2, True));
4228             return res;
4229          }
4230          case Iop_FtoI32Ux4_RZ: {
4231             HReg res = newVRegV(env);
4232             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4233             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4234                                           res, arg, 2, True));
4235             return res;
4236          }
4237          case Iop_I32StoFx4: {
4238             HReg res = newVRegV(env);
4239             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4240             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4241                                           res, arg, 2, True));
4242             return res;
4243          }
4244          case Iop_I32UtoFx4: {
4245             HReg res = newVRegV(env);
4246             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4247             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4248                                           res, arg, 2, True));
4249             return res;
4250          }
4251          case Iop_F16toF32x4: {
4252             HReg res = newVRegV(env);
4253             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4254             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4255                                           res, arg, 2, True));
4256             return res;
4257          }
4258          case Iop_RecipEst32Fx4: {
4259             HReg res = newVRegV(env);
4260             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4261             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4262                                           res, argL, 0, True));
4263             return res;
4264          }
4265          case Iop_RecipEst32Ux4: {
4266             HReg res = newVRegV(env);
4267             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4268             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4269                                           res, argL, 0, True));
4270             return res;
4271          }
4272          case Iop_Abs32Fx4: {
4273             HReg res = newVRegV(env);
4274             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4275             addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4276                                           res, argL, 0, True));
4277             return res;
4278          }
4279          case Iop_RSqrtEst32Fx4: {
4280             HReg res = newVRegV(env);
4281             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4282             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4283                                           res, argL, 0, True));
4284             return res;
4285          }
4286          case Iop_RSqrtEst32Ux4: {
4287             HReg res = newVRegV(env);
4288             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4289             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4290                                           res, argL, 0, True));
4291             return res;
4292          }
4293          case Iop_Neg32Fx4: {
4294             HReg res = newVRegV(env);
4295             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4296             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4297                                           res, arg, 0, True));
4298             return res;
4299          }
4300          /* ... */
4301          default:
4302             break;
4303       }
4304    }
4305 
4306    if (e->tag == Iex_Binop) {
4307       switch (e->Iex.Binop.op) {
4308          case Iop_64HLtoV128:
4309             /* Try to match into single "VMOV reg, imm" instruction */
4310             if (e->Iex.Binop.arg1->tag == Iex_Const &&
4311                 e->Iex.Binop.arg2->tag == Iex_Const &&
4312                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4313                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4314                 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4315                            e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4316                ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4317                ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4318                if (imm) {
4319                   HReg res = newVRegV(env);
4320                   addInstr(env, ARMInstr_NeonImm(res, imm));
4321                   return res;
4322                }
4323                if ((imm64 >> 32) == 0LL &&
4324                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4325                   HReg tmp1 = newVRegV(env);
4326                   HReg tmp2 = newVRegV(env);
4327                   HReg res = newVRegV(env);
4328                   if (imm->type < 10) {
4329                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4330                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4331                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4332                                                     res, tmp1, tmp2, 4, True));
4333                      return res;
4334                   }
4335                }
4336                if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4337                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4338                   HReg tmp1 = newVRegV(env);
4339                   HReg tmp2 = newVRegV(env);
4340                   HReg res = newVRegV(env);
4341                   if (imm->type < 10) {
4342                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4343                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4344                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4345                                                     res, tmp1, tmp2, 4, True));
4346                      return res;
4347                   }
4348                }
4349             }
4350             /* Does not match "VMOV Reg, Imm" form.  We'll have to do
4351                it the slow way. */
4352             {
4353                /* local scope */
4354                /* Assemble the value in 16 bytes of stack, then reload it. */
4355                /* FIXME: assumes little endian host (w0 goes to lowest address) */
4356                HReg       w3, w2, w1, w0;  /* stored at [sp+12],[sp+8],[sp+4],[sp+0] */
4357                HReg       res  = newVRegV(env);
4358                ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
4359                ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
4360                ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
4361                ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4362                ARMRI84*   c_16  = ARMRI84_I84(16,0);
4363                /* Make 16 bytes of space on the stack: SP -= 16 */
4364                addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4365                                                       hregARM_R13(), c_16));
4366 
4367                /* Store the less significant 64 bits (arg2) at [sp+0], [sp+4] */
4368                iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4369                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4370                                              w0, sp_0));
4371                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4372                                              w1, sp_4));
4373 
4374                /* Store the more significant 64 bits (arg1) at [sp+8], [sp+12] */
4375                iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4376                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4377                                              w2, sp_8));
4378                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4379                                              w3, sp_12));
4380 
4381                 /* Load the assembled value back from [sp] into res. */
4382                 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4383                                               mkARMAModeN_R(hregARM_R13())));
4384 
4385                 /* Restore SP: SP += 16 */
4386                 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4387                                            hregARM_R13(), c_16));
4388                 return res;
4389             } /* local scope */
4390             goto neon_expr_bad;
4391          case Iop_AndV128: {
4392             HReg res = newVRegV(env);
4393             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4394             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4395             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4396                                            res, argL, argR, 4, True));
4397             return res;
4398          }
4399          case Iop_OrV128: {
4400             HReg res = newVRegV(env);
4401             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4402             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4403             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4404                                            res, argL, argR, 4, True));
4405             return res;
4406          }
4407          case Iop_XorV128: {
4408             HReg res = newVRegV(env);
4409             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4410             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4411             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4412                                            res, argL, argR, 4, True));
4413             return res;
4414          }
4415          case Iop_Add8x16:
4416          case Iop_Add16x8:
4417          case Iop_Add32x4:
4418          case Iop_Add64x2: {
4419             /*
4420             FIXME: remove this if not used
4421             DECLARE_PATTERN(p_vrhadd_32sx4);
4422             ULong one = (1LL << 32) | 1LL;
4423             DEFINE_PATTERN(p_vrhadd_32sx4,
4424                   binop(Iop_Add32x4,
4425                         binop(Iop_Add32x4,
4426                               binop(Iop_SarN32x4,
4427                                     bind(0),
4428                                     mkU8(1)),
4429                               binop(Iop_SarN32x4,
4430                                     bind(1),
4431                                     mkU8(1))),
4432                         binop(Iop_SarN32x4,
4433                               binop(Iop_Add32x4,
4434                                     binop(Iop_Add32x4,
4435                                           binop(Iop_AndV128,
4436                                                 bind(0),
4437                                                 mkU128(one)),
4438                                           binop(Iop_AndV128,
4439                                                 bind(1),
4440                                                 mkU128(one))),
4441                                     mkU128(one)),
4442                               mkU8(1))));
4443             */
4444             HReg res = newVRegV(env);
4445             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4446             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4447             UInt size;
4448             switch (e->Iex.Binop.op) {
4449                case Iop_Add8x16: size = 0; break;
4450                case Iop_Add16x8: size = 1; break;
4451                case Iop_Add32x4: size = 2; break;
4452                case Iop_Add64x2: size = 3; break;
4453                default:
4454                   ppIROp(e->Iex.Binop.op);
4455                   vpanic("Illegal element size in VADD");
4456             }
4457             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4458                                            res, argL, argR, size, True));
4459             return res;
4460          }
4461          case Iop_RecipStep32Fx4: {
4462             HReg res = newVRegV(env);
4463             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4464             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4465             UInt size = 0;
4466             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4467                                            res, argL, argR, size, True));
4468             return res;
4469          }
4470          case Iop_RSqrtStep32Fx4: {
4471             HReg res = newVRegV(env);
4472             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4473             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4474             UInt size = 0;
4475             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4476                                            res, argL, argR, size, True));
4477             return res;
4478          }
4479 
4480          // These 6 verified 18 Apr 2013
4481          case Iop_InterleaveEvenLanes8x16:
4482          case Iop_InterleaveOddLanes8x16:
4483          case Iop_InterleaveEvenLanes16x8:
4484          case Iop_InterleaveOddLanes16x8:
4485          case Iop_InterleaveEvenLanes32x4:
4486          case Iop_InterleaveOddLanes32x4: {
4487             HReg rD   = newVRegV(env);
4488             HReg rM   = newVRegV(env);
4489             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4490             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4491             UInt size;
4492             Bool resRd;  // is the result in rD or rM ?
4493             switch (e->Iex.Binop.op) {
4494                case Iop_InterleaveOddLanes8x16:  resRd = False; size = 0; break;
4495                case Iop_InterleaveEvenLanes8x16: resRd = True;  size = 0; break;
4496                case Iop_InterleaveOddLanes16x8:  resRd = False; size = 1; break;
4497                case Iop_InterleaveEvenLanes16x8: resRd = True;  size = 1; break;
4498                case Iop_InterleaveOddLanes32x4:  resRd = False; size = 2; break;
4499                case Iop_InterleaveEvenLanes32x4: resRd = True;  size = 2; break;
4500                default: vassert(0);
4501             }
4502             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4503             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4504             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4505             return resRd ? rD : rM;
4506          }
4507 
4508          // These 6 verified 18 Apr 2013
4509          case Iop_InterleaveHI8x16:
4510          case Iop_InterleaveLO8x16:
4511          case Iop_InterleaveHI16x8:
4512          case Iop_InterleaveLO16x8:
4513          case Iop_InterleaveHI32x4:
4514          case Iop_InterleaveLO32x4: {
4515             HReg rD   = newVRegV(env);
4516             HReg rM   = newVRegV(env);
4517             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4518             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4519             UInt size;
4520             Bool resRd;  // is the result in rD or rM ?
4521             switch (e->Iex.Binop.op) {
4522                case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4523                case Iop_InterleaveLO8x16: resRd = True;  size = 0; break;
4524                case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4525                case Iop_InterleaveLO16x8: resRd = True;  size = 1; break;
4526                case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4527                case Iop_InterleaveLO32x4: resRd = True;  size = 2; break;
4528                default: vassert(0);
4529             }
4530             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4531             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4532             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4533             return resRd ? rD : rM;
4534          }
4535 
4536          // These 6 verified 18 Apr 2013
4537          case Iop_CatOddLanes8x16:
4538          case Iop_CatEvenLanes8x16:
4539          case Iop_CatOddLanes16x8:
4540          case Iop_CatEvenLanes16x8:
4541          case Iop_CatOddLanes32x4:
4542          case Iop_CatEvenLanes32x4: {
4543             HReg rD   = newVRegV(env);
4544             HReg rM   = newVRegV(env);
4545             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4546             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4547             UInt size;
4548             Bool resRd;  // is the result in rD or rM ?
4549             switch (e->Iex.Binop.op) {
4550                case Iop_CatOddLanes8x16:  resRd = False; size = 0; break;
4551                case Iop_CatEvenLanes8x16: resRd = True;  size = 0; break;
4552                case Iop_CatOddLanes16x8:  resRd = False; size = 1; break;
4553                case Iop_CatEvenLanes16x8: resRd = True;  size = 1; break;
4554                case Iop_CatOddLanes32x4:  resRd = False; size = 2; break;
4555                case Iop_CatEvenLanes32x4: resRd = True;  size = 2; break;
4556                default: vassert(0);
4557             }
4558             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4559             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4560             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4561             return resRd ? rD : rM;
4562          }
4563 
4564          case Iop_QAdd8Ux16:
4565          case Iop_QAdd16Ux8:
4566          case Iop_QAdd32Ux4:
4567          case Iop_QAdd64Ux2: {
4568             HReg res = newVRegV(env);
4569             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4570             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4571             UInt size;
4572             switch (e->Iex.Binop.op) {
4573                case Iop_QAdd8Ux16: size = 0; break;
4574                case Iop_QAdd16Ux8: size = 1; break;
4575                case Iop_QAdd32Ux4: size = 2; break;
4576                case Iop_QAdd64Ux2: size = 3; break;
4577                default:
4578                   ppIROp(e->Iex.Binop.op);
4579                   vpanic("Illegal element size in VQADDU");
4580             }
4581             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4582                                            res, argL, argR, size, True));
4583             return res;
4584          }
4585          case Iop_QAdd8Sx16:
4586          case Iop_QAdd16Sx8:
4587          case Iop_QAdd32Sx4:
4588          case Iop_QAdd64Sx2: {
4589             HReg res = newVRegV(env);
4590             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4591             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4592             UInt size;
4593             switch (e->Iex.Binop.op) {
4594                case Iop_QAdd8Sx16: size = 0; break;
4595                case Iop_QAdd16Sx8: size = 1; break;
4596                case Iop_QAdd32Sx4: size = 2; break;
4597                case Iop_QAdd64Sx2: size = 3; break;
4598                default:
4599                   ppIROp(e->Iex.Binop.op);
4600                   vpanic("Illegal element size in VQADDS");
4601             }
4602             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4603                                            res, argL, argR, size, True));
4604             return res;
4605          }
4606          case Iop_Sub8x16:
4607          case Iop_Sub16x8:
4608          case Iop_Sub32x4:
4609          case Iop_Sub64x2: {
4610             HReg res = newVRegV(env);
4611             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4612             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4613             UInt size;
4614             switch (e->Iex.Binop.op) {
4615                case Iop_Sub8x16: size = 0; break;
4616                case Iop_Sub16x8: size = 1; break;
4617                case Iop_Sub32x4: size = 2; break;
4618                case Iop_Sub64x2: size = 3; break;
4619                default:
4620                   ppIROp(e->Iex.Binop.op);
4621                   vpanic("Illegal element size in VSUB");
4622             }
4623             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4624                                            res, argL, argR, size, True));
4625             return res;
4626          }
4627          case Iop_QSub8Ux16:
4628          case Iop_QSub16Ux8:
4629          case Iop_QSub32Ux4:
4630          case Iop_QSub64Ux2: {
4631             HReg res = newVRegV(env);
4632             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4633             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4634             UInt size;
4635             switch (e->Iex.Binop.op) {
4636                case Iop_QSub8Ux16: size = 0; break;
4637                case Iop_QSub16Ux8: size = 1; break;
4638                case Iop_QSub32Ux4: size = 2; break;
4639                case Iop_QSub64Ux2: size = 3; break;
4640                default:
4641                   ppIROp(e->Iex.Binop.op);
4642                   vpanic("Illegal element size in VQSUBU");
4643             }
4644             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4645                                            res, argL, argR, size, True));
4646             return res;
4647          }
4648          case Iop_QSub8Sx16:
4649          case Iop_QSub16Sx8:
4650          case Iop_QSub32Sx4:
4651          case Iop_QSub64Sx2: {
4652             HReg res = newVRegV(env);
4653             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4654             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4655             UInt size;
4656             switch (e->Iex.Binop.op) {
4657                case Iop_QSub8Sx16: size = 0; break;
4658                case Iop_QSub16Sx8: size = 1; break;
4659                case Iop_QSub32Sx4: size = 2; break;
4660                case Iop_QSub64Sx2: size = 3; break;
4661                default:
4662                   ppIROp(e->Iex.Binop.op);
4663                   vpanic("Illegal element size in VQSUBS");
4664             }
4665             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4666                                            res, argL, argR, size, True));
4667             return res;
4668          }
4669          case Iop_Max8Ux16:
4670          case Iop_Max16Ux8:
4671          case Iop_Max32Ux4: {
4672             HReg res = newVRegV(env);
4673             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4674             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4675             UInt size;
4676             switch (e->Iex.Binop.op) {
4677                case Iop_Max8Ux16: size = 0; break;
4678                case Iop_Max16Ux8: size = 1; break;
4679                case Iop_Max32Ux4: size = 2; break;
4680                default: vpanic("Illegal element size in VMAXU");
4681             }
4682             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4683                                            res, argL, argR, size, True));
4684             return res;
4685          }
4686          case Iop_Max8Sx16:
4687          case Iop_Max16Sx8:
4688          case Iop_Max32Sx4: {  /* all three ops emit ARMneon_VMAXS */
4689             HReg res = newVRegV(env);
4690             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4691             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4692             UInt size;  /* 0, 1 or 2, chosen per op below */
4693             switch (e->Iex.Binop.op) {
4694                case Iop_Max8Sx16: size = 0; break;
4695                case Iop_Max16Sx8: size = 1; break;
4696                case Iop_Max32Sx4: size = 2; break;
4697                default: vpanic("Illegal element size in VMAXS");
4698             }
4699             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4700                                            res, argL, argR, size, True));
4701             return res;
4702          }
4703          case Iop_Min8Ux16:
4704          case Iop_Min16Ux8:
4705          case Iop_Min32Ux4: {  /* all three ops emit ARMneon_VMINU */
4706             HReg res = newVRegV(env);
4707             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4708             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4709             UInt size;  /* 0, 1 or 2, chosen per op below */
4710             switch (e->Iex.Binop.op) {
4711                case Iop_Min8Ux16: size = 0; break;
4712                case Iop_Min16Ux8: size = 1; break;
4713                case Iop_Min32Ux4: size = 2; break;
4714                default: vpanic("Illegal element size in VMINU");
4715             }
4716             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4717                                            res, argL, argR, size, True));
4718             return res;
4719          }
4720          case Iop_Min8Sx16:
4721          case Iop_Min16Sx8:
4722          case Iop_Min32Sx4: {  /* all three ops emit ARMneon_VMINS */
4723             HReg res = newVRegV(env);
4724             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4725             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4726             UInt size;  /* 0, 1 or 2, chosen per op below */
4727             switch (e->Iex.Binop.op) {
4728                case Iop_Min8Sx16: size = 0; break;
4729                case Iop_Min16Sx8: size = 1; break;
4730                case Iop_Min32Sx4: size = 2; break;
4731                default: vpanic("Illegal element size in VMINS");
4732             }
4733             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4734                                            res, argL, argR, size, True));
4735             return res;
4736          }
4737          case Iop_Sar8x16:
4738          case Iop_Sar16x8:
4739          case Iop_Sar32x4:
4740          case Iop_Sar64x2: {
4741             HReg res = newVRegV(env);
4742             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4743             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4744             HReg argR2 = newVRegV(env);
4745             HReg zero = newVRegV(env);
4746             UInt size;
4747             switch (e->Iex.Binop.op) {
4748                case Iop_Sar8x16: size = 0; break;
4749                case Iop_Sar16x8: size = 1; break;
4750                case Iop_Sar32x4: size = 2; break;
4751                case Iop_Sar64x2: size = 3; break;
4752                default: vassert(0);
4753             }
4754             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4755             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4756                                            argR2, zero, argR, size, True));
4757             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4758                                           res, argL, argR2, size, True));
4759             return res;
4760          }
4761          case Iop_Sal8x16:
4762          case Iop_Sal16x8:
4763          case Iop_Sal32x4:
4764          case Iop_Sal64x2: {
4765             HReg res = newVRegV(env);
4766             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768             UInt size;
4769             switch (e->Iex.Binop.op) {
4770                case Iop_Sal8x16: size = 0; break;
4771                case Iop_Sal16x8: size = 1; break;
4772                case Iop_Sal32x4: size = 2; break;
4773                case Iop_Sal64x2: size = 3; break;
4774                default: vassert(0);
4775             }
4776             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4777                                           res, argL, argR, size, True));
4778             return res;
4779          }
4780          case Iop_Shr8x16:
4781          case Iop_Shr16x8:
4782          case Iop_Shr32x4:
4783          case Iop_Shr64x2: {
4784             HReg res = newVRegV(env);
4785             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4787             HReg argR2 = newVRegV(env);
4788             HReg zero = newVRegV(env);
4789             UInt size;
4790             switch (e->Iex.Binop.op) {
4791                case Iop_Shr8x16: size = 0; break;
4792                case Iop_Shr16x8: size = 1; break;
4793                case Iop_Shr32x4: size = 2; break;
4794                case Iop_Shr64x2: size = 3; break;
4795                default: vassert(0);
4796             }
4797             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4798             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4799                                            argR2, zero, argR, size, True));
4800             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4801                                           res, argL, argR2, size, True));
4802             return res;
4803          }
4804          case Iop_Shl8x16:
4805          case Iop_Shl16x8:
4806          case Iop_Shl32x4:
4807          case Iop_Shl64x2: {
4808             HReg res = newVRegV(env);
4809             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4810             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4811             UInt size;
4812             switch (e->Iex.Binop.op) {
4813                case Iop_Shl8x16: size = 0; break;
4814                case Iop_Shl16x8: size = 1; break;
4815                case Iop_Shl32x4: size = 2; break;
4816                case Iop_Shl64x2: size = 3; break;
4817                default: vassert(0);
4818             }
4819             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4820                                           res, argL, argR, size, True));
4821             return res;
4822          }
4823          case Iop_QShl8x16:
4824          case Iop_QShl16x8:
4825          case Iop_QShl32x4:
4826          case Iop_QShl64x2: {
4827             HReg res = newVRegV(env);
4828             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4829             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4830             UInt size;
4831             switch (e->Iex.Binop.op) {
4832                case Iop_QShl8x16: size = 0; break;
4833                case Iop_QShl16x8: size = 1; break;
4834                case Iop_QShl32x4: size = 2; break;
4835                case Iop_QShl64x2: size = 3; break;
4836                default: vassert(0);
4837             }
4838             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4839                                           res, argL, argR, size, True));
4840             return res;
4841          }
4842          case Iop_QSal8x16:
4843          case Iop_QSal16x8:
4844          case Iop_QSal32x4:
4845          case Iop_QSal64x2: {
4846             HReg res = newVRegV(env);
4847             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4849             UInt size;
4850             switch (e->Iex.Binop.op) {
4851                case Iop_QSal8x16: size = 0; break;
4852                case Iop_QSal16x8: size = 1; break;
4853                case Iop_QSal32x4: size = 2; break;
4854                case Iop_QSal64x2: size = 3; break;
4855                default: vassert(0);
4856             }
4857             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4858                                           res, argL, argR, size, True));
4859             return res;
4860          }
4861          case Iop_QShlNsatUU8x16:
4862          case Iop_QShlNsatUU16x8:
4863          case Iop_QShlNsatUU32x4:
4864          case Iop_QShlNsatUU64x2: {
4865             HReg res = newVRegV(env);
4866             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4867             UInt size, imm;
4868             if (e->Iex.Binop.arg2->tag != Iex_Const ||
4869                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4870                vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
4871                       "second argument only\n");
4872             }
4873             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4874             switch (e->Iex.Binop.op) {
4875                case Iop_QShlNsatUU8x16: size = 8 | imm; break;
4876                case Iop_QShlNsatUU16x8: size = 16 | imm; break;
4877                case Iop_QShlNsatUU32x4: size = 32 | imm; break;
4878                case Iop_QShlNsatUU64x2: size = 64 | imm; break;
4879                default: vassert(0);
4880             }
4881             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4882                                           res, argL, size, True));
4883             return res;
4884          }
4885          case Iop_QShlNsatSU8x16:
4886          case Iop_QShlNsatSU16x8:
4887          case Iop_QShlNsatSU32x4:
4888          case Iop_QShlNsatSU64x2: {
4889             HReg res = newVRegV(env);
4890             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4891             UInt size, imm;
4892             if (e->Iex.Binop.arg2->tag != Iex_Const ||
4893                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4894                vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
4895                       "second argument only\n");
4896             }
4897             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4898             switch (e->Iex.Binop.op) {
4899                case Iop_QShlNsatSU8x16: size = 8 | imm; break;
4900                case Iop_QShlNsatSU16x8: size = 16 | imm; break;
4901                case Iop_QShlNsatSU32x4: size = 32 | imm; break;
4902                case Iop_QShlNsatSU64x2: size = 64 | imm; break;
4903                default: vassert(0);
4904             }
4905             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4906                                           res, argL, size, True));
4907             return res;
4908          }
4909          case Iop_QShlNsatSS8x16:
4910          case Iop_QShlNsatSS16x8:
4911          case Iop_QShlNsatSS32x4:
4912          case Iop_QShlNsatSS64x2: {
4913             HReg res = newVRegV(env);
4914             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4915             UInt size, imm;
4916             if (e->Iex.Binop.arg2->tag != Iex_Const ||
4917                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4918                vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
4919                       "second argument only\n");
4920             }
4921             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4922             switch (e->Iex.Binop.op) {
4923                case Iop_QShlNsatSS8x16: size = 8 | imm; break;
4924                case Iop_QShlNsatSS16x8: size = 16 | imm; break;
4925                case Iop_QShlNsatSS32x4: size = 32 | imm; break;
4926                case Iop_QShlNsatSS64x2: size = 64 | imm; break;
4927                default: vassert(0);
4928             }
4929             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4930                                           res, argL, size, True));
4931             return res;
4932          }
4933          case Iop_ShrN8x16:
4934          case Iop_ShrN16x8:
4935          case Iop_ShrN32x4:
4936          case Iop_ShrN64x2: {
4937             HReg res = newVRegV(env);
4938             HReg tmp = newVRegV(env);
4939             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4940             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4941             HReg argR2 = newVRegI(env);
4942             UInt size;
4943             switch (e->Iex.Binop.op) {
4944                case Iop_ShrN8x16: size = 0; break;
4945                case Iop_ShrN16x8: size = 1; break;
4946                case Iop_ShrN32x4: size = 2; break;
4947                case Iop_ShrN64x2: size = 3; break;
4948                default: vassert(0);
4949             }
4950             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4951             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4952                                           tmp, argR2, 0, True));
4953             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4954                                           res, argL, tmp, size, True));
4955             return res;
4956          }
4957          case Iop_ShlN8x16:
4958          case Iop_ShlN16x8:
4959          case Iop_ShlN32x4:
4960          case Iop_ShlN64x2: {
4961             HReg res = newVRegV(env);
4962             HReg tmp = newVRegV(env);
4963             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4964             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4965             UInt size;
4966             switch (e->Iex.Binop.op) {
4967                case Iop_ShlN8x16: size = 0; break;
4968                case Iop_ShlN16x8: size = 1; break;
4969                case Iop_ShlN32x4: size = 2; break;
4970                case Iop_ShlN64x2: size = 3; break;
4971                default: vassert(0);
4972             }
4973             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4974             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4975                                           res, argL, tmp, size, True));
4976             return res;
4977          }
4978          case Iop_SarN8x16:
4979          case Iop_SarN16x8:
4980          case Iop_SarN32x4:
4981          case Iop_SarN64x2: {
4982             HReg res = newVRegV(env);
4983             HReg tmp = newVRegV(env);
4984             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4985             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4986             HReg argR2 = newVRegI(env);
4987             UInt size;
4988             switch (e->Iex.Binop.op) {
4989                case Iop_SarN8x16: size = 0; break;
4990                case Iop_SarN16x8: size = 1; break;
4991                case Iop_SarN32x4: size = 2; break;
4992                case Iop_SarN64x2: size = 3; break;
4993                default: vassert(0);
4994             }
4995             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4996             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4997             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4998                                           res, argL, tmp, size, True));
4999             return res;
5000          }
5001          case Iop_CmpGT8Ux16:
5002          case Iop_CmpGT16Ux8:
5003          case Iop_CmpGT32Ux4: {
5004             HReg res = newVRegV(env);
5005             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5006             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5007             UInt size;
5008             switch (e->Iex.Binop.op) {
5009                case Iop_CmpGT8Ux16: size = 0; break;
5010                case Iop_CmpGT16Ux8: size = 1; break;
5011                case Iop_CmpGT32Ux4: size = 2; break;
5012                default: vassert(0);
5013             }
5014             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5015                                            res, argL, argR, size, True));
5016             return res;
5017          }
5018          case Iop_CmpGT8Sx16:
5019          case Iop_CmpGT16Sx8:
5020          case Iop_CmpGT32Sx4: {
5021             HReg res = newVRegV(env);
5022             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5023             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5024             UInt size;
5025             switch (e->Iex.Binop.op) {
5026                case Iop_CmpGT8Sx16: size = 0; break;
5027                case Iop_CmpGT16Sx8: size = 1; break;
5028                case Iop_CmpGT32Sx4: size = 2; break;
5029                default: vassert(0);
5030             }
5031             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5032                                            res, argL, argR, size, True));
5033             return res;
5034          }
5035          case Iop_CmpEQ8x16:
5036          case Iop_CmpEQ16x8:
5037          case Iop_CmpEQ32x4: {
5038             HReg res = newVRegV(env);
5039             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5040             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5041             UInt size;
5042             switch (e->Iex.Binop.op) {
5043                case Iop_CmpEQ8x16: size = 0; break;
5044                case Iop_CmpEQ16x8: size = 1; break;
5045                case Iop_CmpEQ32x4: size = 2; break;
5046                default: vassert(0);
5047             }
5048             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5049                                            res, argL, argR, size, True));
5050             return res;
5051          }
5052          case Iop_Mul8x16:
5053          case Iop_Mul16x8:
5054          case Iop_Mul32x4: {
5055             HReg res = newVRegV(env);
5056             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058             UInt size = 0;
5059             switch(e->Iex.Binop.op) {
5060                case Iop_Mul8x16: size = 0; break;
5061                case Iop_Mul16x8: size = 1; break;
5062                case Iop_Mul32x4: size = 2; break;
5063                default: vassert(0);
5064             }
5065             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5066                                            res, argL, argR, size, True));
5067             return res;
5068          }
5069          case Iop_Mull8Ux8:
5070          case Iop_Mull16Ux4:
5071          case Iop_Mull32Ux2: {
5072             HReg res = newVRegV(env);
5073             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5074             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5075             UInt size = 0;
5076             switch(e->Iex.Binop.op) {
5077                case Iop_Mull8Ux8: size = 0; break;
5078                case Iop_Mull16Ux4: size = 1; break;
5079                case Iop_Mull32Ux2: size = 2; break;
5080                default: vassert(0);
5081             }
5082             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5083                                            res, argL, argR, size, True));
5084             return res;
5085          }
5086 
5087          case Iop_Mull8Sx8:
5088          case Iop_Mull16Sx4:
5089          case Iop_Mull32Sx2: {
5090             HReg res = newVRegV(env);
5091             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5092             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5093             UInt size = 0;
5094             switch(e->Iex.Binop.op) {
5095                case Iop_Mull8Sx8: size = 0; break;
5096                case Iop_Mull16Sx4: size = 1; break;
5097                case Iop_Mull32Sx2: size = 2; break;
5098                default: vassert(0);
5099             }
5100             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5101                                            res, argL, argR, size, True));
5102             return res;
5103          }
5104 
5105          case Iop_QDMulHi16Sx8:
5106          case Iop_QDMulHi32Sx4: {
5107             HReg res = newVRegV(env);
5108             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5109             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5110             UInt size = 0;
5111             switch(e->Iex.Binop.op) {
5112                case Iop_QDMulHi16Sx8: size = 1; break;
5113                case Iop_QDMulHi32Sx4: size = 2; break;
5114                default: vassert(0);
5115             }
5116             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5117                                            res, argL, argR, size, True));
5118             return res;
5119          }
5120 
5121          case Iop_QRDMulHi16Sx8:
5122          case Iop_QRDMulHi32Sx4: {
5123             HReg res = newVRegV(env);
5124             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5125             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5126             UInt size = 0;
5127             switch(e->Iex.Binop.op) {
5128                case Iop_QRDMulHi16Sx8: size = 1; break;
5129                case Iop_QRDMulHi32Sx4: size = 2; break;
5130                default: vassert(0);
5131             }
5132             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5133                                            res, argL, argR, size, True));
5134             return res;
5135          }
5136 
5137          case Iop_QDMull16Sx4:
5138          case Iop_QDMull32Sx2: {
5139             HReg res = newVRegV(env);
5140             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5141             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5142             UInt size = 0;
5143             switch(e->Iex.Binop.op) {
5144                case Iop_QDMull16Sx4: size = 1; break;
5145                case Iop_QDMull32Sx2: size = 2; break;
5146                default: vassert(0);
5147             }
5148             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5149                                            res, argL, argR, size, True));
5150             return res;
5151          }
5152          case Iop_PolynomialMul8x16: {
5153             HReg res = newVRegV(env);
5154             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5155             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5156             UInt size = 0;
5157             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5158                                            res, argL, argR, size, True));
5159             return res;
5160          }
5161          case Iop_Max32Fx4: {
5162             HReg res = newVRegV(env);
5163             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5164             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5165             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5166                                            res, argL, argR, 2, True));
5167             return res;
5168          }
5169          case Iop_Min32Fx4: {
5170             HReg res = newVRegV(env);
5171             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5172             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5173             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5174                                            res, argL, argR, 2, True));
5175             return res;
5176          }
5177          case Iop_PwMax32Fx4: {
5178             HReg res = newVRegV(env);
5179             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5180             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5181             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5182                                            res, argL, argR, 2, True));
5183             return res;
5184          }
5185          case Iop_PwMin32Fx4: {
5186             HReg res = newVRegV(env);
5187             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5188             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5189             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5190                                            res, argL, argR, 2, True));
5191             return res;
5192          }
5193          case Iop_CmpGT32Fx4: {
5194             HReg res = newVRegV(env);
5195             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5196             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5197             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5198                                            res, argL, argR, 2, True));
5199             return res;
5200          }
5201          case Iop_CmpGE32Fx4: {
5202             HReg res = newVRegV(env);
5203             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5204             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5205             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5206                                            res, argL, argR, 2, True));
5207             return res;
5208          }
5209          case Iop_CmpEQ32Fx4: {
5210             HReg res = newVRegV(env);
5211             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5212             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5213             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5214                                            res, argL, argR, 2, True));
5215             return res;
5216          }
5217 
5218          case Iop_PolynomialMull8x8: {
5219             HReg res = newVRegV(env);
5220             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5221             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5222             UInt size = 0;
5223             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5224                                            res, argL, argR, size, True));
5225             return res;
5226          }
5227          case Iop_F32ToFixed32Ux4_RZ:
5228          case Iop_F32ToFixed32Sx4_RZ:
5229          case Iop_Fixed32UToF32x4_RN:
5230          case Iop_Fixed32SToF32x4_RN: {
5231             HReg res = newVRegV(env);
5232             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5233             ARMNeonUnOp op;
5234             UInt imm6;
5235             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5236                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5237                   vpanic("ARM supports FP <-> Fixed conversion with constant "
5238                          "second argument less than 33 only\n");
5239             }
5240             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5241             vassert(imm6 <= 32 && imm6 > 0);
5242             imm6 = 64 - imm6;
5243             switch(e->Iex.Binop.op) {
5244                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5245                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5246                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5247                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5248                default: vassert(0);
5249             }
5250             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5251             return res;
5252          }
5253          /*
5254          FIXME remove if not used
5255          case Iop_VDup8x16:
5256          case Iop_VDup16x8:
5257          case Iop_VDup32x4: {
5258             HReg res = newVRegV(env);
5259             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5260             UInt imm4;
5261             UInt index;
5262             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5263                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5264                   vpanic("ARM supports Iop_VDup with constant "
5265                          "second argument less than 16 only\n");
5266             }
5267             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5268             switch(e->Iex.Binop.op) {
5269                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5270                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5271                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5272                default: vassert(0);
5273             }
5274             if (imm4 >= 16) {
5275                vpanic("ARM supports Iop_VDup with constant "
5276                       "second argument less than 16 only\n");
5277             }
5278             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5279                                           res, argL, imm4, True));
5280             return res;
5281          }
5282          */
5283          case Iop_PwAdd8x16:
5284          case Iop_PwAdd16x8:
5285          case Iop_PwAdd32x4: {
5286             HReg res = newVRegV(env);
5287             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5288             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5289             UInt size = 0;
5290             switch(e->Iex.Binop.op) {
5291                case Iop_PwAdd8x16: size = 0; break;
5292                case Iop_PwAdd16x8: size = 1; break;
5293                case Iop_PwAdd32x4: size = 2; break;
5294                default: vassert(0);
5295             }
5296             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5297                                            res, argL, argR, size, True));
5298             return res;
5299          }
5300          /* ... */
5301          default:
5302             break;
5303       }
5304    }
5305 
5306    if (e->tag == Iex_Triop) {
5307       IRTriop *triop = e->Iex.Triop.details;
5308 
5309       switch (triop->op) {
5310          case Iop_SliceV128: {
5311             HReg res = newVRegV(env);
5312             HReg argL = iselNeonExpr(env, triop->arg2);
5313             HReg argR = iselNeonExpr(env, triop->arg1);
5314             UInt imm4;
5315             if (triop->arg3->tag != Iex_Const ||
5316                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5317                vpanic("ARM target supports Iop_ExtractV128 with constant "
5318                       "third argument less than 16 only\n");
5319             }
5320             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5321             if (imm4 >= 16) {
5322                vpanic("ARM target supports Iop_ExtractV128 with constant "
5323                       "third argument less than 16 only\n");
5324             }
5325             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5326                                            res, argL, argR, imm4, True));
5327             return res;
5328          }
5329          case Iop_Mul32Fx4:
5330          case Iop_Sub32Fx4:
5331          case Iop_Add32Fx4: {
5332             HReg res = newVRegV(env);
5333             HReg argL = iselNeonExpr(env, triop->arg2);
5334             HReg argR = iselNeonExpr(env, triop->arg3);
5335             UInt size = 0;
5336             ARMNeonBinOp op = ARMneon_INVALID;
5337             switch (triop->op) {
5338                case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5339                case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5340                case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5341                default: vassert(0);
5342             }
5343             addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5344             return res;
5345          }
5346          default:
5347             break;
5348       }
5349    }
5350 
5351    if (e->tag == Iex_ITE) { // VFD
5352       ARMCondCode cc;
5353       HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
5354       HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
5355       HReg dst = newVRegV(env);
5356       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5357       cc = iselCondCode(env, e->Iex.ITE.cond);
5358       addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5359       return dst;
5360    }
5361 
5362   neon_expr_bad:
5363    ppIRExpr(e);
5364    vpanic("iselNeonExpr_wrk");
5365 }
5366 
5367 /*---------------------------------------------------------*/
5368 /*--- ISEL: Floating point expressions (64 bit)         ---*/
5369 /*---------------------------------------------------------*/
5370 
5371 /* Compute a 64-bit floating point value into a register, the identity
5372    of which is returned.  As with iselIntExpr_R, the reg may be either
5373    real or virtual; in any case it must not be changed by subsequent
5374    code emitted by the caller.  */
5375 
iselDblExpr(ISelEnv * env,IRExpr * e)5376 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5377 {
5378    HReg r = iselDblExpr_wrk( env, e );
5379 #  if 0
5380    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5381 #  endif
5382    vassert(hregClass(r) == HRcFlt64);
5383    vassert(hregIsVirtual(r));
5384    return r;
5385 }
5386 
5387 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,IRExpr * e)5388 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5389 {
5390    IRType ty = typeOfIRExpr(env->type_env,e);
5391    vassert(e);
5392    vassert(ty == Ity_F64);
5393 
5394    if (e->tag == Iex_RdTmp) {
5395       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5396    }
5397 
5398    if (e->tag == Iex_Const) {
5399       /* Just handle the zero case. */
5400       IRConst* con = e->Iex.Const.con;
5401       if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5402          HReg z32 = newVRegI(env);
5403          HReg dst = newVRegD(env);
5404          addInstr(env, ARMInstr_Imm32(z32, 0));
5405          addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5406          return dst;
5407       }
5408    }
5409 
5410    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5411       ARMAModeV* am;
5412       HReg res = newVRegD(env);
5413       vassert(e->Iex.Load.ty == Ity_F64);
5414       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5415       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5416       return res;
5417    }
5418 
5419    if (e->tag == Iex_Get) {
5420       // XXX This won't work if offset > 1020 or is not 0 % 4.
5421       // In which case we'll have to generate more longwinded code.
5422       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5423       HReg       res = newVRegD(env);
5424       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5425       return res;
5426    }
5427 
5428    if (e->tag == Iex_Unop) {
5429       switch (e->Iex.Unop.op) {
5430          case Iop_ReinterpI64asF64: {
5431             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5432                return iselNeon64Expr(env, e->Iex.Unop.arg);
5433             } else {
5434                HReg srcHi, srcLo;
5435                HReg dst = newVRegD(env);
5436                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5437                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5438                return dst;
5439             }
5440          }
5441          case Iop_NegF64: {
5442             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5443             HReg dst = newVRegD(env);
5444             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5445             return dst;
5446          }
5447          case Iop_AbsF64: {
5448             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5449             HReg dst = newVRegD(env);
5450             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5451             return dst;
5452          }
5453          case Iop_F32toF64: {
5454             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5455             HReg dst = newVRegD(env);
5456             addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5457             return dst;
5458          }
5459          case Iop_I32UtoF64:
5460          case Iop_I32StoF64: {
5461             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5462             HReg f32   = newVRegF(env);
5463             HReg dst   = newVRegD(env);
5464             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5465             /* VMOV f32, src */
5466             addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5467             /* FSITOD dst, f32 */
5468             addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5469                                           dst, f32));
5470             return dst;
5471          }
5472          default:
5473             break;
5474       }
5475    }
5476 
5477    if (e->tag == Iex_Binop) {
5478       switch (e->Iex.Binop.op) {
5479          case Iop_SqrtF64: {
5480             /* first arg is rounding mode; we ignore it. */
5481             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5482             HReg dst = newVRegD(env);
5483             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5484             return dst;
5485          }
5486          default:
5487             break;
5488       }
5489    }
5490 
5491    if (e->tag == Iex_Triop) {
5492       IRTriop *triop = e->Iex.Triop.details;
5493 
5494       switch (triop->op) {
5495          case Iop_DivF64:
5496          case Iop_MulF64:
5497          case Iop_AddF64:
5498          case Iop_SubF64: {
5499             ARMVfpOp op = 0; /*INVALID*/
5500             HReg argL = iselDblExpr(env, triop->arg2);
5501             HReg argR = iselDblExpr(env, triop->arg3);
5502             HReg dst  = newVRegD(env);
5503             switch (triop->op) {
5504                case Iop_DivF64: op = ARMvfp_DIV; break;
5505                case Iop_MulF64: op = ARMvfp_MUL; break;
5506                case Iop_AddF64: op = ARMvfp_ADD; break;
5507                case Iop_SubF64: op = ARMvfp_SUB; break;
5508                default: vassert(0);
5509             }
5510             addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5511             return dst;
5512          }
5513          default:
5514             break;
5515       }
5516    }
5517 
5518    if (e->tag == Iex_ITE) { // VFD
5519       if (ty == Ity_F64
5520           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5521          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
5522          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
5523          HReg dst = newVRegD(env);
5524          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5525          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5526          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5527          return dst;
5528       }
5529    }
5530 
5531    ppIRExpr(e);
5532    vpanic("iselDblExpr_wrk");
5533 }
5534 
5535 
5536 /*---------------------------------------------------------*/
5537 /*--- ISEL: Floating point expressions (32 bit)         ---*/
5538 /*---------------------------------------------------------*/
5539 
5540 /* Compute a 32-bit floating point value into a register, the identity
5541    of which is returned.  As with iselIntExpr_R, the reg may be either
5542    real or virtual; in any case it must not be changed by subsequent
5543    code emitted by the caller.  */
5544 
iselFltExpr(ISelEnv * env,IRExpr * e)5545 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5546 {
5547    HReg r = iselFltExpr_wrk( env, e );
5548 #  if 0
5549    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5550 #  endif
5551    vassert(hregClass(r) == HRcFlt32);
5552    vassert(hregIsVirtual(r));
5553    return r;
5554 }
5555 
5556 /* DO NOT CALL THIS DIRECTLY */
iselFltExpr_wrk(ISelEnv * env,IRExpr * e)5557 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5558 {
5559    IRType ty = typeOfIRExpr(env->type_env,e);
5560    vassert(e);
5561    vassert(ty == Ity_F32);
5562 
5563    if (e->tag == Iex_RdTmp) {
5564       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5565    }
5566 
5567    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5568       ARMAModeV* am;
5569       HReg res = newVRegF(env);
5570       vassert(e->Iex.Load.ty == Ity_F32);
5571       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5572       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5573       return res;
5574    }
5575 
5576    if (e->tag == Iex_Get) {
5577       // XXX This won't work if offset > 1020 or is not 0 % 4.
5578       // In which case we'll have to generate more longwinded code.
5579       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5580       HReg       res = newVRegF(env);
5581       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5582       return res;
5583    }
5584 
5585    if (e->tag == Iex_Unop) {
5586       switch (e->Iex.Unop.op) {
5587          case Iop_ReinterpI32asF32: {
5588             HReg dst = newVRegF(env);
5589             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5590             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5591             return dst;
5592          }
5593          case Iop_NegF32: {
5594             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5595             HReg dst = newVRegF(env);
5596             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5597             return dst;
5598          }
5599          case Iop_AbsF32: {
5600             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5601             HReg dst = newVRegF(env);
5602             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5603             return dst;
5604          }
5605          default:
5606             break;
5607       }
5608    }
5609 
5610    if (e->tag == Iex_Binop) {
5611       switch (e->Iex.Binop.op) {
5612          case Iop_SqrtF32: {
5613             /* first arg is rounding mode; we ignore it. */
5614             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5615             HReg dst = newVRegF(env);
5616             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5617             return dst;
5618          }
5619          case Iop_F64toF32: {
5620             HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5621             set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5622             HReg valS = newVRegF(env);
5623             /* FCVTSD valS, valD */
5624             addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5625             set_VFP_rounding_default(env);
5626             return valS;
5627          }
5628          default:
5629             break;
5630       }
5631    }
5632 
5633    if (e->tag == Iex_Triop) {
5634       IRTriop *triop = e->Iex.Triop.details;
5635 
5636       switch (triop->op) {
5637          case Iop_DivF32:
5638          case Iop_MulF32:
5639          case Iop_AddF32:
5640          case Iop_SubF32: {
5641             ARMVfpOp op = 0; /*INVALID*/
5642             HReg argL = iselFltExpr(env, triop->arg2);
5643             HReg argR = iselFltExpr(env, triop->arg3);
5644             HReg dst  = newVRegF(env);
5645             switch (triop->op) {
5646                case Iop_DivF32: op = ARMvfp_DIV; break;
5647                case Iop_MulF32: op = ARMvfp_MUL; break;
5648                case Iop_AddF32: op = ARMvfp_ADD; break;
5649                case Iop_SubF32: op = ARMvfp_SUB; break;
5650                default: vassert(0);
5651             }
5652             addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5653             return dst;
5654          }
5655          default:
5656             break;
5657       }
5658    }
5659 
5660    if (e->tag == Iex_ITE) { // VFD
5661       if (ty == Ity_F32
5662           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5663          ARMCondCode cc;
5664          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
5665          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
5666          HReg dst = newVRegF(env);
5667          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5668          cc = iselCondCode(env, e->Iex.ITE.cond);
5669          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5670          return dst;
5671       }
5672    }
5673 
5674    ppIRExpr(e);
5675    vpanic("iselFltExpr_wrk");
5676 }
5677 
5678 
5679 /*---------------------------------------------------------*/
5680 /*--- ISEL: Statements                                  ---*/
5681 /*---------------------------------------------------------*/
5682 
iselStmt(ISelEnv * env,IRStmt * stmt)5683 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5684 {
5685    if (vex_traceflags & VEX_TRACE_VCODE) {
5686       vex_printf("\n-- ");
5687       ppIRStmt(stmt);
5688       vex_printf("\n");
5689    }
5690    switch (stmt->tag) {
5691 
5692    /* --------- STORE --------- */
5693    /* little-endian write to memory */
5694    case Ist_Store: {
5695       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5696       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5697       IREndness end  = stmt->Ist.Store.end;
5698 
5699       if (tya != Ity_I32 || end != Iend_LE)
5700          goto stmt_fail;
5701 
5702       if (tyd == Ity_I32) {
5703          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5704          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5705          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5706          return;
5707       }
5708       if (tyd == Ity_I16) {
5709          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5710          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5711          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5712                                        False/*!isLoad*/,
5713                                        False/*!isSignedLoad*/, rD, am));
5714          return;
5715       }
5716       if (tyd == Ity_I8) {
5717          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5718          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5719          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5720          return;
5721       }
5722       if (tyd == Ity_I64) {
5723          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5724             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5725             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5726             addInstr(env, ARMInstr_NLdStD(False, dD, am));
5727          } else {
5728             HReg rDhi, rDlo, rA;
5729             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5730             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5731             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5732                                           ARMAMode1_RI(rA,4)));
5733             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5734                                           ARMAMode1_RI(rA,0)));
5735          }
5736          return;
5737       }
5738       if (tyd == Ity_F64) {
5739          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5740          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5741          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5742          return;
5743       }
5744       if (tyd == Ity_F32) {
5745          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5746          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5747          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5748          return;
5749       }
5750       if (tyd == Ity_V128) {
5751          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5752          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5753          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5754          return;
5755       }
5756 
5757       break;
5758    }
5759 
5760    /* --------- CONDITIONAL STORE --------- */
5761    /* conditional little-endian write to memory */
5762    case Ist_StoreG: {
5763       IRStoreG* sg   = stmt->Ist.StoreG.details;
5764       IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
5765       IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
5766       IREndness end  = sg->end;
5767 
5768       if (tya != Ity_I32 || end != Iend_LE)
5769          goto stmt_fail;
5770 
5771       switch (tyd) {
5772          case Ity_I8:
5773          case Ity_I32: {
5774             HReg        rD = iselIntExpr_R(env, sg->data);
5775             ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
5776             ARMCondCode cc = iselCondCode(env, sg->guard);
5777             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5778                              (cc, False/*!isLoad*/, rD, am));
5779             return;
5780          }
5781          case Ity_I16: {
5782             HReg        rD = iselIntExpr_R(env, sg->data);
5783             ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
5784             ARMCondCode cc = iselCondCode(env, sg->guard);
5785             addInstr(env, ARMInstr_LdSt16(cc,
5786                                           False/*!isLoad*/,
5787                                           False/*!isSignedLoad*/, rD, am));
5788             return;
5789          }
5790          default:
5791             break;
5792       }
5793       break;
5794    }
5795 
5796    /* --------- CONDITIONAL LOAD --------- */
5797    /* conditional little-endian load from memory */
5798    case Ist_LoadG: {
5799       IRLoadG*  lg   = stmt->Ist.LoadG.details;
5800       IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
5801       IREndness end  = lg->end;
5802 
5803       if (tya != Ity_I32 || end != Iend_LE)
5804          goto stmt_fail;
5805 
5806       switch (lg->cvt) {
5807          case ILGop_8Uto32:
5808          case ILGop_Ident32: {
5809             HReg        rAlt = iselIntExpr_R(env, lg->alt);
5810             ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
5811             HReg        rD   = lookupIRTemp(env, lg->dst);
5812             addInstr(env, mk_iMOVds_RR(rD, rAlt));
5813             ARMCondCode cc   = iselCondCode(env, lg->guard);
5814             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5815                                                     : ARMInstr_LdSt8U)
5816                              (cc, True/*isLoad*/, rD, am));
5817             return;
5818          }
5819          case ILGop_16Sto32:
5820          case ILGop_16Uto32:
5821          case ILGop_8Sto32: {
5822             HReg        rAlt = iselIntExpr_R(env, lg->alt);
5823             ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
5824             HReg        rD   = lookupIRTemp(env, lg->dst);
5825             addInstr(env, mk_iMOVds_RR(rD, rAlt));
5826             ARMCondCode cc   = iselCondCode(env, lg->guard);
5827             if (lg->cvt == ILGop_8Sto32) {
5828                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5829             } else {
5830                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5831                Bool sx = lg->cvt == ILGop_16Sto32;
5832                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5833             }
5834             return;
5835          }
5836          default:
5837             break;
5838       }
5839       break;
5840    }
5841 
5842    /* --------- PUT --------- */
5843    /* write guest state, fixed offset */
5844    case Ist_Put: {
5845        IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5846 
5847        if (tyd == Ity_I32) {
5848            HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5849            ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5850            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5851            return;
5852        }
5853        if (tyd == Ity_I64) {
5854           if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5855              HReg addr = newVRegI(env);
5856              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5857              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5858                                                 stmt->Ist.Put.offset));
5859              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5860           } else {
5861              HReg rDhi, rDlo;
5862              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5863                                            stmt->Ist.Put.offset + 0);
5864              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5865                                            stmt->Ist.Put.offset + 4);
5866              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5867              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5868                                            rDhi, am4));
5869              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5870                                            rDlo, am0));
5871           }
5872           return;
5873        }
5874        if (tyd == Ity_F64) {
5875           // XXX This won't work if offset > 1020 or is not 0 % 4.
5876           // In which case we'll have to generate more longwinded code.
5877           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5878           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5879           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5880           return;
5881        }
5882        if (tyd == Ity_F32) {
5883           // XXX This won't work if offset > 1020 or is not 0 % 4.
5884           // In which case we'll have to generate more longwinded code.
5885           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5886           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5887           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5888           return;
5889        }
5890        if (tyd == Ity_V128) {
5891           HReg addr = newVRegI(env);
5892           HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5893           addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5894                                        stmt->Ist.Put.offset));
5895           addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5896           return;
5897        }
5898        break;
5899    }
5900 
5901    /* --------- TMP --------- */
5902    /* assign value to temporary */
5903    case Ist_WrTmp: {
5904       IRTemp tmp = stmt->Ist.WrTmp.tmp;
5905       IRType ty = typeOfIRTemp(env->type_env, tmp);
5906 
5907       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5908          ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5909                                           env, stmt->Ist.WrTmp.data);
5910          HReg     dst  = lookupIRTemp(env, tmp);
5911          addInstr(env, ARMInstr_Mov(dst,ri84));
5912          return;
5913       }
5914       if (ty == Ity_I1) {
5915          /* Here, we are generating a I1 value into a 32 bit register.
5916             Make sure the value in the register is only zero or one,
5917             but no other.  This allows optimisation of the
5918             1Uto32(tmp:I1) case, by making it simply a copy of the
5919             register holding 'tmp'.  The point being that the value in
5920             the register holding 'tmp' can only have been created
5921             here. */
5922          HReg        dst  = lookupIRTemp(env, tmp);
5923          ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5924          addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5925          addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5926          return;
5927       }
5928       if (ty == Ity_I64) {
5929          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5930             HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5931             HReg dst = lookupIRTemp(env, tmp);
5932             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5933          } else {
5934             HReg rHi, rLo, dstHi, dstLo;
5935             iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5936             lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5937             addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5938             addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5939          }
5940          return;
5941       }
5942       if (ty == Ity_F64) {
5943          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5944          HReg dst = lookupIRTemp(env, tmp);
5945          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5946          return;
5947       }
5948       if (ty == Ity_F32) {
5949          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5950          HReg dst = lookupIRTemp(env, tmp);
5951          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5952          return;
5953       }
5954       if (ty == Ity_V128) {
5955          HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5956          HReg dst = lookupIRTemp(env, tmp);
5957          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5958          return;
5959       }
5960       break;
5961    }
5962 
5963    /* --------- Call to DIRTY helper --------- */
5964    /* call complex ("dirty") helper function */
5965    case Ist_Dirty: {
5966       IRDirty* d = stmt->Ist.Dirty.details;
5967 
5968       /* Figure out the return type, if any. */
5969       IRType retty = Ity_INVALID;
5970       if (d->tmp != IRTemp_INVALID)
5971          retty = typeOfIRTemp(env->type_env, d->tmp);
5972 
5973       Bool retty_ok = False;
5974       switch (retty) {
5975          case Ity_INVALID: /* function doesn't return anything */
5976          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5977          //case Ity_V128: //ATC
5978             retty_ok = True; break;
5979          default:
5980             break;
5981       }
5982       if (!retty_ok)
5983          break; /* will go to stmt_fail: */
5984 
5985       /* Marshal args, do the call, and set the return value to 0x555..555
5986          if this is a conditional call that returns a value and the
5987          call is skipped. */
5988       UInt   addToSp = 0;
5989       RetLoc rloc    = mk_RetLoc_INVALID();
5990       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
5991       vassert(is_sane_RetLoc(rloc));
5992 
5993       /* Now figure out what to do with the returned value, if any. */
5994       switch (retty) {
5995          case Ity_INVALID: {
5996             /* No return value.  Nothing to do. */
5997             vassert(d->tmp == IRTemp_INVALID);
5998             vassert(rloc.pri == RLPri_None);
5999             vassert(addToSp == 0);
6000             return;
6001          }
6002          case Ity_I64: {
6003             vassert(rloc.pri == RLPri_2Int);
6004             vassert(addToSp == 0);
6005             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6006                HReg tmp = lookupIRTemp(env, d->tmp);
6007                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6008                                                         hregARM_R0()));
6009             } else {
6010                HReg dstHi, dstLo;
6011                /* The returned value is in r1:r0.  Park it in the
6012                   register-pair associated with tmp. */
6013                lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6014                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6015                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6016             }
6017             return;
6018          }
6019          case Ity_I32: case Ity_I16: case Ity_I8: {
6020             vassert(rloc.pri == RLPri_Int);
6021             vassert(addToSp == 0);
6022             /* The returned value is in r0.  Park it in the register
6023                associated with tmp. */
6024             HReg dst = lookupIRTemp(env, d->tmp);
6025             addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6026             return;
6027          }
6028          case Ity_V128: {
6029             vassert(0); // ATC.  The code that this produces really
6030             // needs to be looked at, to verify correctness.
6031             // I don't think this can ever happen though, since the
6032             // ARM front end never produces 128-bit loads/stores.
6033             // Hence the following is mostly theoretical.
6034             /* The returned value is on the stack, and *retloc tells
6035                us where.  Fish it off the stack and then move the
6036                stack pointer upwards to clear it, as directed by
6037                doHelperCall. */
6038             vassert(rloc.pri == RLPri_V128SpRel);
6039             vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6040             vassert(addToSp >= 16);
6041             vassert(addToSp < 256); // ditto reason as for rloc.spOff
6042             HReg dst = lookupIRTemp(env, d->tmp);
6043             HReg tmp = newVRegI(env);
6044             HReg r13 = hregARM_R13(); // sp
6045             addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6046                                        tmp, r13, ARMRI84_I84(rloc.spOff,0)));
6047             ARMAModeN* am = mkARMAModeN_R(tmp);
6048             addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6049             addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6050                                        r13, r13, ARMRI84_I84(addToSp,0)));
6051             return;
6052          }
6053          default:
6054             /*NOTREACHED*/
6055             vassert(0);
6056       }
6057       break;
6058    }
6059 
6060    /* --------- Load Linked and Store Conditional --------- */
6061    case Ist_LLSC: {
6062       if (stmt->Ist.LLSC.storedata == NULL) {
6063          /* LL */
6064          IRTemp res = stmt->Ist.LLSC.result;
6065          IRType ty  = typeOfIRTemp(env->type_env, res);
6066          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6067             Int  szB   = 0;
6068             HReg r_dst = lookupIRTemp(env, res);
6069             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6070             switch (ty) {
6071                case Ity_I8:  szB = 1; break;
6072                case Ity_I16: szB = 2; break;
6073                case Ity_I32: szB = 4; break;
6074                default:      vassert(0);
6075             }
6076             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6077             addInstr(env, ARMInstr_LdrEX(szB));
6078             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6079             return;
6080          }
6081          if (ty == Ity_I64) {
6082             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6083             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6084             addInstr(env, ARMInstr_LdrEX(8));
6085             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
6086                move it into a result register pair.  On a NEON capable
6087                CPU, the result register will be a 64 bit NEON
6088                register, so we must move it there instead. */
6089             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6090                HReg dst = lookupIRTemp(env, res);
6091                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6092                                                         hregARM_R2()));
6093             } else {
6094                HReg r_dst_hi, r_dst_lo;
6095                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6096                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6097                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6098             }
6099             return;
6100          }
6101          /*NOTREACHED*/
6102          vassert(0);
6103       } else {
6104          /* SC */
6105          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6106          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6107             Int  szB = 0;
6108             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6109             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6110             switch (tyd) {
6111                case Ity_I8:  szB = 1; break;
6112                case Ity_I16: szB = 2; break;
6113                case Ity_I32: szB = 4; break;
6114                default:      vassert(0);
6115             }
6116             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6117             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6118             addInstr(env, ARMInstr_StrEX(szB));
6119          } else {
6120             vassert(tyd == Ity_I64);
6121             /* This is really ugly.  There is no is/is-not NEON
6122                decision akin to the case for LL, because iselInt64Expr
6123                fudges this for us, and always gets the result into two
6124                GPRs even if this means moving it from a NEON
6125                register. */
6126             HReg rDhi, rDlo;
6127             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6128             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6129             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6130             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6131             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6132             addInstr(env, ARMInstr_StrEX(8));
6133          }
6134          /* now r0 is 1 if failed, 0 if success.  Change to IR
6135             conventions (0 is fail, 1 is success).  Also transfer
6136             result to r_res. */
6137          IRTemp   res   = stmt->Ist.LLSC.result;
6138          IRType   ty    = typeOfIRTemp(env->type_env, res);
6139          HReg     r_res = lookupIRTemp(env, res);
6140          ARMRI84* one   = ARMRI84_I84(1,0);
6141          vassert(ty == Ity_I1);
6142          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6143          /* And be conservative -- mask off all but the lowest bit */
6144          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6145          return;
6146       }
6147       break;
6148    }
6149 
6150    /* --------- MEM FENCE --------- */
6151    case Ist_MBE:
6152       switch (stmt->Ist.MBE.event) {
6153          case Imbe_Fence:
6154             addInstr(env, ARMInstr_MFence());
6155             return;
6156          case Imbe_CancelReservation:
6157             addInstr(env, ARMInstr_CLREX());
6158             return;
6159          default:
6160             break;
6161       }
6162       break;
6163 
6164    /* --------- INSTR MARK --------- */
6165    /* Doesn't generate any executable code ... */
6166    case Ist_IMark:
6167        return;
6168 
6169    /* --------- NO-OP --------- */
6170    case Ist_NoOp:
6171        return;
6172 
6173    /* --------- EXIT --------- */
6174    case Ist_Exit: {
6175       if (stmt->Ist.Exit.dst->tag != Ico_U32)
6176          vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6177 
6178       ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
6179       ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
6180                                         stmt->Ist.Exit.offsIP);
6181 
6182       /* Case: boring transfer to known address */
6183       if (stmt->Ist.Exit.jk == Ijk_Boring
6184           || stmt->Ist.Exit.jk == Ijk_Call
6185           || stmt->Ist.Exit.jk == Ijk_Ret) {
6186          if (env->chainingAllowed) {
6187             /* .. almost always true .. */
6188             /* Skip the event check at the dst if this is a forwards
6189                edge. */
6190             Bool toFastEP
6191                = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6192             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6193             addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6194                                            amR15T, cc, toFastEP));
6195          } else {
6196             /* .. very occasionally .. */
6197             /* We can't use chaining, so ask for an assisted transfer,
6198                as that's the only alternative that is allowable. */
6199             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6200             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6201          }
6202          return;
6203       }
6204 
6205       /* Case: assisted transfer to arbitrary address */
6206       switch (stmt->Ist.Exit.jk) {
6207          /* Keep this list in sync with that in iselNext below */
6208          case Ijk_ClientReq:
6209          case Ijk_NoDecode:
6210          case Ijk_NoRedir:
6211          case Ijk_Sys_syscall:
6212          case Ijk_InvalICache:
6213          case Ijk_Yield:
6214          {
6215             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6216             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6217                                              stmt->Ist.Exit.jk));
6218             return;
6219          }
6220          default:
6221             break;
6222       }
6223 
6224       /* Do we ever expect to see any other kind? */
6225       goto stmt_fail;
6226    }
6227 
6228    default: break;
6229    }
6230   stmt_fail:
6231    ppIRStmt(stmt);
6232    vpanic("iselStmt");
6233 }
6234 
6235 
6236 /*---------------------------------------------------------*/
6237 /*--- ISEL: Basic block terminators (Nexts)             ---*/
6238 /*---------------------------------------------------------*/
6239 
/* Emit the control transfer that ends a superblock.

   next    expression giving the destination address
   jk      the kind of jump being made
   offsIP  offset which, combined with r8, forms the address mode
           handed to the transfer instruction (printed as PUT(offsIP)
           in the trace output)

   Three shapes of transfer are generated:
     - XDirect   : constant destination, Boring/Call, chaining allowed
     - XIndir    : non-constant Boring/Ret/Call, chaining allowed
     - XAssisted : everything else that is supported, including all
                   Boring/Ret/Call transfers when chaining is disallowed
   Any other jump kind is printed and then asserts. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Destination known at translation time. */
   if (next->tag == Iex_Const) {
      IRConst* target = next->Iex.Const.con;
      /* Checked for every constant destination, whatever the jump kind. */
      vassert(target->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         ARMAMode1* amIP = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (!env->chainingAllowed) {
            /* Rare: chaining is off, so an assisted transfer is the
               only allowable alternative. */
            HReg rTarget = iselIntExpr_R(env, next);
            addInstr(env, ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL,
                                             Ijk_Boring));
            return;
         }
         /* Usual case.  A forwards edge (destination above max_ga)
            may skip the event check at the destination. */
         Bool forwardsEdge = target->Ico.U32 > env->max_ga;
         if (0) vex_printf("%s", forwardsEdge ? "X" : ".");
         addInstr(env, ARMInstr_XDirect(target->Ico.U32,
                                        amIP, ARMcc_AL,
                                        forwardsEdge));
         return;
      }
   }

   switch (jk) {

      /* Call/return (treated as boring) transfer to any address. */
      case Ijk_Boring:
      case Ijk_Ret:
      case Ijk_Call: {
         HReg       rTarget = iselIntExpr_R(env, next);
         ARMAMode1* amIP    = ARMAMode1_RI(hregARM_R8(), offsIP);
         addInstr(env, env->chainingAllowed
                          ? ARMInstr_XIndir(rTarget, amIP, ARMcc_AL)
                          : ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL,
                                               Ijk_Boring));
         return;
      }

      /* Assisted transfer to arbitrary address.  Keep this list in
         sync with the one used for Ist_Exit in iselStmt. */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg       rTarget = iselIntExpr_R(env, next);
         ARMAMode1* amIP    = ARMAMode1_RI(hregARM_R8(), offsIP);
         addInstr(env, ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL, jk));
         return;
      }

      default:
         break;
   }

   /* Unsupported jump kind: show what we were given, then give up. */
   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
6323 
6324 
6325 /*---------------------------------------------------------*/
6326 /*--- Insn selector top-level                           ---*/
6327 /*---------------------------------------------------------*/
6328 
6329 /* Translate an entire SB to arm code. */
6330 
iselSB_ARM(const IRSB * bb,VexArch arch_host,const VexArchInfo * archinfo_host,const VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr max_ga)6331 HInstrArray* iselSB_ARM ( const IRSB* bb,
6332                           VexArch      arch_host,
6333                           const VexArchInfo* archinfo_host,
6334                           const VexAbiInfo*  vbi/*UNUSED*/,
6335                           Int offs_Host_EvC_Counter,
6336                           Int offs_Host_EvC_FailAddr,
6337                           Bool chainingAllowed,
6338                           Bool addProfInc,
6339                           Addr max_ga )
6340 {
6341    Int       i, j;
6342    HReg      hreg, hregHI;
6343    ISelEnv*  env;
6344    UInt      hwcaps_host = archinfo_host->hwcaps;
6345    ARMAMode1 *amCounter, *amFailAddr;
6346 
6347    /* sanity ... */
6348    vassert(arch_host == VexArchARM);
6349 
6350    /* Check that the host's endianness is as expected. */
6351    vassert(archinfo_host->endness == VexEndnessLE);
6352 
6353    /* guard against unexpected space regressions */
6354    vassert(sizeof(ARMInstr) <= 28);
6355 
6356    /* hwcaps should not change from one ISEL call to another. */
6357    arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6358 
6359    /* Make up an initial environment to use. */
6360    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6361    env->vreg_ctr = 0;
6362 
6363    /* Set up output code array. */
6364    env->code = newHInstrArray();
6365 
6366    /* Copy BB's type env. */
6367    env->type_env = bb->tyenv;
6368 
6369    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6370       change as we go along. */
6371    env->n_vregmap = bb->tyenv->types_used;
6372    env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6373    env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6374 
6375    /* and finally ... */
6376    env->chainingAllowed = chainingAllowed;
6377    env->hwcaps          = hwcaps_host;
6378    env->max_ga          = max_ga;
6379 
6380    /* For each IR temporary, allocate a suitably-kinded virtual
6381       register. */
6382    j = 0;
6383    for (i = 0; i < env->n_vregmap; i++) {
6384       hregHI = hreg = INVALID_HREG;
6385       switch (bb->tyenv->types[i]) {
6386          case Ity_I1:
6387          case Ity_I8:
6388          case Ity_I16:
6389          case Ity_I32:  hreg   = mkHReg(True, HRcInt32, 0, j++); break;
6390          case Ity_I64:
6391             if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6392                hreg = mkHReg(True, HRcFlt64, 0, j++);
6393             } else {
6394                hregHI = mkHReg(True, HRcInt32, 0, j++);
6395                hreg   = mkHReg(True, HRcInt32, 0, j++);
6396             }
6397             break;
6398          case Ity_F32:  hreg   = mkHReg(True, HRcFlt32,  0, j++); break;
6399          case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
6400          case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
6401          default: ppIRType(bb->tyenv->types[i]);
6402                   vpanic("iselBB: IRTemp type");
6403       }
6404       env->vregmap[i]   = hreg;
6405       env->vregmapHI[i] = hregHI;
6406    }
6407    env->vreg_ctr = j;
6408 
6409    /* The very first instruction must be an event check. */
6410    amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6411    amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6412    addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6413 
6414    /* Possibly a block counter increment (for profiling).  At this
6415       point we don't know the address of the counter, so just pretend
6416       it is zero.  It will have to be patched later, but before this
6417       translation is used, by a call to LibVEX_patchProfCtr. */
6418    if (addProfInc) {
6419       addInstr(env, ARMInstr_ProfInc());
6420    }
6421 
6422    /* Ok, finally we can iterate over the statements. */
6423    for (i = 0; i < bb->stmts_used; i++)
6424       iselStmt(env, bb->stmts[i]);
6425 
6426    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6427 
6428    /* record the number of vregs we used. */
6429    env->code->n_vregs = env->vreg_ctr;
6430    return env->code;
6431 }
6432 
6433 
6434 /*---------------------------------------------------------------*/
6435 /*--- end                                     host_arm_isel.c ---*/
6436 /*---------------------------------------------------------------*/
6437 
6438