
/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "ir_match.h"

#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
#include "host_arm64_defs.h"


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     |vregmap|   holds the primary register for the IRTemp.
     |vregmapHI| is only used for 128-bit integer-typed
                 IRTemps.  It holds the identity of a second
                 64-bit virtual HReg, which holds the high half
                 of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host hardware capabilities word.  This is set at the start
     and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   - An IRExpr*, which may be NULL, holding the IR expression (an
     IRRoundingMode-encoded value) to which the FPU's rounding mode
     was most recently set.  Setting to NULL is always safe.  Used to
     avoid redundant settings of the FPU's rounding mode, as
     described in set_FPCR_rounding_mode below.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constants -- set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;

      IRExpr*      previous_rm;
   }
   ISelEnv;

static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.

   Because some forms of ARM64 memory amodes are implicitly scaled by
   the access size, iselIntExpr_AMode takes an IRType which tells it
   the type of the access for which the amode is to be used.  This
   type needs to be correct, else you'll get incorrect code.
*/
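
/* As a worked example of the scaling issue (illustrative only): for a
   32-bit access, the address expression Add64(t, 40) can be matched
   to an RI12 amode holding uimm12 = 40/4 = 10 with szB = 4, whereas
   the same expression for a 64-bit access needs uimm12 = 40/8 = 5
   with szB = 8.  Hence the amode cannot be chosen without knowing the
   access type |dty|. */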
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselF16Expr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselF16Expr            ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr           ( ISelEnv* env, IRExpr* e );

static void        iselV256Expr_wrk       ( /*OUT*/HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
static void        iselV256Expr           ( /*OUT*/HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Generate an amode suitable for a 64-bit sized access relative to
   the baseblock register (X21).  This generates an RI12 amode, which
   means it's scaled by the access size, which is why the access size
   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
   to be divisible by 8. */
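/* Worked example: a 64-bit guest state slot at offset 48 becomes
   ARM64AMode_RI12(X21, 48 >> 3 = 6, 8). */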
static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
{
   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 7) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
}

/* Ditto, for 32 bit accesses. */
static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
{
   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 3) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
}

/* Ditto, for 16 bit accesses. */
static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
{
   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 1) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
}

/* Ditto, for 8 bit accesses. */
static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
{
   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
}

static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
{
   vassert(off < (1<<12));
   HReg r = newVRegI(env);
   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
                                     ARM64RIA_I12(off,0), True/*isAdd*/));
   return r;
}

static HReg get_baseblock_register ( void )
{
   return hregARM64_X21();
}

/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst  = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
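/* For instance (worked example): with src = 0x8000, the left shift by
   48 gives 0x8000000000000000, and the arithmetic right shift by 48
   then yields 0xFFFFFFFFFFFF8000, the sign-extended result. */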
static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
   return dst;
}

/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
   return dst;
}

/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n32 = ARM64RI6_I6(32);
   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
   return dst;
}

/* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
   return dst;
}

static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
   return dst;
}

/* Is this IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e ) {
   if (e->tag != Iex_Const) return False;
   IRConst* con = e->Iex.Const.con;
   vassert(con->tag == Ico_U64);
   return con->Ico.U64 == 0;
}


/*---------------------------------------------------------*/
/*--- ISEL: FP rounding mode helpers                    ---*/
/*---------------------------------------------------------*/

/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the ARM64
   FPCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.

   Setting the rounding mode is expensive.  So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.

   This is safe because of the SSA property of IR: an IR temporary can
   only be defined once and so will have the same value regardless of
   where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps in blocks where the RM is set more than
   once, it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before.  */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
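   /* Worked example: Irrm_PosINF has IR encoding 10; swapping the two
      bits gives the ARM64 encoding 01, which, shifted left by 22,
      lands in the FPCR's RMode field at bits [23:22]. */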
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;
      msr fpcr, t3
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}


/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                       ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   Addr64        target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
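
   /* For example (illustrative): a call f(t1, 42) whose arguments are
      just an IRTemp and a constant passes the mightRequireFixedRegs
      test, so each arg can be computed directly into x0/x1 using the
      fast scheme.  Any more complex argument expression
      conservatively forces the slow scheme. */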

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         nBBPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values make us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (Addr)cee->addr;
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32 bit)             ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64- and 32-bit type.  All results
   are returned in a 64-bit register.  For 32-bit expressions, the
   upper 32 bits are arbitrary, so you should mask or sign extend
   partial values if necessary.
*/
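
/* A minimal sketch of the rule above: to obtain a modifiable copy of
   a result, allocate a fresh vreg and copy into it first, e.g.

      HReg r = iselIntExpr_R(env, e);
      HReg t = newVRegI(env);
      addInstr(env, ARM64Instr_MovI(t, r));  // t may now be modified
*/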

/* --------------------- AMode --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static Bool isValidScale ( UChar scale )
{
   switch (scale) {
      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
      default: return False;
   }
}

static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}

static
ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
{
   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
   vassert(sane_AMode(am));
   return am;
}

static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets.  */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}


/* --------------------- RIA --------------------- */

/* Select instructions to generate 'e' into a RIA. */

static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ULong u = 0xF000000ULL; /* invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
      }
      if (0 == (u & ~(0xFFFULL << 0)))
         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
      if (0 == (u & ~(0xFFFULL << 12)))
         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIA_R(r);
   }
}


/* --------------------- RIL --------------------- */

/* Select instructions to generate 'e' into a RIL.  At this point we
   have to deal with the strange bitfield-immediate encoding for logic
   instructions. */


// The following four functions
//    CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
// are copied, with modifications, from
// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
// which has the following copyright notice:
/*
   Copyright 2013, ARM Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of ARM Limited nor the names of its contributors may be
     used to endorse or promote products derived from this software without
     specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

static Int CountLeadingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   ULong bit_test = 1ULL << (width - 1);
   while ((count < width) && ((bit_test & value) == 0)) {
      count++;
      bit_test >>= 1;
   }
   return count;
}

static Int CountTrailingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   while ((count < width) && (((value >> count) & 1) == 0)) {
      count++;
   }
   return count;
}

static Int CountSetBits(ULong value, Int width)
{
   // TODO: Other widths could be added here, as the implementation already
   // supports them.
   vassert(width == 32 || width == 64);

   // Mask out unused bits to ensure that they are not counted.
   value &= (0xffffffffffffffffULL >> (64-width));

   // Add up the set bits.
   // The algorithm works by adding pairs of bit fields together iteratively,
   // where the size of each bit field doubles each time.
   // An example for an 8-bit value:
   // Bits:  h  g  f  e  d  c  b  a
   //         \ |   \ |   \ |   \ |
   // value = h+g   f+e   d+c   b+a
   //            \    |      \    |
   // value =   h+g+f+e     d+c+b+a
   //                  \          |
   // value =       h+g+f+e+d+c+b+a
   value = ((value >>  1) & 0x5555555555555555ULL)
                 + (value & 0x5555555555555555ULL);
   value = ((value >>  2) & 0x3333333333333333ULL)
                 + (value & 0x3333333333333333ULL);
   value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
                 + (value & 0x0f0f0f0f0f0f0f0fULL);
   value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
                 + (value & 0x00ff00ff00ff00ffULL);
   value = ((value >> 16) & 0x0000ffff0000ffffULL)
                 + (value & 0x0000ffff0000ffffULL);
   value = ((value >> 32) & 0x00000000ffffffffULL)
                 + (value & 0x00000000ffffffffULL);

   return value;
}

static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
  // Test if a given value can be encoded in the immediate field of a
  // logical instruction.

  // If it can be encoded, the function returns true, and values
  // pointed to by n, imm_s and imm_r are updated with immediates
  // encoded in the format required by the corresponding fields in the
  // logical instruction.  If it can not be encoded, the function
  // returns false, and the values pointed to by n, imm_s and imm_r
  // are undefined.
  vassert(n != NULL && imm_s != NULL && imm_r != NULL);
  vassert(width == 32 || width == 64);

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //  N   imms    immr    size        S             R
  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //
  // TODO: This code does not consider using X/W register overlap to support
  // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
  // are an encodable logical immediate.
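
  // Worked example (added for illustration): for
  // value = 0x00FF00FF00FF00FF and width = 64, the repeating element
  // is the 16-bit pattern 0x00FF -- 8 set bits (S = 7) with no
  // rotation (R = 0) -- so per the table it encodes as N = 0,
  // imms = 0b100111, immr = 0b000000.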

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if ((value == 0) || (value == 0xffffffffffffffffULL) ||
      ((width == 32) && (value == 0xffffffff))) {
    return False;
  }

  UInt lead_zero = CountLeadingZeros(value, width);
  UInt lead_one = CountLeadingZeros(~value, width);
  UInt trail_zero = CountTrailingZeros(value, width);
  UInt trail_one = CountTrailingZeros(~value, width);
  UInt set_bits = CountSetBits(value, width);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  Int imm_s_fixed = (width == 64) ? -128 : -64;
  Int imm_s_mask = 0x3F;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      *n = 0;
      *imm_s = 0x3C;
      *imm_r = (value & 3) - 1;
      return True;
    }

    *n = (width == 64) ? 1 : 0;
    *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      *imm_r = 0;
    } else {
      *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
    // the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      return True;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    // value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      return True;
    }

    // 5. If the most-significant half of the bitwise value is equal to the
    // least-significant half, return to step 2 using the least-significant
    // half of the value.
    ULong mask = (1ULL << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return False;
  }
}


/* Create a RIL for the given immediate, if it is representable, or
   return NULL if not. */

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
{
   UInt n = 0, imm_s = 0, imm_r = 0;
   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
   if (!ok) return NULL;
   vassert(n < 2 && imm_s < 64 && imm_r < 64);
   return ARM64RIL_I13(n, imm_r, imm_s);
}
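
/* For instance (illustrative): mb_mkARM64RIL_I(0xFF) succeeds --
   eight contiguous set bits -- and yields ARM64RIL_I13(1, 0, 7),
   whereas mb_mkARM64RIL_I(0xFE01) returns NULL, since that value is
   not a repetition of a rotated run of ones and hence is not
   encodable. */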
1179 
1180 /* So, finally .. */
1181 
iselIntExpr_RIL(ISelEnv * env,IRExpr * e)1182 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1183 {
1184    ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1185    /* sanity checks ... */
1186    switch (ri->tag) {
1187       case ARM64riL_I13:
1188          vassert(ri->ARM64riL.I13.bitN < 2);
1189          vassert(ri->ARM64riL.I13.immR < 64);
1190          vassert(ri->ARM64riL.I13.immS < 64);
1191          return ri;
1192       case ARM64riL_R:
1193          vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1194          vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1195          return ri;
1196       default:
1197          vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1198    }
1199 }
1200 
1201 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RIL_wrk(ISelEnv * env,IRExpr * e)1202 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1203 {
1204    IRType ty = typeOfIRExpr(env->type_env,e);
1205    vassert(ty == Ity_I64 || ty == Ity_I32);
1206 
1207    /* special case: immediate */
1208    if (e->tag == Iex_Const) {
1209       ARM64RIL* maybe = NULL;
1210       if (ty == Ity_I64) {
1211          vassert(e->Iex.Const.con->tag == Ico_U64);
1212          maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1213       } else {
1214          vassert(ty == Ity_I32);
1215          vassert(e->Iex.Const.con->tag == Ico_U32);
1216          UInt  u32 = e->Iex.Const.con->Ico.U32;
1217          ULong u64 = (ULong)u32;
1218          /* First try with 32 leading zeroes. */
1219          maybe = mb_mkARM64RIL_I(u64);
1220          /* If that doesn't work, try with 2 copies, since it doesn't
1221             matter what winds up in the upper 32 bits. */
1222          if (!maybe) {
1223             maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1224          }
1225       }
1226       if (maybe) return maybe;
1227       /* else fail, fall through to default case */
1228    }
1229 
1230    /* default case: calculate into a register and return that */
1231    {
1232       HReg r = iselIntExpr_R ( env, e );
1233       return ARM64RIL_R(r);
1234    }
1235 }
1236 
1237 
1238 /* --------------------- RI6 --------------------- */
1239 
1240 /* Select instructions to generate 'e' into a RI6. */
1241 
iselIntExpr_RI6(ISelEnv * env,IRExpr * e)1242 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1243 {
1244    ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1245    /* sanity checks ... */
1246    switch (ri->tag) {
1247       case ARM64ri6_I6:
1248          vassert(ri->ARM64ri6.I6.imm6 < 64);
1249          vassert(ri->ARM64ri6.I6.imm6 > 0);
1250          return ri;
1251       case ARM64ri6_R:
1252          vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1253          vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1254          return ri;
1255       default:
1256          vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1257    }
1258 }
1259 
1260 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI6_wrk(ISelEnv * env,IRExpr * e)1261 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1262 {
1263    IRType ty = typeOfIRExpr(env->type_env,e);
1264    vassert(ty == Ity_I64 || ty == Ity_I8);
1265 
1266    /* special case: immediate */
1267    if (e->tag == Iex_Const) {
1268       switch (e->Iex.Const.con->tag) {
1269          case Ico_U8: {
1270             UInt u = e->Iex.Const.con->Ico.U8;
1271             if (u > 0 && u < 64)
1272               return ARM64RI6_I6(u);
1273             break;
1274          default:
1275             break;
1276          }
1277       }
1278       /* else fail, fall through to default case */
1279    }
1280 
1281    /* default case: calculate into a register and return that */
1282    {
1283       HReg r = iselIntExpr_R ( env, e );
1284       return ARM64RI6_R(r);
1285    }
1286 }
1287 
1288 
1289 /* ------------------- CondCode ------------------- */
1290 
1291 /* Generate code to evaluated a bit-typed expression, returning the
1292    condition code which would correspond when the expression would
1293    notionally have returned 1. */
1294 
iselCondCode(ISelEnv * env,IRExpr * e)1295 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1296 {
1297    ARM64CondCode cc = iselCondCode_wrk(env,e);
1298    vassert(cc != ARM64cc_NV);
1299    return cc;
1300 }
1301 
iselCondCode_wrk(ISelEnv * env,IRExpr * e)1302 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1303 {
1304    vassert(e);
1305    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1306 
1307    /* var */
1308    if (e->tag == Iex_RdTmp) {
1309       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1310       /* Cmp doesn't modify rTmp; so this is OK. */
1311       ARM64RIL* one = mb_mkARM64RIL_I(1);
1312       vassert(one);
1313       addInstr(env, ARM64Instr_Test(rTmp, one));
1314       return ARM64cc_NE;
1315    }
1316 
1317    /* Not1(e) */
1318    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1319       /* Generate code for the arg, and negate the test condition */
1320       ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1321       if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1322         return ARM64cc_AL;
1323       } else {
1324         return 1 ^ cc;
1325       }
1326    }
1327 
1328    /* --- patterns rooted at: 64to1 --- */
1329 
1330    if (e->tag == Iex_Unop
1331        && e->Iex.Unop.op == Iop_64to1) {
1332       HReg      rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1333       ARM64RIL* one  = mb_mkARM64RIL_I(1);
1334       vassert(one); /* '1' must be representable */
1335       addInstr(env, ARM64Instr_Test(rTmp, one));
1336       return ARM64cc_NE;
1337    }
1338 
1339    /* --- patterns rooted at: CmpNEZ8 --- */
1340 
1341    if (e->tag == Iex_Unop
1342        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1343       HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
1344       ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1345       addInstr(env, ARM64Instr_Test(r1, xFF));
1346       return ARM64cc_NE;
1347    }
1348 
1349    /* --- patterns rooted at: CmpNEZ16 --- */
1350 
1351    if (e->tag == Iex_Unop
1352        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1353       HReg      r1    = iselIntExpr_R(env, e->Iex.Unop.arg);
1354       ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1355       addInstr(env, ARM64Instr_Test(r1, xFFFF));
1356       return ARM64cc_NE;
1357    }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
      return ARM64cc_NE;
   }

   /* --- Cmp*64*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64:  return ARM64cc_EQ;
         case Iop_CmpNE64:  return ARM64cc_NE;
         case Iop_CmpLT64S: return ARM64cc_LT;
         case Iop_CmpLT64U: return ARM64cc_CC;
         case Iop_CmpLE64S: return ARM64cc_LE;
         case Iop_CmpLE64U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX64");
      }
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return ARM64cc_EQ;
         case Iop_CmpNE32:  return ARM64cc_NE;
         case Iop_CmpLT32S: return ARM64cc_LT;
         case Iop_CmpLT32U: return ARM64cc_CC;
         case Iop_CmpLE32S: return ARM64cc_LE;
         case Iop_CmpLE32U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX32");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}


/* --------------------- Reg --------------------- */

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst  = newVRegI(env);

      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I32) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I16) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I8) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {

      ARM64LogicOp lop = 0; /* invalid */
      ARM64ShiftOp sop = 0; /* invalid */
      /* Special-case 0-x into a Neg instruction.  Not because it's
         particularly useful, but rather to create value flow through
         this instruction and so check its assembly correctness, on
         which the Left32/Left64 implementations depend. */
      switch (e->Iex.Binop.op) {
         case Iop_Sub64:
            if (isZeroU64(e->Iex.Binop.arg1)) {
               HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               HReg dst  = newVRegI(env);
               addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
               return dst;
            }
            break;
         default:
            break;
      }

      /* ADD/SUB */
      switch (e->Iex.Binop.op) {
         case Iop_Add64: case Iop_Add32:
         case Iop_Sub64: case Iop_Sub32: {
            Bool      isAdd = e->Iex.Binop.op == Iop_Add64
                              || e->Iex.Binop.op == Iop_Add32;
            HReg      dst   = newVRegI(env);
            HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
            return dst;
         }
         default:
            break;
      }

      /* AND/OR/XOR */
      switch (e->Iex.Binop.op) {
         case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
         case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;
         case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
         log_binop: {
            HReg      dst  = newVRegI(env);
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
            return dst;
         }
         default:
            break;
      }

      /* SHL/SHR/SAR */
      switch (e->Iex.Binop.op) {
         case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
         case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
         case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
         sh_binop: {
            HReg      dst  = newVRegI(env);
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
            return dst;
         }
         case Iop_Shr32:
         case Iop_Sar32: {
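            /* Widen to 64 bits first: zero-extending for Shr32,
               sign-extending for Sar32.  A 64-bit logical shift right
               then leaves the correct 32-bit result in the low half,
               for shift amounts of 0 .. 31. */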
            Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
            HReg      dst  = zx ? widen_z_32_to_64(env, argL)
                                : widen_s_32_to_64(env, argL);
            addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
            return dst;
         }
         default: break;
      }

      /* MUL */
      if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg dst  = newVRegI(env);
         addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
         return dst;
      }

      /* MULL */
      if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
         Bool isS  = e->Iex.Binop.op == Iop_MullS32;
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
         HReg dst  = newVRegI(env);
         addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg dst  = newVRegI(env);
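         /* Unsigned compare; CS (aka HS) afterwards means
            argL >= argR, in which case the CSel picks argL. */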
         addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
         addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
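         /* Compute (hi32s << 32) | zx(lo32s). */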
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg lo32  = widen_z_32_to_64(env, lo32s);
         HReg hi32  = newVRegI(env);
         addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
                                        ARM64sh_SHL));
         addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
                                        ARM64lo_OR));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
         Bool isD = e->Iex.Binop.op == Iop_CmpF64;
         HReg dL  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
         HReg dR  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         HReg imm = newVRegI(env);
         /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
            build the IRCmpF64Result-encoded result in dst. */
         addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
         addInstr(env, ARM64Instr_Imm64(dst, 0));
         addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
         addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
         addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
         addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
         return dst;
      }

      { /* local scope */
        ARM64CvtOp cvt_op = ARM64cvt_INVALID;
        Bool       srcIsD = False;
        switch (e->Iex.Binop.op) {
           case Iop_F64toI64S:
              cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
           case Iop_F64toI64U:
              cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
           case Iop_F64toI32S:
              cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
           case Iop_F64toI32U:
              cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
           case Iop_F32toI32S:
              cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
           case Iop_F32toI32U:
              cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
           case Iop_F32toI64S:
              cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
           case Iop_F32toI64U:
              cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
           default:
              break;
        }
        if (cvt_op != ARM64cvt_INVALID) {
           /* This is all a bit dodgy, because we can't handle a
              non-constant (not-known-at-JIT-time) rounding mode
              indication.  That's because there's no instruction
              AFAICS that does this conversion but rounds according to
              FPCR.RM, so we have to bake the rounding mode into the
              instruction right now.  But that should be OK because
              (1) the front end attaches a literal Irrm_ value to the
              conversion binop, and (2) iropt will never use CSE to
              pull that literal out into an assignment to a temporary.
              Hence we should always see an Irrm_ literal as the first
              arg. */
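           /* For example (an illustrative IR shape only, not taken
              from any particular guest): F64toI64S(0x3:I32, t7),
              where 0x3 is Irrm_ZERO, asks for conversion of t7
              rounding towards zero; that rounding mode gets baked
              into the conversion instruction below. */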
           IRExpr* arg1 = e->Iex.Binop.arg1;
           if (arg1->tag != Iex_Const) goto irreducible;
           IRConst* arg1con = arg1->Iex.Const.con;
           vassert(arg1con->tag == Ico_U32); // else ill-typed IR
           UInt irrm = arg1con->Ico.U32;
           /* Find the ARM-encoded equivalent for |irrm|. */
           UInt armrm = 4; /* impossible */
           switch (irrm) {
              case Irrm_NEAREST: armrm = 0; break;
              case Irrm_NegINF:  armrm = 2; break;
              case Irrm_PosINF:  armrm = 1; break;
              case Irrm_ZERO:    armrm = 3; break;
              default: goto irreducible;
           }
           HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
                         (env, e->Iex.Binop.arg2);
           HReg dst = newVRegI(env);
           addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
           return dst;
        }
      } /* local scope */

      /* All cases involving host-side helper calls. */
      void* fn = NULL;
      switch (e->Iex.Binop.op) {
         case Iop_DivU32:
            fn = &h_calc_udiv32_w_arm_semantics; break;
         case Iop_DivS32:
            fn = &h_calc_sdiv32_w_arm_semantics; break;
         case Iop_DivU64:
            fn = &h_calc_udiv64_w_arm_semantics; break;
         case Iop_DivS64:
            fn = &h_calc_sdiv64_w_arm_semantics; break;
         default:
            break;
      }

      if (fn) {
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg res  = newVRegI(env);
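         /* Per the AAPCS64 calling convention, the two integer args
            go in X0 and X1 and the 64-bit result comes back in X0. */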
         addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
         addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
         addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
                                        2, mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
         return res;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      switch (e->Iex.Unop.op) {
         case Iop_16Uto64: {
            /* This probably doesn't occur often enough to be worth
               rolling the extension into the load. */
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_z_16_to_64(env, src);
            return dst;
         }
         case Iop_32Uto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            if (arg->tag == Iex_Load) {
               /* This correctly zero extends because _LdSt32 is
                  defined to do a zero extending load. */
               HReg dst = newVRegI(env);
               ARM64AMode* am
                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
               addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
               return dst;
            }
            /* else be lame and mask it */
            HReg src  = iselIntExpr_R(env, arg);
            HReg dst  = widen_z_32_to_64(env, src);
            return dst;
         }
         case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
         case Iop_8Uto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            if (arg->tag == Iex_Load) {
               /* This correctly zero extends because _LdSt8 is
                  defined to do a zero extending load. */
               HReg dst = newVRegI(env);
               ARM64AMode* am
                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
               addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
               return dst;
            }
            /* else be lame and mask it */
            HReg src = iselIntExpr_R(env, arg);
            HReg dst = widen_z_8_to_64(env, src);
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_8Sto32: case Iop_8Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_8_to_64(env, src);
            return dst;
         }
         case Iop_16Sto32: case Iop_16Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_16_to_64(env, src);
            return dst;
         }
         case Iop_32Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_32_to_64(env, src);
            return dst;
         }
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
            return dst;
         }
         case Iop_Clz64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
            return dst;
         }
         case Iop_Left32:
         case Iop_Left64: {
            /* Left64(src) = src | -src.  Left32 can use the same
               implementation since in that case we don't care what
               the upper 32 bits become. */
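            /* Example: src = 0x10 gives -src = 0xFFFFFFFFFFFFFFF0,
               so src | -src = 0xFFFFFFFFFFFFFFF0: every bit at and
               above the lowest set bit of src becomes 1. */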
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            return dst;
         }
         case Iop_CmpwNEZ64: {
            /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
                              = Left64(src) >>s 63 */
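            /* Left64(src) has bit 63 set iff src != 0, so the
               arithmetic right shift by 63 smears exactly that fact
               across the whole register. */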
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_CmpwNEZ32: {
            /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
                              = Left64(src & 0xFFFFFFFF) >>s 63 */
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg src = widen_z_32_to_64(env, pre);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_V128to64: case Iop_V128HIto64: {
            HReg dst    = newVRegI(env);
            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
            UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
            addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
            return dst;
         }
         case Iop_ReinterpF64asI64: {
            HReg dst = newVRegI(env);
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
            return dst;
         }
         case Iop_ReinterpF32asI32: {
            HReg dst = newVRegI(env);
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
            return dst;
         }
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* As with the iselStmt case for 'tmp:I1 = expr', we could
               do a lot better here if it ever became necessary. */
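            /* The CSel materialises 0 or 1; the SHL/SAR pair by 63
               then turns that into all-zeroes or all-ones. */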
            HReg zero = newVRegI(env);
            HReg one  = newVRegI(env);
            HReg dst  = newVRegI(env);
            addInstr(env, ARM64Instr_Imm64(zero, 0));
            addInstr(env, ARM64Instr_Imm64(one,  1));
            ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SHL));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_NarrowUn16to8x8:
         case Iop_NarrowUn32to16x4:
         case Iop_NarrowUn64to32x2:
         case Iop_QNarrowUn16Sto8Sx8:
         case Iop_QNarrowUn32Sto16Sx4:
         case Iop_QNarrowUn64Sto32Sx2:
         case Iop_QNarrowUn16Uto8Ux8:
         case Iop_QNarrowUn32Uto16Ux4:
         case Iop_QNarrowUn64Uto32Ux2:
         case Iop_QNarrowUn16Sto8Ux8:
         case Iop_QNarrowUn32Sto16Ux4:
         case Iop_QNarrowUn64Sto32Ux2:
         {
            HReg src = iselV128Expr(env, e->Iex.Unop.arg);
            HReg tmp = newVRegV(env);
            HReg dst = newVRegI(env);
            UInt dszBlg2 = 3; /* illegal */
            ARM64VecNarrowOp op = ARM64vecna_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_NarrowUn16to8x8:
                  dszBlg2 = 0; op = ARM64vecna_XTN; break;
               case Iop_NarrowUn32to16x4:
                  dszBlg2 = 1; op = ARM64vecna_XTN; break;
               case Iop_NarrowUn64to32x2:
                  dszBlg2 = 2; op = ARM64vecna_XTN; break;
               case Iop_QNarrowUn16Sto8Sx8:
                  dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn32Sto16Sx4:
                  dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn64Sto32Sx2:
                  dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn16Uto8Ux8:
                  dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn32Uto16Ux4:
                  dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn64Uto32Ux2:
                  dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn16Sto8Ux8:
                  dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
               case Iop_QNarrowUn32Sto16Ux4:
                  dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
               case Iop_QNarrowUn64Sto32Ux2:
                  dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
               default:
                  vassert(0);
            }
            addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
            addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
            return dst;
         }
         case Iop_1Uto64: {
            /* 1Uto64(tmp). */
            HReg dst = newVRegI(env);
            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
               ARM64RIL* one = mb_mkARM64RIL_I(1);
               HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
               vassert(one);
               addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
            } else {
               /* Same zero/one/CSel sequence as in the 1Sto* cases
                  above. */
               HReg zero = newVRegI(env);
               HReg one  = newVRegI(env);
               addInstr(env, ARM64Instr_Imm64(zero, 0));
               addInstr(env, ARM64Instr_Imm64(one,  1));
               ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            }
            return dst;
         }
         case Iop_64to32:
         case Iop_64to16:
         case Iop_64to8:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         default:
            break;
      }

      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64
          && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I32
          && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I16
          && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I8
          /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg    dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed for
         args, and 64 bits for return type.  Don't forget to change
         the RetLoc if more types are allowed in future. */
      if (e->Iex.CCall.retty != Ity_I64)
         goto irreducible;

      /* Marshal args, do the call, clear stack. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
                                     e->Iex.CCall.args );
      if (ok) {
         vassert(is_sane_RetLoc(rloc));
         vassert(rloc.pri == RLPri_Int);
         vassert(addToSp == 0);
         addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
         return dst;
      }
      /* else fall through; will hit the irreducible: label */
   }

   /* --------- LITERAL --------- */
   /* 64-bit literals */
   case Iex_Const: {
      ULong u   = 0;
      HReg  dst = newVRegI(env);
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
         case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
      }
      addInstr(env, ARM64Instr_Imm64(dst, u));
      return dst;
   }

   /* --------- MULTIPLEX --------- */
   case Iex_ITE: {
      /* ITE(ccexpr, iftrue, iffalse) */
      if (ty == Ity_I64 || ty == Ity_I32) {
         ARM64CondCode cc;
         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegI(env);
         cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)               ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
            HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dstLo = newVRegI(env);
            HReg dstHi = newVRegI(env);
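            /* ARM64mul_PLAIN computes the low 64 bits of the product;
               ARM64mul_SX/ARM64mul_ZX compute the signed/unsigned
               high 64 bits (SMULH/UMULH). */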
            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
                                         ARM64mul_PLAIN));
            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
                                         syned ? ARM64mul_SX : ARM64mul_ZX));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }
         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr(arm64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (128 bit)                ---*/
/*---------------------------------------------------------*/

static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselV128Expr_wrk( env, e );
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Only a very limited range of constants is handled. */
      vassert(e->Iex.Const.con->tag == Ico_V128);
      UShort con = e->Iex.Const.con->Ico.V128;
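      /* In an Ico_V128 constant, each of the 16 bits describes one
         byte of the vector: bit i set means byte i is 0xFF, clear
         means 0x00.  Masks that VImmQ can't produce directly are
         built below from a producible one plus a byte rotate (VExtV)
         and/or a NOT. */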
      HReg   res = newVRegV(env);
      switch (con) {
         case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
            addInstr(env, ARM64Instr_VImmQ(res, con));
            return res;
         case 0x00F0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0F00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         case 0x0FF0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0FFF:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         case 0xF000:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            return res;
         case 0xFF00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         default:
            break;
      }
      /* Unhandled */
      goto v128_expr_bad;
   }

   if (e->tag == Iex_Load) {
      HReg res = newVRegV(env);
      HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
      vassert(ty == Ity_V128);
      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
      return res;
   }

   if (e->tag == Iex_Get) {
      UInt offs = (UInt)e->Iex.Get.offset;
      if (offs < (1<<12)) {
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         HReg res  = newVRegV(env);
         vassert(ty == Ity_V128);
         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
         return res;
      }
      goto v128_expr_bad;
   }

   if (e->tag == Iex_Unop) {

      /* Iop_ZeroHIXXofV128 cases */
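      /* imm16 uses the same one-bit-per-byte encoding as Ico_V128
         above: e.g. 0x00FF keeps the low 64 bits of the value and
         zeroes the high 64. */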
      UShort imm16 = 0;
      switch (e->Iex.Unop.op) {
         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
         default: break;
      }
      if (imm16 != 0) {
         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
         HReg imm = newVRegV(env);
         HReg res = newVRegV(env);
         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
         return res;
      }

      /* Other cases */
      switch (e->Iex.Unop.op) {
         case Iop_NotV128:
         case Iop_Abs64Fx2: case Iop_Abs32Fx4:
         case Iop_Neg64Fx2: case Iop_Neg32Fx4:
         case Iop_Abs64x2:  case Iop_Abs32x4:
         case Iop_Abs16x8:  case Iop_Abs8x16:
         case Iop_Cls32x4:  case Iop_Cls16x8:  case Iop_Cls8x16:
         case Iop_Clz32x4:  case Iop_Clz16x8:  case Iop_Clz8x16:
         case Iop_Cnt8x16:
         case Iop_Reverse1sIn8_x16:
         case Iop_Reverse8sIn16_x8:
         case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
         case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
         case Iop_Reverse32sIn64_x2:
         case Iop_RecipEst32Ux4:
         case Iop_RSqrtEst32Ux4:
         case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
         case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
         {
            HReg res   = newVRegV(env);
            HReg arg   = iselV128Expr(env, e->Iex.Unop.arg);
            Bool setRM = False;
            ARM64VecUnaryOp op = ARM64vecu_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
               case Iop_RecipEst64Fx2:     setRM = True;
                                           op = ARM64vecu_FRECPE64x2;  break;
               case Iop_RecipEst32Fx4:     setRM = True;
                                           op = ARM64vecu_FRECPE32x4;  break;
               case Iop_RSqrtEst64Fx2:     setRM = True;
                                           op = ARM64vecu_FRSQRTE64x2; break;
               case Iop_RSqrtEst32Fx4:     setRM = True;
                                           op = ARM64vecu_FRSQRTE32x4; break;
               default: vassert(0);
            }
            if (setRM) {
               // This is a bit of a kludge.  We should do rm properly for
               // these recip-est insns, but that would require changing the
               // primop's type to take an rmode.
               set_FPCR_rounding_mode(env, IRExpr_Const(
                                              IRConst_U32(Irrm_NEAREST)));
            }
            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
            return res;
         }
         case Iop_CmpNEZ8x16:
         case Iop_CmpNEZ16x8:
         case Iop_CmpNEZ32x4:
         case Iop_CmpNEZ64x2: {
            HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
            HReg zero = newVRegV(env);
            HReg res  = newVRegV(env);
            ARM64VecBinOp cmp = ARM64vecb_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
               case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
               case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
               case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
               default: vassert(0);
            }
            // This is pretty feeble.  Better: use CMP against zero
            // and avoid the extra instruction and extra register.
            addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
            addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         }
         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }
         case Iop_64UtoV128: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            return res;
         }
         case Iop_Widen8Sto16x8: {
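            /* VQfromX puts the 64-bit value in the low half of the
               vector; ZIP1 of that vector with itself then doubles
               each byte up into a 16-bit lane, and the arithmetic
               shift right by 8 sign-extends every lane.  The 16->32
               and 32->64 widenings below use the same trick. */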
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
                                                res, res, 8));
            return res;
         }
         case Iop_Widen16Sto32x4: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
                                                res, res, 16));
            return res;
         }
         case Iop_Widen32Sto64x2: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
                                                res, res, 32));
            return res;
         }
         /* ... */
         default:
            break;
      } /* switch on the unop */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_Sqrt32Fx4:
         case Iop_Sqrt64Fx2: {
            HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
            HReg res = newVRegV(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64VecUnaryOp op
               = e->Iex.Binop.op == Iop_Sqrt32Fx4
                    ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
            return res;
         }
         case Iop_64HLtoV128: {
            HReg res  = newVRegV(env);
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
            return res;
         }
         /* -- Cases where we can generate a simple three-reg instruction. -- */
         case Iop_AndV128:
         case Iop_OrV128:
         case Iop_XorV128:
         case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
         case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
         case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
         case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
         case Iop_Add64x2: case Iop_Add32x4:
         case Iop_Add16x8: case Iop_Add8x16:
         case Iop_Sub64x2: case Iop_Sub32x4:
         case Iop_Sub16x8: case Iop_Sub8x16:
         case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
         case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
         case Iop_CmpEQ16x8:  case Iop_CmpEQ8x16:
         case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
         case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
         case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
         case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
         case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
         case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
         case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
         case Iop_Perm8x16:
         case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
         case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
         case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
         case Iop_CatOddLanes16x8:  case Iop_CatOddLanes8x16:
         case Iop_InterleaveHI32x4:
         case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
         case Iop_InterleaveLO32x4:
         case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
         case Iop_PolynomialMul8x16:
         case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
         case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
         case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
         case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
         case Iop_QSub64Sx2: case Iop_QSub32Sx4:
         case Iop_QSub16Sx8: case Iop_QSub8Sx16:
         case Iop_QSub64Ux2: case Iop_QSub32Ux4:
         case Iop_QSub16Ux8: case Iop_QSub8Ux16:
         case Iop_QDMulHi32Sx4:  case Iop_QDMulHi16Sx8:
         case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
         case Iop_Sh8Sx16:  case Iop_Sh16Sx8:
         case Iop_Sh32Sx4:  case Iop_Sh64Sx2:
         case Iop_Sh8Ux16:  case Iop_Sh16Ux8:
         case Iop_Sh32Ux4:  case Iop_Sh64Ux2:
         case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
         case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
         case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
         case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
         case Iop_Max64Fx2: case Iop_Max32Fx4:
         case Iop_Min64Fx2: case Iop_Min32Fx4:
         case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
         case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
         {
            HReg res   = newVRegV(env);
            HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
            Bool sw    = False;
            Bool setRM = False;
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_AndV128:    op = ARM64vecb_AND; break;
               case Iop_OrV128:     op = ARM64vecb_ORR; break;
               case Iop_XorV128:    op = ARM64vecb_XOR; break;
               case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4; break;
               case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8; break;
               case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16; break;
               case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4; break;
               case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8; break;
               case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16; break;
               case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4; break;
               case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8; break;
               case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16; break;
               case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4; break;
               case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8; break;
               case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16; break;
               case Iop_Add64x2:    op = ARM64vecb_ADD64x2; break;
               case Iop_Add32x4:    op = ARM64vecb_ADD32x4; break;
               case Iop_Add16x8:    op = ARM64vecb_ADD16x8; break;
               case Iop_Add8x16:    op = ARM64vecb_ADD8x16; break;
               case Iop_Sub64x2:    op = ARM64vecb_SUB64x2; break;
               case Iop_Sub32x4:    op = ARM64vecb_SUB32x4; break;
               case Iop_Sub16x8:    op = ARM64vecb_SUB16x8; break;
               case Iop_Sub8x16:    op = ARM64vecb_SUB8x16; break;
               case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
               case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
               case Iop_Mul8x16:    op = ARM64vecb_MUL8x16; break;
               case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
               case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4; break;
               case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8; break;
               case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16; break;
               case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
               case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
               case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
               case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
               case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
               case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
               case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
               case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
               case Iop_Perm8x16:   op = ARM64vecb_TBL1; break;
               case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
                                          break;
               case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
                                          break;
               case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
                                          break;
               case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
                                          break;
               case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
                                          break;
               case Iop_CatOddLanes32x4:  op = ARM64vecb_UZP232x4; sw = True;
                                          break;
               case Iop_CatOddLanes16x8:  op = ARM64vecb_UZP216x8; sw = True;
                                          break;
               case Iop_CatOddLanes8x16:  op = ARM64vecb_UZP28x16; sw = True;
                                          break;
               case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
                                          break;
               case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
                                          break;
               case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
                                          break;
               case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
                                          break;
               case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
                                          break;
               case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
                                          break;
               case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
               case Iop_QAdd64Sx2:      op = ARM64vecb_SQADD64x2; break;
               case Iop_QAdd32Sx4:      op = ARM64vecb_SQADD32x4; break;
               case Iop_QAdd16Sx8:      op = ARM64vecb_SQADD16x8; break;
               case Iop_QAdd8Sx16:      op = ARM64vecb_SQADD8x16; break;
               case Iop_QAdd64Ux2:      op = ARM64vecb_UQADD64x2; break;
               case Iop_QAdd32Ux4:      op = ARM64vecb_UQADD32x4; break;
               case Iop_QAdd16Ux8:      op = ARM64vecb_UQADD16x8; break;
               case Iop_QAdd8Ux16:      op = ARM64vecb_UQADD8x16; break;
               case Iop_QSub64Sx2:      op = ARM64vecb_SQSUB64x2; break;
               case Iop_QSub32Sx4:      op = ARM64vecb_SQSUB32x4; break;
               case Iop_QSub16Sx8:      op = ARM64vecb_SQSUB16x8; break;
               case Iop_QSub8Sx16:      op = ARM64vecb_SQSUB8x16; break;
               case Iop_QSub64Ux2:      op = ARM64vecb_UQSUB64x2; break;
               case Iop_QSub32Ux4:      op = ARM64vecb_UQSUB32x4; break;
               case Iop_QSub16Ux8:      op = ARM64vecb_UQSUB16x8; break;
               case Iop_QSub8Ux16:      op = ARM64vecb_UQSUB8x16; break;
               case Iop_QDMulHi32Sx4:   op = ARM64vecb_SQDMULH32x4; break;
               case Iop_QDMulHi16Sx8:   op = ARM64vecb_SQDMULH16x8; break;
               case Iop_QRDMulHi32Sx4:  op = ARM64vecb_SQRDMULH32x4; break;
               case Iop_QRDMulHi16Sx8:  op = ARM64vecb_SQRDMULH16x8; break;
               case Iop_Sh8Sx16:        op = ARM64vecb_SSHL8x16; break;
               case Iop_Sh16Sx8:        op = ARM64vecb_SSHL16x8; break;
               case Iop_Sh32Sx4:        op = ARM64vecb_SSHL32x4; break;
               case Iop_Sh64Sx2:        op = ARM64vecb_SSHL64x2; break;
               case Iop_Sh8Ux16:        op = ARM64vecb_USHL8x16; break;
               case Iop_Sh16Ux8:        op = ARM64vecb_USHL16x8; break;
               case Iop_Sh32Ux4:        op = ARM64vecb_USHL32x4; break;
               case Iop_Sh64Ux2:        op = ARM64vecb_USHL64x2; break;
               case Iop_Rsh8Sx16:       op = ARM64vecb_SRSHL8x16; break;
               case Iop_Rsh16Sx8:       op = ARM64vecb_SRSHL16x8; break;
               case Iop_Rsh32Sx4:       op = ARM64vecb_SRSHL32x4; break;
               case Iop_Rsh64Sx2:       op = ARM64vecb_SRSHL64x2; break;
               case Iop_Rsh8Ux16:       op = ARM64vecb_URSHL8x16; break;
               case Iop_Rsh16Ux8:       op = ARM64vecb_URSHL16x8; break;
               case Iop_Rsh32Ux4:       op = ARM64vecb_URSHL32x4; break;
               case Iop_Rsh64Ux2:       op = ARM64vecb_URSHL64x2; break;
               case Iop_Max64Fx2:       op = ARM64vecb_FMAX64x2; break;
               case Iop_Max32Fx4:       op = ARM64vecb_FMAX32x4; break;
               case Iop_Min64Fx2:       op = ARM64vecb_FMIN64x2; break;
               case Iop_Min32Fx4:       op = ARM64vecb_FMIN32x4; break;
               case Iop_RecipStep64Fx2: setRM = True;
                                        op = ARM64vecb_FRECPS64x2; break;
               case Iop_RecipStep32Fx4: setRM = True;
                                        op = ARM64vecb_FRECPS32x4; break;
               case Iop_RSqrtStep64Fx2: setRM = True;
                                        op = ARM64vecb_FRSQRTS64x2; break;
               case Iop_RSqrtStep32Fx4: setRM = True;
                                        op = ARM64vecb_FRSQRTS32x4; break;
               default: vassert(0);
            }
            if (setRM) {
               // This is a bit of a kludge.  We should do rm properly for
               // these recip-step insns, but that would require changing the
               // primop's type to take an rmode.
               set_FPCR_rounding_mode(env, IRExpr_Const(
                                              IRConst_U32(Irrm_NEAREST)));
            }
            if (sw) {
               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
            } else {
               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
            }
            return res;
         }
2584          /* -- These exist only as 2-operand instructions, so we must first
2585             copy an argument into a new register, for the insn to modify. -- */
2586          case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2587          case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2588          case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2589          case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2590          {
2591             HReg res  = newVRegV(env);
2592             HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2593             HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2594             ARM64VecModifyOp op = ARM64vecmo_INVALID;
2595             switch (e->Iex.Binop.op) {
2596                /* In the following 8 cases, the US - SU switching is intended.
2597                   See the comments in libvex_ir.h for details, and also the
2598                   ARM64 front end, where these primops are generated. */
2599                case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2600                case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2601                case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2602                case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2603                case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2604                case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2605                case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2606                case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2607                default: vassert(0);
2608             }
2609             /* The order of the operands is important.  Although this is
2610                basically addition, the two operands are extended differently,
2611                making it important to get them into the correct registers in
2612                the instruction. */
2613             addInstr(env, ARM64Instr_VMov(16, res, argR));
2614             addInstr(env, ARM64Instr_VModifyV(op, res, argL));
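            /* A sketch of the expected output for the 8x16 case, assuming
               the obvious encodings (register names illustrative):
                  mov    v_res.16b, v_argR.16b
                  suqadd v_res.16b, v_argL.16b
               so |res| is both an input and the output of the SUQADD. */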
2615             return res;
2616          }
2617          /* -- Shifts by an immediate. -- */
2618          case Iop_ShrN64x2: case Iop_ShrN32x4:
2619          case Iop_ShrN16x8: case Iop_ShrN8x16:
2620          case Iop_SarN64x2: case Iop_SarN32x4:
2621          case Iop_SarN16x8: case Iop_SarN8x16:
2622          case Iop_ShlN64x2: case Iop_ShlN32x4:
2623          case Iop_ShlN16x8: case Iop_ShlN8x16:
2624          case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2625          case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2626          case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2627          case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2628          case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2629          case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2630          {
2631             IRExpr* argL = e->Iex.Binop.arg1;
2632             IRExpr* argR = e->Iex.Binop.arg2;
2633             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2634                UInt amt   = argR->Iex.Const.con->Ico.U8;
2635                UInt limLo = 0;
2636                UInt limHi = 0;
2637                ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2638                /* Establish the instruction to use. */
2639                switch (e->Iex.Binop.op) {
2640                   case Iop_ShrN64x2:       op = ARM64vecshi_USHR64x2;   break;
2641                   case Iop_ShrN32x4:       op = ARM64vecshi_USHR32x4;   break;
2642                   case Iop_ShrN16x8:       op = ARM64vecshi_USHR16x8;   break;
2643                   case Iop_ShrN8x16:       op = ARM64vecshi_USHR8x16;   break;
2644                   case Iop_SarN64x2:       op = ARM64vecshi_SSHR64x2;   break;
2645                   case Iop_SarN32x4:       op = ARM64vecshi_SSHR32x4;   break;
2646                   case Iop_SarN16x8:       op = ARM64vecshi_SSHR16x8;   break;
2647                   case Iop_SarN8x16:       op = ARM64vecshi_SSHR8x16;   break;
2648                   case Iop_ShlN64x2:       op = ARM64vecshi_SHL64x2;    break;
2649                   case Iop_ShlN32x4:       op = ARM64vecshi_SHL32x4;    break;
2650                   case Iop_ShlN16x8:       op = ARM64vecshi_SHL16x8;    break;
2651                   case Iop_ShlN8x16:       op = ARM64vecshi_SHL8x16;    break;
2652                   case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2;  break;
2653                   case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4;  break;
2654                   case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8;  break;
2655                   case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16;  break;
2656                   case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2;  break;
2657                   case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4;  break;
2658                   case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8;  break;
2659                   case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16;  break;
2660                   case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2661                   case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2662                   case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2663                   case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2664                   default: vassert(0);
2665                }
2666                /* Establish the shift limits, for sanity check purposes only. */
2667                switch (e->Iex.Binop.op) {
2668                   case Iop_ShrN64x2:       limLo = 1; limHi = 64; break;
2669                   case Iop_ShrN32x4:       limLo = 1; limHi = 32; break;
2670                   case Iop_ShrN16x8:       limLo = 1; limHi = 16; break;
2671                   case Iop_ShrN8x16:       limLo = 1; limHi = 8;  break;
2672                   case Iop_SarN64x2:       limLo = 1; limHi = 64; break;
2673                   case Iop_SarN32x4:       limLo = 1; limHi = 32; break;
2674                   case Iop_SarN16x8:       limLo = 1; limHi = 16; break;
2675                   case Iop_SarN8x16:       limLo = 1; limHi = 8;  break;
2676                   case Iop_ShlN64x2:       limLo = 0; limHi = 63; break;
2677                   case Iop_ShlN32x4:       limLo = 0; limHi = 31; break;
2678                   case Iop_ShlN16x8:       limLo = 0; limHi = 15; break;
2679                   case Iop_ShlN8x16:       limLo = 0; limHi = 7;  break;
2680                   case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2681                   case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2682                   case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2683                   case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7;  break;
2684                   case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2685                   case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2686                   case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2687                   case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7;  break;
2688                   case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2689                   case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2690                   case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2691                   case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7;  break;
2692                   default: vassert(0);
2693                }
2694                /* For left shifts, the allowable amt values are
2695                   0 .. lane_bits-1.  For right shifts the allowable
2696                   values are 1 .. lane_bits. */
2697                if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2698                   HReg src = iselV128Expr(env, argL);
2699                   HReg dst = newVRegV(env);
2700                   addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2701                   return dst;
2702                }
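               /* For instance (a sketch, register names illustrative):
                  Iop_SarN16x8 with amt == 3 selects ARM64vecshi_SSHR16x8,
                  which should emit roughly
                     sshr v_dst.8h, v_src.8h, #3                            */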
2703                /* Special case some no-op shifts that the arm64 front end
2704                   throws at us.  We can't generate any instructions for these,
2705                   but we don't need to either. */
2706                switch (e->Iex.Binop.op) {
2707                   case Iop_ShrN64x2: case Iop_ShrN32x4:
2708                   case Iop_ShrN16x8: case Iop_ShrN8x16:
2709                      if (amt == 0) {
2710                         return iselV128Expr(env, argL);
2711                      }
2712                      break;
2713                   default:
2714                      break;
2715                }
2716                /* otherwise unhandled */
2717             }
2718             /* else fall out; this is unhandled */
2719             break;
2720          }
2721          /* -- Saturating narrowing by an immediate -- */
2722          /* uu */
2723          case Iop_QandQShrNnarrow16Uto8Ux8:
2724          case Iop_QandQShrNnarrow32Uto16Ux4:
2725          case Iop_QandQShrNnarrow64Uto32Ux2:
2726          /* ss */
2727          case Iop_QandQSarNnarrow16Sto8Sx8:
2728          case Iop_QandQSarNnarrow32Sto16Sx4:
2729          case Iop_QandQSarNnarrow64Sto32Sx2:
2730          /* su */
2731          case Iop_QandQSarNnarrow16Sto8Ux8:
2732          case Iop_QandQSarNnarrow32Sto16Ux4:
2733          case Iop_QandQSarNnarrow64Sto32Ux2:
2734          /* ruu */
2735          case Iop_QandQRShrNnarrow16Uto8Ux8:
2736          case Iop_QandQRShrNnarrow32Uto16Ux4:
2737          case Iop_QandQRShrNnarrow64Uto32Ux2:
2738          /* rss */
2739          case Iop_QandQRSarNnarrow16Sto8Sx8:
2740          case Iop_QandQRSarNnarrow32Sto16Sx4:
2741          case Iop_QandQRSarNnarrow64Sto32Sx2:
2742          /* rsu */
2743          case Iop_QandQRSarNnarrow16Sto8Ux8:
2744          case Iop_QandQRSarNnarrow32Sto16Ux4:
2745          case Iop_QandQRSarNnarrow64Sto32Ux2:
2746          {
2747             IRExpr* argL = e->Iex.Binop.arg1;
2748             IRExpr* argR = e->Iex.Binop.arg2;
2749             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2750                UInt amt   = argR->Iex.Const.con->Ico.U8;
2751                UInt limit = 0;
2752                ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2753                switch (e->Iex.Binop.op) {
2754                   /* uu */
2755                   case Iop_QandQShrNnarrow64Uto32Ux2:
2756                      op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2757                   case Iop_QandQShrNnarrow32Uto16Ux4:
2758                      op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2759                   case Iop_QandQShrNnarrow16Uto8Ux8:
2760                      op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2761                   /* ss */
2762                   case Iop_QandQSarNnarrow64Sto32Sx2:
2763                      op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2764                   case Iop_QandQSarNnarrow32Sto16Sx4:
2765                      op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2766                   case Iop_QandQSarNnarrow16Sto8Sx8:
2767                      op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2768                   /* su */
2769                   case Iop_QandQSarNnarrow64Sto32Ux2:
2770                      op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2771                   case Iop_QandQSarNnarrow32Sto16Ux4:
2772                      op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2773                   case Iop_QandQSarNnarrow16Sto8Ux8:
2774                      op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2775                   /* ruu */
2776                   case Iop_QandQRShrNnarrow64Uto32Ux2:
2777                      op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2778                   case Iop_QandQRShrNnarrow32Uto16Ux4:
2779                      op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2780                   case Iop_QandQRShrNnarrow16Uto8Ux8:
2781                      op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2782                   /* rss */
2783                   case Iop_QandQRSarNnarrow64Sto32Sx2:
2784                      op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2785                   case Iop_QandQRSarNnarrow32Sto16Sx4:
2786                      op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2787                   case Iop_QandQRSarNnarrow16Sto8Sx8:
2788                      op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2789                   /* rsu */
2790                   case Iop_QandQRSarNnarrow64Sto32Ux2:
2791                      op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2792                   case Iop_QandQRSarNnarrow32Sto16Ux4:
2793                      op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2794                   case Iop_QandQRSarNnarrow16Sto8Ux8:
2795                      op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2796                   /**/
2797                   default:
2798                      vassert(0);
2799                }
2800                if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2801                   HReg src  = iselV128Expr(env, argL);
2802                   HReg dst  = newVRegV(env);
2803                   HReg fpsr = newVRegI(env);
2804                   /* Clear FPSR.Q, do the operation, and return both its
2805                      result and the new value of FPSR.Q.  We can simply
2806                      zero out FPSR since all the other bits have no relevance
2807                      in VEX generated code. */
2808                   addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2809                   addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2810                   addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2811                   addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
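                  /* On AArch64, FPSR bit 27 is QC, the sticky (cumulative)
                     saturation flag; the shift-right-by-27 and AND-with-1
                     below isolate exactly that bit. */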
2812                   addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2813                                                              ARM64sh_SHR));
2814                   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2815                   vassert(ril_one);
2816                   addInstr(env, ARM64Instr_Logic(fpsr,
2817                                                  fpsr, ril_one, ARM64lo_AND));
2818                   /* Now we have: the main (shift) result in the bottom half
2819                      of |dst|, and the Q bit at the bottom of |fpsr|.
2820                      Combining them with an "InterleaveLO64x2" style operation
2821                      produces a 128 bit value, dst[63:0]:fpsr[63:0],
2822                      which is what we want. */
2823                   HReg scratch = newVRegV(env);
2824                   addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2825                   addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2826                                                  dst, dst, scratch));
2827                   return dst;
2828                }
2829             }
2830             /* else fall out; this is unhandled */
2831             break;
2832          }
2833 
2834          // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2835          // as it is in some ways more general and often leads to better
2836          // code overall.
2837          case Iop_ShlV128:
2838          case Iop_ShrV128: {
2839             Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2840             /* This is tricky.  Generate an EXT instruction with zeroes in
2841                the high operand (shift right) or low operand (shift left).
2842                Note that we can only slice in the EXT instruction at a byte
2843                level of granularity, so the shift amount needs careful
2844                checking. */
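            /* An illustrative example (a sketch only): for Iop_ShrV128 by
               0x40 (64 bits) we get immB == 8, and the expected output is
               roughly
                  movi v_z.16b, #0
                  ext  v_dst.16b, v_src.16b, v_z.16b, #8
               so that v_dst = 0x00..00 : v_src[127:64]. */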
2845             IRExpr* argL = e->Iex.Binop.arg1;
2846             IRExpr* argR = e->Iex.Binop.arg2;
2847             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2848                UInt amt   = argR->Iex.Const.con->Ico.U8;
2849                Bool amtOK = False;
2850                switch (amt) {
2851                   case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2852                   case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2853                   case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2854                      amtOK = True; break;
2855                }
2856                /* We could also deal with amt==0 by copying the source to
2857                   the destination, but there's no need for that so far. */
2858                if (amtOK) {
2859                   HReg src  = iselV128Expr(env, argL);
2860                   HReg srcZ = newVRegV(env);
2861                   addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2862                   UInt immB = amt / 8;
2863                   vassert(immB >= 1 && immB <= 15);
2864                   HReg dst = newVRegV(env);
2865                   if (isSHR) {
2866                     addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2867                                                          immB));
2868                   } else {
2869                     addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2870                                                          16 - immB));
2871                   }
2872                   return dst;
2873                }
2874             }
2875             /* else fall out; this is unhandled */
2876             break;
2877          }
2878 
2879          case Iop_PolynomialMull8x8:
2880          case Iop_Mull32Ux2:
2881          case Iop_Mull16Ux4:
2882          case Iop_Mull8Ux8:
2883          case Iop_Mull32Sx2:
2884          case Iop_Mull16Sx4:
2885          case Iop_Mull8Sx8:
2886          case Iop_QDMull32Sx2:
2887          case Iop_QDMull16Sx4:
2888          {
2889             HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2890             HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2891             HReg vSrcL = newVRegV(env);
2892             HReg vSrcR = newVRegV(env);
2893             HReg dst   = newVRegV(env);
2894             ARM64VecBinOp op = ARM64vecb_INVALID;
2895             switch (e->Iex.Binop.op) {
2896                case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8;    break;
2897                case Iop_Mull32Ux2:         op = ARM64vecb_UMULL2DSS;   break;
2898                case Iop_Mull16Ux4:         op = ARM64vecb_UMULL4SHH;   break;
2899                case Iop_Mull8Ux8:          op = ARM64vecb_UMULL8HBB;   break;
2900                case Iop_Mull32Sx2:         op = ARM64vecb_SMULL2DSS;   break;
2901                case Iop_Mull16Sx4:         op = ARM64vecb_SMULL4SHH;   break;
2902                case Iop_Mull8Sx8:          op = ARM64vecb_SMULL8HBB;   break;
2903                case Iop_QDMull32Sx2:       op = ARM64vecb_SQDMULL2DSS; break;
2904                case Iop_QDMull16Sx4:       op = ARM64vecb_SQDMULL4SHH; break;
2905                default: vassert(0);
2906             }
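            /* ARM64Instr_VQfromXX builds a Q register from two 64-bit GPRs;
               duplicating the scalar into both halves is just a convenient
               way to get it into a vector register, since the (non-"2")
               widening multiplies here read only the low 64-bit halves of
               their operands. */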
2907             addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2908             addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
2909             addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2910             return dst;
2911          }
2912 
2913          /* ... */
2914          default:
2915             break;
2916       } /* switch on the binop */
2917    } /* if (e->tag == Iex_Binop) */
2918 
2919    if (e->tag == Iex_Triop) {
2920       IRTriop*      triop  = e->Iex.Triop.details;
2921       ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2922       switch (triop->op) {
2923          case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2924          case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2925          case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2926          case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2927          case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
2928          case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
2929          case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
2930          case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
2931          default: break;
2932       }
2933       if (vecbop != ARM64vecb_INVALID) {
2934          HReg argL = iselV128Expr(env, triop->arg2);
2935          HReg argR = iselV128Expr(env, triop->arg3);
2936          HReg dst  = newVRegV(env);
2937          set_FPCR_rounding_mode(env, triop->arg1);
2938          addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
2939          return dst;
2940       }
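      /* For example (a sketch): Iop_Add64Fx2(rm, a, b) first sets FPCR
         from |rm| and then should emit roughly
            fadd v_dst.2d, v_a.2d, v_b.2d                                 */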
2941 
2942       if (triop->op == Iop_SliceV128) {
2943          /* Note that, compared to ShlV128/ShrV128 just above, the shift
2944             amount here is in bytes, not bits. */
2945          IRExpr* argHi  = triop->arg1;
2946          IRExpr* argLo  = triop->arg2;
2947          IRExpr* argAmt = triop->arg3;
2948          if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
2949             UInt amt   = argAmt->Iex.Const.con->Ico.U8;
2950             Bool amtOK = amt >= 1 && amt <= 15;
2951             /* We could also deal with amt==0 by copying argLo to
2952                the destination, but there's no need for that so far. */
2953             if (amtOK) {
2954                HReg srcHi = iselV128Expr(env, argHi);
2955                HReg srcLo = iselV128Expr(env, argLo);
2956                HReg dst = newVRegV(env);
2957                addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
2958                return dst;
2959             }
2960          }
2961          /* else fall out; this is unhandled */
2962       }
2963 
2964    } /* if (e->tag == Iex_Triop) */
2965 
2966   v128_expr_bad:
2967    ppIRExpr(e);
2968    vpanic("iselV128Expr_wrk");
2969 }
2970 
2971 
2972 /*---------------------------------------------------------*/
2973 /*--- ISEL: Floating point expressions (64 bit)         ---*/
2974 /*---------------------------------------------------------*/
2975 
2976 /* Compute a 64-bit floating point value into a register, the identity
2977    of which is returned.  As with iselIntExpr_R, the reg may be either
2978    real or virtual; in any case it must not be changed by subsequent
2979    code emitted by the caller.  */
2980 
2981 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2982 {
2983    HReg r = iselDblExpr_wrk( env, e );
2984 #  if 0
2985    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2986 #  endif
2987    vassert(hregClass(r) == HRcFlt64);
2988    vassert(hregIsVirtual(r));
2989    return r;
2990 }
2991 
2992 /* DO NOT CALL THIS DIRECTLY */
2993 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2994 {
2995    IRType ty = typeOfIRExpr(env->type_env,e);
2996    vassert(e);
2997    vassert(ty == Ity_F64);
2998 
2999    if (e->tag == Iex_RdTmp) {
3000       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3001    }
3002 
3003    if (e->tag == Iex_Const) {
3004       IRConst* con = e->Iex.Const.con;
3005       if (con->tag == Ico_F64i) {
3006          HReg src = newVRegI(env);
3007          HReg dst = newVRegD(env);
3008          addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3009          addInstr(env, ARM64Instr_VDfromX(dst, src));
3010          return dst;
3011       }
3012       if (con->tag == Ico_F64) {
3013          HReg src = newVRegI(env);
3014          HReg dst = newVRegD(env);
3015          union { Double d64; ULong u64; } u;
3016          vassert(sizeof(u) == 8);
3017          u.d64 = con->Ico.F64;
3018          addInstr(env, ARM64Instr_Imm64(src, u.u64));
3019          addInstr(env, ARM64Instr_VDfromX(dst, src));
3020          return dst;
3021       }
3022    }
3023 
3024    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3025       vassert(e->Iex.Load.ty == Ity_F64);
3026       HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3027       HReg res  = newVRegD(env);
3028       addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3029       return res;
3030    }
3031 
3032    if (e->tag == Iex_Get) {
3033       Int offs = e->Iex.Get.offset;
3034       if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3035          HReg rD = newVRegD(env);
3036          HReg rN = get_baseblock_register();
3037          addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3038          return rD;
3039       }
3040    }
3041 
3042    if (e->tag == Iex_Unop) {
3043       switch (e->Iex.Unop.op) {
3044          case Iop_NegF64: {
3045             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3046             HReg dst = newVRegD(env);
3047             addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3048             return dst;
3049          }
3050          case Iop_AbsF64: {
3051             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3052             HReg dst = newVRegD(env);
3053             addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3054             return dst;
3055          }
3056          case Iop_F32toF64: {
3057             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3058             HReg dst = newVRegD(env);
3059             addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3060             return dst;
3061          }
3062          case Iop_F16toF64: {
3063             HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3064             HReg dst = newVRegD(env);
3065             addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3066             return dst;
3067          }
3068          case Iop_I32UtoF64:
3069          case Iop_I32StoF64: {
3070             /* Rounding mode is not involved here, since the
3071                conversion can always be done without loss of
3072                precision. */
3073             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
3074             HReg dst   = newVRegD(env);
3075             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3076             ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3077             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3078             return dst;
3079          }
3080          default:
3081             break;
3082       }
3083    }
3084 
3085    if (e->tag == Iex_Binop) {
3086       switch (e->Iex.Binop.op) {
3087          case Iop_RoundF64toInt:
3088          case Iop_SqrtF64:
3089          case Iop_RecpExpF64: {
3090             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3091             HReg dst = newVRegD(env);
3092             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3093             ARM64FpUnaryOp op = ARM64fpu_INVALID;
3094             switch (e->Iex.Binop.op) {
3095                case Iop_RoundF64toInt: op = ARM64fpu_RINT;  break;
3096                case Iop_SqrtF64:       op = ARM64fpu_SQRT;  break;
3097                case Iop_RecpExpF64:    op = ARM64fpu_RECPX; break;
3098                default: vassert(0);
3099             }
3100             addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3101             return dst;
3102          }
3103          case Iop_I64StoF64:
3104          case Iop_I64UtoF64: {
3105             ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3106                                    ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3107             HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3108             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3109             HReg dstS = newVRegD(env);
3110             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3111             return dstS;
3112          }
3113          default:
3114             break;
3115       }
3116    }
3117 
3118    if (e->tag == Iex_Triop) {
3119       IRTriop*     triop = e->Iex.Triop.details;
3120       ARM64FpBinOp dblop = ARM64fpb_INVALID;
3121       switch (triop->op) {
3122          case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3123          case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3124          case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3125          case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3126          default: break;
3127       }
3128       if (dblop != ARM64fpb_INVALID) {
3129          HReg argL = iselDblExpr(env, triop->arg2);
3130          HReg argR = iselDblExpr(env, triop->arg3);
3131          HReg dst  = newVRegD(env);
3132          set_FPCR_rounding_mode(env, triop->arg1);
3133          addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3134          return dst;
3135       }
3136    }
3137 
3138    if (e->tag == Iex_ITE) {
3139       /* ITE(ccexpr, iftrue, iffalse) */
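      /* The selection is done with a conditional FP select; roughly
         (register names illustrative):
            fcsel d_dst, d_r1, d_r0, <cond>                               */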
3140       ARM64CondCode cc;
3141       HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3142       HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3143       HReg dst = newVRegD(env);
3144       cc = iselCondCode(env, e->Iex.ITE.cond);
3145       addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3146       return dst;
3147    }
3148 
3149    ppIRExpr(e);
3150    vpanic("iselDblExpr_wrk");
3151 }
3152 
3153 
3154 /*---------------------------------------------------------*/
3155 /*--- ISEL: Floating point expressions (32 bit)         ---*/
3156 /*---------------------------------------------------------*/
3157 
3158 /* Compute a 32-bit floating point value into a register, the identity
3159    of which is returned.  As with iselIntExpr_R, the reg may be either
3160    real or virtual; in any case it must not be changed by subsequent
3161    code emitted by the caller.  Values are generated into HRcFlt64
3162    registers despite the values themselves being Ity_F32s. */
3163 
3164 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3165 {
3166    HReg r = iselFltExpr_wrk( env, e );
3167 #  if 0
3168    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3169 #  endif
3170    vassert(hregClass(r) == HRcFlt64);
3171    vassert(hregIsVirtual(r));
3172    return r;
3173 }
3174 
3175 /* DO NOT CALL THIS DIRECTLY */
3176 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3177 {
3178    IRType ty = typeOfIRExpr(env->type_env,e);
3179    vassert(e);
3180    vassert(ty == Ity_F32);
3181 
3182    if (e->tag == Iex_RdTmp) {
3183       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3184    }
3185 
3186    if (e->tag == Iex_Const) {
3187       /* This is something of a kludge.  Since a 32 bit floating point
3188          zero is just all zeroes, we can create a 64 bit zero word
3189          and transfer it.  This avoids having to create an SfromW
3190          instruction for this specific case. */
3191       IRConst* con = e->Iex.Const.con;
3192       if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3193          HReg src = newVRegI(env);
3194          HReg dst = newVRegD(env);
3195          addInstr(env, ARM64Instr_Imm64(src, 0));
3196          addInstr(env, ARM64Instr_VDfromX(dst, src));
3197          return dst;
3198       }
3199       if (con->tag == Ico_F32) {
3200          HReg src = newVRegI(env);
3201          HReg dst = newVRegD(env);
3202          union { Float f32; UInt u32; } u;
3203          vassert(sizeof(u) == 4);
3204          u.f32 = con->Ico.F32;
3205          addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3206          addInstr(env, ARM64Instr_VDfromX(dst, src));
3207          return dst;
3208       }
3209    }
3210 
3211    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3212       vassert(e->Iex.Load.ty == Ity_F32);
3213       HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3214       HReg res  = newVRegD(env);
3215       addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3216       return res;
3217    }
3218 
3219    if (e->tag == Iex_Get) {
3220       Int offs = e->Iex.Get.offset;
3221       if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3222          HReg rD = newVRegD(env);
3223          HReg rN = get_baseblock_register();
3224          addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3225          return rD;
3226       }
3227    }
3228 
3229    if (e->tag == Iex_Unop) {
3230       switch (e->Iex.Unop.op) {
3231          case Iop_NegF32: {
3232             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3233             HReg dst = newVRegD(env);
3234             addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3235             return dst;
3236          }
3237          case Iop_AbsF32: {
3238             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3239             HReg dst = newVRegD(env);
3240             addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3241             return dst;
3242          }
3243          case Iop_F16toF32: {
3244             HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3245             HReg dst = newVRegD(env);
3246             addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3247             return dst;
3248          }
3249          default:
3250             break;
3251       }
3252    }
3253 
3254    if (e->tag == Iex_Binop) {
3255       switch (e->Iex.Binop.op) {
3256          case Iop_RoundF32toInt:
3257          case Iop_SqrtF32:
3258          case Iop_RecpExpF32: {
3259             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3260             HReg dst = newVRegD(env);
3261             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3262             ARM64FpUnaryOp op = ARM64fpu_INVALID;
3263             switch (e->Iex.Binop.op) {
3264                case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
3265                case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
3266                case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
3267                default: vassert(0);
3268             }
3269             addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3270             return dst;
3271          }
3272          case Iop_F64toF32: {
3273             HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3274             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3275             HReg dstS = newVRegD(env);
3276             addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3277             return dstS;
3278          }
3279          case Iop_I32UtoF32:
3280          case Iop_I32StoF32:
3281          case Iop_I64UtoF32:
3282          case Iop_I64StoF32: {
3283             ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3284             switch (e->Iex.Binop.op) {
3285                case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3286                case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3287                case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3288                case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3289                default: vassert(0);
3290             }
3291             HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3292             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3293             HReg dstS = newVRegD(env);
3294             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3295             return dstS;
3296          }
3297          default:
3298             break;
3299       }
3300    }
3301 
3302    if (e->tag == Iex_Triop) {
3303       IRTriop*     triop = e->Iex.Triop.details;
3304       ARM64FpBinOp sglop = ARM64fpb_INVALID;
3305       switch (triop->op) {
3306          case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3307          case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3308          case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3309          case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3310          default: break;
3311       }
3312       if (sglop != ARM64fpb_INVALID) {
3313          HReg argL = iselFltExpr(env, triop->arg2);
3314          HReg argR = iselFltExpr(env, triop->arg3);
3315          HReg dst  = newVRegD(env);
3316          set_FPCR_rounding_mode(env, triop->arg1);
3317          addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3318          return dst;
3319       }
3320    }
3321 
3322    if (e->tag == Iex_ITE) {
3323       /* ITE(ccexpr, iftrue, iffalse) */
3324       ARM64CondCode cc;
3325       HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
3326       HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
3327       HReg dst = newVRegD(env);
3328       cc = iselCondCode(env, e->Iex.ITE.cond);
3329       addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3330       return dst;
3331    }
3332 
3333    ppIRExpr(e);
3334    vpanic("iselFltExpr_wrk");
3335 }
3336 
3337 
3338 /*---------------------------------------------------------*/
3339 /*--- ISEL: Floating point expressions (16 bit)         ---*/
3340 /*---------------------------------------------------------*/
3341 
3342 /* Compute a 16-bit floating point value into a register, the identity
3343    of which is returned.  As with iselIntExpr_R, the reg may be either
3344    real or virtual; in any case it must not be changed by subsequent
3345    code emitted by the caller.  Values are generated into HRcFlt64
3346    registers despite the values themselves being Ity_F16s. */
3347 
3348 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3349 {
3350    HReg r = iselF16Expr_wrk( env, e );
3351 #  if 0
3352    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3353 #  endif
3354    vassert(hregClass(r) == HRcFlt64);
3355    vassert(hregIsVirtual(r));
3356    return r;
3357 }
3358 
3359 /* DO NOT CALL THIS DIRECTLY */
3360 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3361 {
3362    IRType ty = typeOfIRExpr(env->type_env,e);
3363    vassert(e);
3364    vassert(ty == Ity_F16);
3365 
3366    if (e->tag == Iex_Get) {
3367       Int offs = e->Iex.Get.offset;
3368       if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3369          HReg rD = newVRegD(env);
3370          HReg rN = get_baseblock_register();
3371          addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3372          return rD;
3373       }
3374    }
3375 
3376    if (e->tag == Iex_Binop) {
3377       switch (e->Iex.Binop.op) {
3378          case Iop_F32toF16: {
3379             HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3380             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3381             HReg dstH = newVRegD(env);
3382             addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3383             return dstH;
3384          }
3385          case Iop_F64toF16: {
3386             HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3387             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3388             HReg dstH = newVRegD(env);
3389             addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3390             return dstH;
3391          }
3392          default:
3393             break;
3394       }
3395    }
3396 
3397    ppIRExpr(e);
3398    vpanic("iselF16Expr_wrk");
3399 }
3400 
3401 
3402 /*---------------------------------------------------------*/
3403 /*--- ISEL: Vector expressions (256 bit)                ---*/
3404 /*---------------------------------------------------------*/
3405 
3406 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3407                            ISelEnv* env, IRExpr* e )
3408 {
3409    iselV256Expr_wrk( rHi, rLo, env, e );
3410    vassert(hregClass(*rHi) == HRcVec128);
3411    vassert(hregClass(*rLo) == HRcVec128);
3412    vassert(hregIsVirtual(*rHi));
3413    vassert(hregIsVirtual(*rLo));
3414 }
3415 
3416 /* DO NOT CALL THIS DIRECTLY */
3417 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3418                                ISelEnv* env, IRExpr* e )
3419 {
3420    vassert(e);
3421    IRType ty = typeOfIRExpr(env->type_env,e);
3422    vassert(ty == Ity_V256);
3423 
3424    /* read 256-bit IRTemp */
3425    if (e->tag == Iex_RdTmp) {
3426       lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3427       return;
3428    }
3429 
3430    if (e->tag == Iex_Binop) {
3431       switch (e->Iex.Binop.op) {
3432          case Iop_V128HLtoV256: {
3433             *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3434             *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3435             return;
3436          }
3437          case Iop_QandSQsh64x2:
3438          case Iop_QandSQsh32x4:
3439          case Iop_QandSQsh16x8:
3440          case Iop_QandSQsh8x16:
3441          case Iop_QandUQsh64x2:
3442          case Iop_QandUQsh32x4:
3443          case Iop_QandUQsh16x8:
3444          case Iop_QandUQsh8x16:
3445          case Iop_QandSQRsh64x2:
3446          case Iop_QandSQRsh32x4:
3447          case Iop_QandSQRsh16x8:
3448          case Iop_QandSQRsh8x16:
3449          case Iop_QandUQRsh64x2:
3450          case Iop_QandUQRsh32x4:
3451          case Iop_QandUQRsh16x8:
3452          case Iop_QandUQRsh8x16:
3453          {
3454             HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
3455             HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
3456             HReg fpsr  = newVRegI(env);
3457             HReg resHi = newVRegV(env);
3458             HReg resLo = newVRegV(env);
3459             ARM64VecBinOp op = ARM64vecb_INVALID;
3460             switch (e->Iex.Binop.op) {
3461                case Iop_QandSQsh64x2:  op = ARM64vecb_SQSHL64x2;  break;
3462                case Iop_QandSQsh32x4:  op = ARM64vecb_SQSHL32x4;  break;
3463                case Iop_QandSQsh16x8:  op = ARM64vecb_SQSHL16x8;  break;
3464                case Iop_QandSQsh8x16:  op = ARM64vecb_SQSHL8x16;  break;
3465                case Iop_QandUQsh64x2:  op = ARM64vecb_UQSHL64x2;  break;
3466                case Iop_QandUQsh32x4:  op = ARM64vecb_UQSHL32x4;  break;
3467                case Iop_QandUQsh16x8:  op = ARM64vecb_UQSHL16x8;  break;
3468                case Iop_QandUQsh8x16:  op = ARM64vecb_UQSHL8x16;  break;
3469                case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3470                case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3471                case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3472                case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3473                case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3474                case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3475                case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3476                case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3477                default: vassert(0);
3478             }
3479             /* Clear FPSR.Q, do the operation, and return both its result
3480                and the new value of FPSR.Q.  We can simply zero out FPSR
3481                since all the other bits have no relevance in VEX generated
3482                code. */
3483             addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3484             addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3485             addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3486             addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3487             addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3488                                                        ARM64sh_SHR));
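            /* As noted earlier, FPSR bit 27 is the sticky saturation
               flag QC; the shift above and the AND below isolate it. */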
3489             ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3490             vassert(ril_one);
3491             addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3492             /* Now we have: the main (shift) result in |resLo|, and the
3493                Q bit at the bottom of |fpsr|. */
3494             addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3495             *rHi = resHi;
3496             *rLo = resLo;
3497             return;
3498          }
3499 
3500          /* ... */
3501          default:
3502             break;
3503       } /* switch on the binop */
3504    } /* if (e->tag == Iex_Binop) */
3505 
3506    ppIRExpr(e);
3507    vpanic("iselV256Expr_wrk");
3508 }
3509 
3510 
3511 /*---------------------------------------------------------*/
3512 /*--- ISEL: Statements                                  ---*/
3513 /*---------------------------------------------------------*/
3514 
3515 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3516 {
3517    if (vex_traceflags & VEX_TRACE_VCODE) {
3518       vex_printf("\n-- ");
3519       ppIRStmt(stmt);
3520       vex_printf("\n");
3521    }
3522    switch (stmt->tag) {
3523 
3524    /* --------- STORE --------- */
3525    /* little-endian write to memory */
3526    case Ist_Store: {
3527       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3528       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3529       IREndness end  = stmt->Ist.Store.end;
3530 
3531       if (tya != Ity_I64 || end != Iend_LE)
3532          goto stmt_fail;
3533 
3534       if (tyd == Ity_I64) {
3535          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3536          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3537          addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3538          return;
3539       }
3540       if (tyd == Ity_I32) {
3541          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3542          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3543          addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3544          return;
3545       }
3546       if (tyd == Ity_I16) {
3547          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3548          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3549          addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3550          return;
3551       }
3552       if (tyd == Ity_I8) {
3553          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3554          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3555          addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3556          return;
3557       }
3558       if (tyd == Ity_V128) {
3559          HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
3560          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3561          addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3562          return;
3563       }
3564       if (tyd == Ity_F64) {
3565          HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
3566          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3567          addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3568          return;
3569       }
3570       if (tyd == Ity_F32) {
3571          HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
3572          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3573          addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3574          return;
3575       }
3576       break;
3577    }
3578 
3579    /* --------- PUT --------- */
3580    /* write guest state, fixed offset */
3581    case Ist_Put: {
3582       IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3583       UInt   offs = (UInt)stmt->Ist.Put.offset;
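      /* The guards below reflect ARM64's scaled, unsigned 12-bit
         load/store immediates: an N-byte access can encode N-aligned
         offsets from 0 up to N * 4095, hence tests like
         |offs < (8<<12)| for the 8-byte cases. */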
3584       if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3585          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3586          ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3587          addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3588          return;
3589       }
3590       if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3591          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3592          ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3593          addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3594          return;
3595       }
3596       if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3597          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3598          ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3599          addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3600          return;
3601       }
3602       if (tyd == Ity_I8 && offs < (1<<12)) {
3603          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3604          ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3605          addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3606          return;
3607       }
3608       if (tyd == Ity_V128 && offs < (1<<12)) {
3609          HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
3610          HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3611          addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3612          return;
3613       }
3614       if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3615          HReg dD   = iselDblExpr(env, stmt->Ist.Put.data);
3616          HReg bbp  = get_baseblock_register();
3617          addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3618          return;
3619       }
3620       if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3621          HReg sD   = iselFltExpr(env, stmt->Ist.Put.data);
3622          HReg bbp  = get_baseblock_register();
3623          addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3624          return;
3625       }
3626       if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3627          HReg hD   = iselF16Expr(env, stmt->Ist.Put.data);
3628          HReg bbp  = get_baseblock_register();
3629          addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3630          return;
3631       }
3632 
3633       break;
3634    }
3635 
3636    /* --------- TMP --------- */
3637    /* assign value to temporary */
3638    case Ist_WrTmp: {
3639       IRTemp tmp = stmt->Ist.WrTmp.tmp;
3640       IRType ty  = typeOfIRTemp(env->type_env, tmp);
3641 
3642       if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3643          /* We could do a lot better here.  But for the time being: */
3644          HReg dst = lookupIRTemp(env, tmp);
3645          HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3646          addInstr(env, ARM64Instr_MovI(dst, rD));
3647          return;
3648       }
3649       if (ty == Ity_I1) {
3650          /* Here, we are generating an I1 value into a 64 bit register.
3651             Make sure the value in the register is only zero or one,
3652             and nothing else.  This allows optimisation of the
3653             1Uto64(tmp:I1) case, by making it simply a copy of the
3654             register holding 'tmp'.  The point is that the value in
3655             the register holding 'tmp' can only have been created
3656             here.  LATER: that seems dangerous; it would be safer to do
3657             'tmp & 1' in that case.  Also, this could be done with a
3658             single CINC insn. */
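         /* A sketch of the expected output (register names illustrative):
               movz x_zero, #0
               movz x_one,  #1
               csel x_dst, x_one, x_zero, <cond>                          */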
3659          /* CLONE-01 */
3660          HReg zero = newVRegI(env);
3661          HReg one  = newVRegI(env);
3662          HReg dst  = lookupIRTemp(env, tmp);
3663          addInstr(env, ARM64Instr_Imm64(zero, 0));
3664          addInstr(env, ARM64Instr_Imm64(one,  1));
3665          ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3666          addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3667          return;
3668       }
3669       if (ty == Ity_F64) {
3670          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3671          HReg dst = lookupIRTemp(env, tmp);
3672          addInstr(env, ARM64Instr_VMov(8, dst, src));
3673          return;
3674       }
3675       if (ty == Ity_F32) {
3676          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3677          HReg dst = lookupIRTemp(env, tmp);
3678          addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3679          return;
3680       }
3681       if (ty == Ity_V128) {
3682          HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3683          HReg dst = lookupIRTemp(env, tmp);
3684          addInstr(env, ARM64Instr_VMov(16, dst, src));
3685          return;
3686       }
3687       if (ty == Ity_V256) {
3688          HReg srcHi, srcLo, dstHi, dstLo;
3689          iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3690          lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3691          addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3692          addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3693          return;
3694       }
3695       break;
3696    }
3697 
3698    /* --------- Call to DIRTY helper --------- */
3699    /* call complex ("dirty") helper function */
3700    case Ist_Dirty: {
3701       IRDirty* d = stmt->Ist.Dirty.details;
3702 
3703       /* Figure out the return type, if any. */
3704       IRType retty = Ity_INVALID;
3705       if (d->tmp != IRTemp_INVALID)
3706          retty = typeOfIRTemp(env->type_env, d->tmp);
3707 
3708       Bool retty_ok = False;
3709       switch (retty) {
3710          case Ity_INVALID: /* function doesn't return anything */
3711          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3712          case Ity_V128:
3713             retty_ok = True; break;
3714          default:
3715             break;
3716       }
3717       if (!retty_ok)
3718          break; /* will go to stmt_fail: */
3719 
3720       /* Marshal args, do the call, and set the return value to 0x555..555
3721          if this is a conditional call that returns a value and the
3722          call is skipped. */
3723       UInt   addToSp = 0;
3724       RetLoc rloc    = mk_RetLoc_INVALID();
3725       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3726       vassert(is_sane_RetLoc(rloc));
3727 
3728       /* Now figure out what to do with the returned value, if any. */
3729       switch (retty) {
3730          case Ity_INVALID: {
3731             /* No return value.  Nothing to do. */
3732             vassert(d->tmp == IRTemp_INVALID);
3733             vassert(rloc.pri == RLPri_None);
3734             vassert(addToSp == 0);
3735             return;
3736          }
3737          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3738             vassert(rloc.pri == RLPri_Int);
3739             vassert(addToSp == 0);
3740             /* The returned value is in x0.  Park it in the register
3741                associated with tmp. */
3742             HReg dst = lookupIRTemp(env, d->tmp);
3743             addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3744             return;
3745          }
3746          case Ity_V128: {
3747             /* The returned value is on the stack, and |rloc| tells
3748                us where.  Fish it off the stack and then move the
3749                stack pointer upwards to clear it, as directed by
3750                doHelperCall. */
3751             vassert(rloc.pri == RLPri_V128SpRel);
3752             vassert(rloc.spOff < 256); // stay sane
3753             vassert(addToSp >= 16); // ditto
3754             vassert(addToSp < 256); // ditto
3755             HReg dst = lookupIRTemp(env, d->tmp);
3756             HReg tmp = newVRegI(env); // the address of the returned value
3757             addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3758             addInstr(env, ARM64Instr_Arith(tmp, tmp,
3759                                            ARM64RIA_I12((UShort)rloc.spOff, 0),
3760                                            True/*isAdd*/ ));
3761             addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3762             addInstr(env, ARM64Instr_AddToSP(addToSp));
3763             return;
3764          }
3765          default:
3766             /*NOTREACHED*/
3767             vassert(0);
3768       }
3769       break;
3770    }

   /* --------- Load Linked and Store Conditional --------- */
   case Ist_LLSC: {
      if (stmt->Ist.LLSC.storedata == NULL) {
         /* LL */
         IRTemp res = stmt->Ist.LLSC.result;
         IRType ty  = typeOfIRTemp(env->type_env, res);
         if (ty == Ity_I64 || ty == Ity_I32
             || ty == Ity_I16 || ty == Ity_I8) {
            Int  szB   = 0;
            HReg r_dst = lookupIRTemp(env, res);
            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (ty) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default:      vassert(0);
            }
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
            addInstr(env, ARM64Instr_LdrEX(szB));
            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
            return;
         }
         goto stmt_fail;
      } else {
         /* SC */
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
         if (tyd == Ity_I64 || tyd == Ity_I32
             || tyd == Ity_I16 || tyd == Ity_I8) {
            Int  szB = 0;
            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (tyd) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default:      vassert(0);
            }
            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
            addInstr(env, ARM64Instr_StrEX(szB));
         } else {
            goto stmt_fail;
         }
         /* Now X0 is 1 if the store failed, 0 if it succeeded.
            Convert to IR conventions (0 is fail, 1 is success) and
            transfer the result to r_res. */
         IRTemp    res   = stmt->Ist.LLSC.result;
         IRType    ty    = typeOfIRTemp(env->type_env, res);
         HReg      r_res = lookupIRTemp(env, res);
         ARM64RIL* one   = mb_mkARM64RIL_I(1);
         vassert(ty == Ity_I1);
         vassert(one);
         addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
                                        ARM64lo_XOR));
         /* And be conservative -- mask off all but the lowest bit. */
         addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
                                        ARM64lo_AND));
         return;
      }
      break;
   }
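
   /* A sketch of the fixed-register conventions used above: address
      in X4, loaded/stored data in X2, store-conditional status in X0
      (the exact mnemonics are chosen by the emitter in
      host_arm64_defs.c).  For a 4-byte pair, roughly:

         LL:   mov  x4, rAddr        SC:   mov  x2, rData
               ldxr w2, [x4]               mov  x4, rAddr
               mov  rDst, x2               stxr w0, w2, [x4]

      followed, on the SC side, by rRes = (X0 ^ 1) & 1 to convert the
      hardware convention (0 = success) to the IR one (1 = success). */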

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, ARM64Instr_MFence());
            return;
         default:
            break;
      }
      break;
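
   /* (ARM64Instr_MFence expands to a full barrier; the exact
      dsb/dmb/isb sequence is decided by the emitter in
      host_arm64_defs.c.) */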

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
       return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("isel_arm64: Ist_Exit: dst is not a 64-bit value");

      ARM64CondCode cc
         = iselCondCode(env, stmt->Ist.Exit.guard);
      ARM64AMode* amPC
         = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring
          /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
          /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amPC, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only allowable alternative. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
         }
         return;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }
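
   /* A worked example of the forwards-edge test above, with made-up
      addresses: if this block covers guest addresses [0x4000, 0x403F],
      then max_ga == 0x403F, so an exit to 0x4040 satisfies
      dst > max_ga and may be chained directly to the destination's
      fast entry point (skipping its event check), whereas a back-edge
      to 0x4000 must use the slow entry point so the event check still
      runs. */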

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
                                             amPC, ARM64cc_AL,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only allowable alternative. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r    = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
         } else {
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_FlushDCache:
      case Ijk_SigTRAP:
      {
         HReg        r    = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to arm64 code. */

HInstrArray* iselSB_ARM64 ( const IRSB* bb,
                            VexArch      arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo*  vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   ARM64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchARM64);

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* guard against unexpected space regressions */
   vassert(sizeof(ARM64Instr) <= 32);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F16: // we'll use HRcFlt64 regs for F16 too
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            hreg = mkHReg(True, HRcFlt64, 0, j++);
            break;
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(arm64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;
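
   /* So, for example, an Ity_I128 temp consumes two consecutive
      HRcInt64 vregs (low half in vregmap, high half in vregmapHI),
      and an Ity_V256 temp consumes two HRcVec128 vregs; every other
      supported type gets a single vreg of the matching class. */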

   /* The very first instruction must be an event check. */
   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
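   /* A sketch of what the event check does (the encoding lives in
      host_arm64_defs.c): decrement the counter at
      X21 + offs_Host_EvC_Counter and, if the result is negative,
      branch to the address stored at X21 + offs_Host_EvC_FailAddr,
      handing control back to the dispatcher so pending events can be
      serviced.  X21 is the baseblock (guest state) pointer. */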

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARM64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/