1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_basictypes.h"
32 #include "libvex_ir.h"
33 #include "libvex.h"
34 #include "ir_match.h"
35
36 #include "main_util.h"
37 #include "main_globals.h"
38 #include "host_generic_regs.h"
39 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
40 #include "host_arm64_defs.h"
41
42
43 /*---------------------------------------------------------*/
44 /*--- ISelEnv ---*/
45 /*---------------------------------------------------------*/
46
47 /* This carries around:
48
49 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
50 might encounter. This is computed before insn selection starts,
51 and does not change.
52
53 - A mapping from IRTemp to HReg. This tells the insn selector
54 which virtual register is associated with each IRTemp temporary.
55 This is computed before insn selection starts, and does not
56 change. We expect this mapping to map precisely the same set of
57 IRTemps as the type mapping does.
58
59 |vregmap| holds the primary register for the IRTemp.
60 |vregmapHI| is only used for 128-bit integer-typed
61 IRTemps. It holds the identity of a second
62 64-bit virtual HReg, which holds the high half
63 of the value.
64
65 - The code array, that is, the insns selected so far.
66
67 - A counter, for generating new virtual registers.
68
69 - The host hardware capabilities word. This is set at the start
70 and does not change.
71
72 - A Bool for indicating whether we may generate chain-me
73 instructions for control flow transfers, or whether we must use
74 XAssisted.
75
76 - The maximum guest address of any guest insn in this block.
77 Actually, the address of the highest-addressed byte from any insn
78 in this block. Is set at the start and does not change. This is
79 used for detecting jumps which are definitely forward-edges from
80 this block, and therefore can be made (chained) to the fast entry
81 point of the destination, thereby avoiding the destination's
82 event check.
83
84 - An IRExpr*, which may be NULL, holding the IR expression (an
85 IRRoundingMode-encoded value) to which the FPU's rounding mode
86 was most recently set. Setting to NULL is always safe. Used to
87 avoid redundant settings of the FPU's rounding mode, as
88 described in set_FPCR_rounding_mode below.
89
90 Note, this is all (well, mostly) host-independent.
91 */
92
93 typedef
94 struct {
95       /* Constants -- set at the start and do not change. */
96 IRTypeEnv* type_env;
97
98 HReg* vregmap;
99 HReg* vregmapHI;
100 Int n_vregmap;
101
102 UInt hwcaps;
103
104 Bool chainingAllowed;
105 Addr64 max_ga;
106
107 /* These are modified as we go along. */
108 HInstrArray* code;
109 Int vreg_ctr;
110
111 IRExpr* previous_rm;
112 }
113 ISelEnv;
114
115 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
116 {
117 vassert(tmp >= 0);
118 vassert(tmp < env->n_vregmap);
119 return env->vregmap[tmp];
120 }
121
122 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
123 ISelEnv* env, IRTemp tmp )
124 {
125 vassert(tmp >= 0);
126 vassert(tmp < env->n_vregmap);
127 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
128 *vrLO = env->vregmap[tmp];
129 *vrHI = env->vregmapHI[tmp];
130 }
131
132 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
133 {
134 addHInstr(env->code, instr);
135 if (vex_traceflags & VEX_TRACE_VCODE) {
136 ppARM64Instr(instr);
137 vex_printf("\n");
138 }
139 }
140
141 static HReg newVRegI ( ISelEnv* env )
142 {
143 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
144 env->vreg_ctr++;
145 return reg;
146 }
147
148 static HReg newVRegD ( ISelEnv* env )
149 {
150 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
151 env->vreg_ctr++;
152 return reg;
153 }
154
155 static HReg newVRegV ( ISelEnv* env )
156 {
157 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
158 env->vreg_ctr++;
159 return reg;
160 }
161
162
163 /*---------------------------------------------------------*/
164 /*--- ISEL: Forward declarations ---*/
165 /*---------------------------------------------------------*/
166
167 /* These are organised as iselXXX and iselXXX_wrk pairs. The
168 iselXXX_wrk do the real work, but are not to be called directly.
169 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
170 checks that all returned registers are virtual. You should not
171 call the _wrk version directly.
172
173 Because some forms of ARM64 memory amodes are implicitly scaled by
174 the access size, iselIntExpr_AMode takes an IRType which tells it
175 the type of the access for which the amode is to be used. This
176 type needs to be correct, else you'll get incorrect code.
177 */
178 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
179 IRExpr* e, IRType dty );
180 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
181 IRExpr* e, IRType dty );
182
183 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
184 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
185
186 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
187 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
188
189 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
190 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
191
192 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
193 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
194
195 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
196 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
197
198 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
199 ISelEnv* env, IRExpr* e );
200 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
201 ISelEnv* env, IRExpr* e );
202
203 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
204 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
205
206 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
207 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
208
209 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
210 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
211
212 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
213 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
214
215 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
216 ISelEnv* env, IRExpr* e );
217 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
218 ISelEnv* env, IRExpr* e );
219
220 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
221
222
223 /*---------------------------------------------------------*/
224 /*--- ISEL: Misc helpers ---*/
225 /*---------------------------------------------------------*/
226
227 /* Generate an amode suitable for a 64-bit sized access relative to
228 the baseblock register (X21). This generates an RI12 amode, which
229 means it's scaled by the access size, which is why the access size
230 -- 64 bit -- is stated explicitly here. Consequently |off| needs
231 to be divisible by 8. */
232 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
233 {
234 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
235 vassert((off & 7) == 0); /* ditto */
236 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
237 }
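/* Illustrative example (not in the original source): a 64-bit access at
   baseblock offset 24 yields ARM64AMode_RI12(X21, 3, 8), since RI12
   offsets are scaled by the access size (24 >> 3 == 3). */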
238
239 /* Ditto, for 32 bit accesses. */
240 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
241 {
242 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
243 vassert((off & 3) == 0); /* ditto */
244 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
245 }
246
247 /* Ditto, for 16 bit accesses. */
248 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
249 {
250 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
251 vassert((off & 1) == 0); /* ditto */
252 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
253 }
254
255 /* Ditto, for 8 bit accesses. */
256 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
257 {
258 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
259 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
260 }
261
262 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
263 {
264 vassert(off < (1<<12));
265 HReg r = newVRegI(env);
266 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
267 ARM64RIA_I12(off,0), True/*isAdd*/));
268 return r;
269 }
270
271 static HReg get_baseblock_register ( void )
272 {
273 return hregARM64_X21();
274 }
275
276 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
277 a new register, and return the new register. */
278 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
279 {
280 HReg dst = newVRegI(env);
281 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
282 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
283 return dst;
284 }
285
286 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
287 a new register, and return the new register. */
288 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
289 {
290 HReg dst = newVRegI(env);
291 ARM64RI6* n48 = ARM64RI6_I6(48);
292 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
293 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
294 return dst;
295 }
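/* Purely illustrative example: src = 0x0000000000008000 becomes
   0x8000000000000000 after SHL 48, and then 0xFFFFFFFFFFFF8000 after the
   arithmetic SAR 48, i.e. the 16-bit value is sign extended to 64 bits. */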
296
297 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
298 a new register, and return the new register. */
299 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
300 {
301 HReg dst = newVRegI(env);
302 ARM64RI6* n48 = ARM64RI6_I6(48);
303 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
304 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
305 return dst;
306 }
307
308 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
309 a new register, and return the new register. */
310 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
311 {
312 HReg dst = newVRegI(env);
313 ARM64RI6* n32 = ARM64RI6_I6(32);
314 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
315 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
316 return dst;
317 }
318
319 /* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
320 a new register, and return the new register. */
321 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
322 {
323 HReg dst = newVRegI(env);
324 ARM64RI6* n56 = ARM64RI6_I6(56);
325 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
326 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
327 return dst;
328 }
329
330 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
331 {
332 HReg dst = newVRegI(env);
333 ARM64RI6* n56 = ARM64RI6_I6(56);
334 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
335 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
336 return dst;
337 }
338
339 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
340 static Bool isZeroU64 ( IRExpr* e ) {
341 if (e->tag != Iex_Const) return False;
342 IRConst* con = e->Iex.Const.con;
343 vassert(con->tag == Ico_U64);
344 return con->Ico.U64 == 0;
345 }
346
347
348 /*---------------------------------------------------------*/
349 /*--- ISEL: FP rounding mode helpers ---*/
350 /*---------------------------------------------------------*/
351
352 /* Set the FP rounding mode: 'mode' is an I32-typed expression
353 denoting a value in the range 0 .. 3, indicating a rounding mode
354 encoded as per type IRRoundingMode -- the first four values only
355 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
356 FPCR to have the same rounding.
357
358 For speed & simplicity, we're setting the *entire* FPCR here.
359
360 Setting the rounding mode is expensive. So this function tries to
361 avoid repeatedly setting the rounding mode to the same thing by
362 first comparing 'mode' to the 'mode' tree supplied in the previous
363 call to this function, if any. (The previous value is stored in
364 env->previous_rm.) If 'mode' is a single IR temporary 't' and
365 env->previous_rm is also just 't', then the setting is skipped.
366
367 This is safe because of the SSA property of IR: an IR temporary can
368 only be defined once and so will have the same value regardless of
369 where it appears in the block. Cool stuff, SSA.
370
371 A safety condition: all attempts to set the RM must be aware of
372 this mechanism - by being routed through the functions here.
373
374 Of course this only helps in blocks where the RM is set more than
375 once, it is set to the same value each time, *and* that value is
376 held in the same IR temporary each time. In order to assure the
377 latter as much as possible, the IR optimiser takes care to do CSE
378 on any block with any sign of floating point activity.
379 */
380 static
381 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
382 {
383 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
384
385 /* Do we need to do anything? */
386 if (env->previous_rm
387 && env->previous_rm->tag == Iex_RdTmp
388 && mode->tag == Iex_RdTmp
389 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
390 /* no - setting it to what it was before. */
391 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
392 return;
393 }
394
395 /* No luck - we better set it, and remember what we set it to. */
396 env->previous_rm = mode;
397
398 /* Only supporting the rounding-mode bits - the rest of FPCR is set
399 to zero - so we can set the whole register at once (faster). */
400
401 /* This isn't simple, because 'mode' carries an IR rounding
402 encoding, and we need to translate that to an ARM64 FP one:
403 The IR encoding:
404 00 to nearest (the default)
405 10 to +infinity
406 01 to -infinity
407 11 to zero
408 The ARM64 FP encoding:
409 00 to nearest
410 01 to +infinity
411 10 to -infinity
412 11 to zero
413 Easy enough to do; just swap the two bits.
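For example (illustration): Irrm_PosINF is IR encoding 10b, which
becomes ARM64 encoding 01b after the swap; the code below then shifts
that value into FPCR[23:22].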
414 */
415 HReg irrm = iselIntExpr_R(env, mode);
416 HReg tL = newVRegI(env);
417 HReg tR = newVRegI(env);
418 HReg t3 = newVRegI(env);
419 /* tL = irrm << 1;
420 tR = irrm >> 1; if we're lucky, these will issue together
421 tL &= 2;
422 tR &= 1; ditto
423 t3 = tL | tR;
424 t3 <<= 22;
425 msr fpcr, t3
426 */
427 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
428 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
429 vassert(ril_one && ril_two);
430 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
431 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
432 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
433 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
434 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
435 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
436 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
437 }
438
439
440 /*---------------------------------------------------------*/
441 /*--- ISEL: Function call helpers ---*/
442 /*---------------------------------------------------------*/
443
444 /* Used only in doHelperCall. See big comment in doHelperCall re
445 handling of register-parameter args. This function figures out
446 whether evaluation of an expression might require use of a fixed
447 register. If in doubt return True (safe but suboptimal).
448 */
449 static
450 Bool mightRequireFixedRegs ( IRExpr* e )
451 {
452 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
453 // These are always "safe" -- either a copy of SP in some
454 // arbitrary vreg, or a copy of x21, respectively.
455 return False;
456 }
457 /* Else it's a "normal" expression. */
458 switch (e->tag) {
459 case Iex_RdTmp: case Iex_Const: case Iex_Get:
460 return False;
461 default:
462 return True;
463 }
464 }
465
466
467 /* Do a complete function call. |guard| is a Ity_Bit expression
468 indicating whether or not the call happens. If guard==NULL, the
469 call is unconditional. |retloc| is set to indicate where the
470 return value is after the call. The caller (of this fn) must
471 generate code to add |stackAdjustAfterCall| to the stack pointer
472 after the call is done. Returns True iff it managed to handle this
473 combination of arg/return types, else returns False. */
474
475 static
476 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
477 /*OUT*/RetLoc* retloc,
478 ISelEnv* env,
479 IRExpr* guard,
480 IRCallee* cee, IRType retTy, IRExpr** args )
481 {
482 ARM64CondCode cc;
483 HReg argregs[ARM64_N_ARGREGS];
484 HReg tmpregs[ARM64_N_ARGREGS];
485 Bool go_fast;
486 Int n_args, i, nextArgReg;
487 Addr64 target;
488
489 vassert(ARM64_N_ARGREGS == 8);
490
491 /* Set default returns. We'll update them later if needed. */
492 *stackAdjustAfterCall = 0;
493 *retloc = mk_RetLoc_INVALID();
494
495 /* These are used for cross-checking that IR-level constraints on
496 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
497 UInt nVECRETs = 0;
498 UInt nBBPTRs = 0;
499
500 /* Marshal args for a call and do the call.
501
502 This function only deals with a tiny set of possibilities, which
503 cover all helpers in practice. The restrictions are that only
504 arguments in registers are supported, hence only
505 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
506 fact the only supported arg type is I64.
507
508 The return type can be I{64,32} or V128. In the V128 case, it
509 is expected that |args| will contain the special node
510 IRExpr_VECRET(), in which case this routine generates code to
511 allocate space on the stack for the vector return value. Since
512 we are not passing any scalars on the stack, it is enough to
513 preallocate the return space before marshalling any arguments,
514 in this case.
515
516 |args| may also contain IRExpr_BBPTR(), in which case the
517 value in x21 is passed as the corresponding argument.
518
519 Generating code which is both efficient and correct when
520 parameters are to be passed in registers is difficult, for the
521 reasons elaborated in detail in comments attached to
522 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
523 of the method described in those comments.
524
525 The problem is split into two cases: the fast scheme and the
526 slow scheme. In the fast scheme, arguments are computed
527 directly into the target (real) registers. This is only safe
528 when we can be sure that computation of each argument will not
529 trash any real registers set by computation of any other
530 argument.
531
532 In the slow scheme, all args are first computed into vregs, and
533 once they are all done, they are moved to the relevant real
534 regs. This always gives correct code, but it also gives a bunch
535 of vreg-to-rreg moves which are usually redundant but are hard
536 for the register allocator to get rid of.
537
538 To decide which scheme to use, all argument expressions are
539 first examined. If they are all so simple that it is clear they
540 will be evaluated without use of any fixed registers, use the
541 fast scheme, else use the slow scheme. Note also that only
542 unconditional calls may use the fast scheme, since having to
543 compute a condition expression could itself trash real
544 registers.
545
546 Note this requires being able to examine an expression and
547 determine whether or not evaluation of it might use a fixed
548 register. That requires knowledge of how the rest of this insn
549 selector works. Currently just the following 3 are regarded as
550 safe -- hopefully they cover the majority of arguments in
551 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
552 */
553
554 /* Note that the cee->regparms field is meaningless on ARM64 hosts
555 (since there is only one calling convention) and so we always
556 ignore it. */
557
558 n_args = 0;
559 for (i = 0; args[i]; i++) {
560 IRExpr* arg = args[i];
561 if (UNLIKELY(arg->tag == Iex_VECRET)) {
562 nVECRETs++;
563 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
564 nBBPTRs++;
565 }
566 n_args++;
567 }
568
569 /* If this fails, the IR is ill-formed */
570 vassert(nBBPTRs == 0 || nBBPTRs == 1);
571
572 /* If we have a VECRET, allocate space on the stack for the return
573 value, and record the stack pointer after that. */
574 HReg r_vecRetAddr = INVALID_HREG;
575 if (nVECRETs == 1) {
576 vassert(retTy == Ity_V128 || retTy == Ity_V256);
577 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
578 r_vecRetAddr = newVRegI(env);
579 addInstr(env, ARM64Instr_AddToSP(-16));
580 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
581 } else {
582 // If either of these fail, the IR is ill-formed
583 vassert(retTy != Ity_V128 && retTy != Ity_V256);
584 vassert(nVECRETs == 0);
585 }
586
587 argregs[0] = hregARM64_X0();
588 argregs[1] = hregARM64_X1();
589 argregs[2] = hregARM64_X2();
590 argregs[3] = hregARM64_X3();
591 argregs[4] = hregARM64_X4();
592 argregs[5] = hregARM64_X5();
593 argregs[6] = hregARM64_X6();
594 argregs[7] = hregARM64_X7();
595
596 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
597 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
598
599 /* First decide which scheme (slow or fast) is to be used. First
600 assume the fast scheme, and select slow if any contraindications
601 (wow) appear. */
602
603 go_fast = True;
604
605 if (guard) {
606 if (guard->tag == Iex_Const
607 && guard->Iex.Const.con->tag == Ico_U1
608 && guard->Iex.Const.con->Ico.U1 == True) {
609 /* unconditional */
610 } else {
611 /* Not manifestly unconditional -- be conservative. */
612 go_fast = False;
613 }
614 }
615
616 if (go_fast) {
617 for (i = 0; i < n_args; i++) {
618 if (mightRequireFixedRegs(args[i])) {
619 go_fast = False;
620 break;
621 }
622 }
623 }
624
625 if (go_fast) {
626 if (retTy == Ity_V128 || retTy == Ity_V256)
627 go_fast = False;
628 }
629
630 /* At this point the scheme to use has been established. Generate
631 code to get the arg values into the argument rregs. If we run
632 out of arg regs, give up. */
633
634 if (go_fast) {
635
636 /* FAST SCHEME */
637 nextArgReg = 0;
638
639 for (i = 0; i < n_args; i++) {
640 IRExpr* arg = args[i];
641
642 IRType aTy = Ity_INVALID;
643 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
644 aTy = typeOfIRExpr(env->type_env, args[i]);
645
646 if (nextArgReg >= ARM64_N_ARGREGS)
647 return False; /* out of argregs */
648
649 if (aTy == Ity_I64) {
650 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
651 iselIntExpr_R(env, args[i]) ));
652 nextArgReg++;
653 }
654 else if (arg->tag == Iex_BBPTR) {
655 vassert(0); //ATC
656 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
657 hregARM64_X21() ));
658 nextArgReg++;
659 }
660 else if (arg->tag == Iex_VECRET) {
661 // because of the go_fast logic above, we can't get here,
662 // since vector return values make us use the slow path
663 // instead.
664 vassert(0);
665 }
666 else
667 return False; /* unhandled arg type */
668 }
669
670 /* Fast scheme only applies for unconditional calls. Hence: */
671 cc = ARM64cc_AL;
672
673 } else {
674
675 /* SLOW SCHEME; move via temporaries */
676 nextArgReg = 0;
677
678 for (i = 0; i < n_args; i++) {
679 IRExpr* arg = args[i];
680
681 IRType aTy = Ity_INVALID;
682 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
683 aTy = typeOfIRExpr(env->type_env, args[i]);
684
685 if (nextArgReg >= ARM64_N_ARGREGS)
686 return False; /* out of argregs */
687
688 if (aTy == Ity_I64) {
689 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
690 nextArgReg++;
691 }
692 else if (arg->tag == Iex_BBPTR) {
693 vassert(0); //ATC
694 tmpregs[nextArgReg] = hregARM64_X21();
695 nextArgReg++;
696 }
697 else if (arg->tag == Iex_VECRET) {
698 vassert(!hregIsInvalid(r_vecRetAddr));
699 tmpregs[nextArgReg] = r_vecRetAddr;
700 nextArgReg++;
701 }
702 else
703 return False; /* unhandled arg type */
704 }
705
706 /* Now we can compute the condition. We can't do it earlier
707 because the argument computations could trash the condition
708 codes. Be a bit clever to handle the common case where the
709 guard is 1:Bit. */
710 cc = ARM64cc_AL;
711 if (guard) {
712 if (guard->tag == Iex_Const
713 && guard->Iex.Const.con->tag == Ico_U1
714 && guard->Iex.Const.con->Ico.U1 == True) {
715 /* unconditional -- do nothing */
716 } else {
717 cc = iselCondCode( env, guard );
718 }
719 }
720
721 /* Move the args to their final destinations. */
722 for (i = 0; i < nextArgReg; i++) {
723 vassert(!(hregIsInvalid(tmpregs[i])));
724 /* None of these insns, including any spill code that might
725 be generated, may alter the condition codes. */
726 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
727 }
728
729 }
730
731 /* Should be assured by checks above */
732 vassert(nextArgReg <= ARM64_N_ARGREGS);
733
734 /* Do final checks, set the return values, and generate the call
735 instruction proper. */
736 vassert(nBBPTRs == 0 || nBBPTRs == 1);
737 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
738 vassert(*stackAdjustAfterCall == 0);
739 vassert(is_RetLoc_INVALID(*retloc));
740 switch (retTy) {
741 case Ity_INVALID:
742 /* Function doesn't return a value. */
743 *retloc = mk_RetLoc_simple(RLPri_None);
744 break;
745 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
746 *retloc = mk_RetLoc_simple(RLPri_Int);
747 break;
748 case Ity_V128:
749 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
750 *stackAdjustAfterCall = 16;
751 break;
752 case Ity_V256:
753 vassert(0); // ATC
754 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
755 *stackAdjustAfterCall = 32;
756 break;
757 default:
758 /* IR can denote other possible return types, but we don't
759 handle those here. */
760 vassert(0);
761 }
762
763 /* Finally, generate the call itself. This needs the *retloc value
764 set in the switch above, which is why it's at the end. */
765
766 /* nextArgReg doles out argument registers. Since these are
767 assigned in the order x0 .. x7, its numeric value at this point,
768 which must be between 0 and 8 inclusive, is going to be equal to
769 the number of arg regs in use for the call. Hence bake that
770 number into the call (we'll need to know it when doing register
771 allocation, to know what regs the call reads.) */
772
773 target = (Addr)cee->addr;
774 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
775
776 return True; /* success */
777 }
778
779
780 /*---------------------------------------------------------*/
781 /*--- ISEL: Integer expressions (64/32 bit) ---*/
782 /*---------------------------------------------------------*/
783
784 /* Select insns for an integer-typed expression, and add them to the
785 code list. Return a reg holding the result. This reg will be a
786 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
787 want to modify it, ask for a new vreg, copy it in there, and modify
788 the copy. The register allocator will do its best to map both
789 vregs to the same real register, so the copies will often disappear
790 later in the game.
791
792 This should handle expressions of 64- and 32-bit type. All results
793 are returned in a 64-bit register. For 32-bit expressions, the
794 upper 32 bits are arbitrary, so you should mask or sign extend
795 partial values if necessary.
796 */
797
798 /* --------------------- AMode --------------------- */
799
800 /* Return an AMode which computes the value of the specified
801 expression, possibly also adding insns to the code list as a
802 result. The expression may only be a 64-bit one.
803 */
804
805 static Bool isValidScale ( UChar scale )
806 {
807 switch (scale) {
808 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
809 default: return False;
810 }
811 }
812
813 static Bool sane_AMode ( ARM64AMode* am )
814 {
815 switch (am->tag) {
816 case ARM64am_RI9:
817 return
818 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
819 && (hregIsVirtual(am->ARM64am.RI9.reg)
820 /* || sameHReg(am->ARM64am.RI9.reg,
821 hregARM64_X21()) */ )
822 && am->ARM64am.RI9.simm9 >= -256
823 && am->ARM64am.RI9.simm9 <= 255 );
824 case ARM64am_RI12:
825 return
826 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
827 && (hregIsVirtual(am->ARM64am.RI12.reg)
828 /* || sameHReg(am->ARM64am.RI12.reg,
829 hregARM64_X21()) */ )
830 && am->ARM64am.RI12.uimm12 < 4096
831 && isValidScale(am->ARM64am.RI12.szB) );
832 case ARM64am_RR:
833 return
834 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
835 && hregIsVirtual(am->ARM64am.RR.base)
836 && hregClass(am->ARM64am.RR.index) == HRcInt64
837 && hregIsVirtual(am->ARM64am.RR.index) );
838 default:
839 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
840 }
841 }
842
843 static
844 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
845 {
846 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
847 vassert(sane_AMode(am));
848 return am;
849 }
850
851 static
852 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
853 {
854 IRType ty = typeOfIRExpr(env->type_env,e);
855 vassert(ty == Ity_I64);
856
857 ULong szBbits = 0;
858 switch (dty) {
859 case Ity_I64: szBbits = 3; break;
860 case Ity_I32: szBbits = 2; break;
861 case Ity_I16: szBbits = 1; break;
862 case Ity_I8: szBbits = 0; break;
863 default: vassert(0);
864 }
865
866 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
867 we're going to create an amode suitable for LDU* or STU*
868 instructions, which use unscaled immediate offsets. */
869 if (e->tag == Iex_Binop
870 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
871 && e->Iex.Binop.arg2->tag == Iex_Const
872 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
873 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
874 if (simm >= -255 && simm <= 255) {
875 /* Although the gating condition might seem to be
876 simm >= -256 && simm <= 255
877 we will need to negate simm in the case where the op is Sub64.
878 Hence limit the lower value to -255 in order that its negation
879 is representable. */
880 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
881 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
882 return ARM64AMode_RI9(reg, (Int)simm);
883 }
884 }
885
886 /* Add64(expr, uimm12 * transfer-size) */
887 if (e->tag == Iex_Binop
888 && e->Iex.Binop.op == Iop_Add64
889 && e->Iex.Binop.arg2->tag == Iex_Const
890 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
891 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
892 ULong szB = 1 << szBbits;
893 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
894 && (uimm >> szBbits) < 4096) {
895 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
896 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
897 }
898 }
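/* Illustration (hypothetical values): for a 64-bit access whose address
   is Add64(t, 0x28), uimm == 0x28 is 8-aligned and 0x28 >> 3 == 5, so
   the case above produces ARM64AMode_RI12(reg, 5, 8). */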
899
900 /* Add64(expr1, expr2) */
901 if (e->tag == Iex_Binop
902 && e->Iex.Binop.op == Iop_Add64) {
903 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
904 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
905 return ARM64AMode_RR(reg1, reg2);
906 }
907
908 /* Doesn't match anything in particular. Generate it into
909 a register and use that. */
910 HReg reg = iselIntExpr_R(env, e);
911 return ARM64AMode_RI9(reg, 0);
912 }
913
914
915 /* --------------------- RIA --------------------- */
916
917 /* Select instructions to generate 'e' into a RIA. */
918
919 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
920 {
921 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
922 /* sanity checks ... */
923 switch (ri->tag) {
924 case ARM64riA_I12:
925 vassert(ri->ARM64riA.I12.imm12 < 4096);
926 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
927 return ri;
928 case ARM64riA_R:
929 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
930 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
931 return ri;
932 default:
933 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
934 }
935 }
936
937 /* DO NOT CALL THIS DIRECTLY ! */
938 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
939 {
940 IRType ty = typeOfIRExpr(env->type_env,e);
941 vassert(ty == Ity_I64 || ty == Ity_I32);
942
943 /* special case: immediate */
944 if (e->tag == Iex_Const) {
945 ULong u = 0xF000000ULL; /* invalid */
946 switch (e->Iex.Const.con->tag) {
947 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
948 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
949 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
950 }
951 if (0 == (u & ~(0xFFFULL << 0)))
952 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
953 if (0 == (u & ~(0xFFFULL << 12)))
954 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
955 /* else fail, fall through to default case */
956 }
957
958 /* default case: calculate into a register and return that */
959 {
960 HReg r = iselIntExpr_R ( env, e );
961 return ARM64RIA_R(r);
962 }
963 }
964
965
966 /* --------------------- RIL --------------------- */
967
968 /* Select instructions to generate 'e' into a RIL. At this point we
969 have to deal with the strange bitfield-immediate encoding for logic
970 instructions. */
971
972
973 // The following four functions
974 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
975 // are copied, with modifications, from
976 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
977 // which has the following copyright notice:
978 /*
979 Copyright 2013, ARM Limited
980 All rights reserved.
981
982 Redistribution and use in source and binary forms, with or without
983 modification, are permitted provided that the following conditions are met:
984
985 * Redistributions of source code must retain the above copyright notice,
986 this list of conditions and the following disclaimer.
987 * Redistributions in binary form must reproduce the above copyright notice,
988 this list of conditions and the following disclaimer in the documentation
989 and/or other materials provided with the distribution.
990 * Neither the name of ARM Limited nor the names of its contributors may be
991 used to endorse or promote products derived from this software without
992 specific prior written permission.
993
994 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
995 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
996 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
997 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
998 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
999 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1000 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1001 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1002 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1003 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1004 */
1005
1006 static Int CountLeadingZeros(ULong value, Int width)
1007 {
1008 vassert(width == 32 || width == 64);
1009 Int count = 0;
1010 ULong bit_test = 1ULL << (width - 1);
1011 while ((count < width) && ((bit_test & value) == 0)) {
1012 count++;
1013 bit_test >>= 1;
1014 }
1015 return count;
1016 }
1017
1018 static Int CountTrailingZeros(ULong value, Int width)
1019 {
1020 vassert(width == 32 || width == 64);
1021 Int count = 0;
1022 while ((count < width) && (((value >> count) & 1) == 0)) {
1023 count++;
1024 }
1025 return count;
1026 }
1027
1028 static Int CountSetBits(ULong value, Int width)
1029 {
1030 // TODO: Other widths could be added here, as the implementation already
1031 // supports them.
1032 vassert(width == 32 || width == 64);
1033
1034 // Mask out unused bits to ensure that they are not counted.
1035 value &= (0xffffffffffffffffULL >> (64-width));
1036
1037 // Add up the set bits.
1038 // The algorithm works by adding pairs of bit fields together iteratively,
1039 // where the size of each bit field doubles each time.
1040 // An example for an 8-bit value:
1041 // Bits: h g f e d c b a
1042 // \ | \ | \ | \ |
1043 // value = h+g f+e d+c b+a
1044 // \ | \ |
1045 // value = h+g+f+e d+c+b+a
1046 // \ |
1047 // value = h+g+f+e+d+c+b+a
1048 value = ((value >> 1) & 0x5555555555555555ULL)
1049 + (value & 0x5555555555555555ULL);
1050 value = ((value >> 2) & 0x3333333333333333ULL)
1051 + (value & 0x3333333333333333ULL);
1052 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1053 + (value & 0x0f0f0f0f0f0f0f0fULL);
1054 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1055 + (value & 0x00ff00ff00ff00ffULL);
1056 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1057 + (value & 0x0000ffff0000ffffULL);
1058 value = ((value >> 32) & 0x00000000ffffffffULL)
1059 + (value & 0x00000000ffffffffULL);
1060
1061 return value;
1062 }
1063
1064 static Bool isImmLogical ( /*OUT*/UInt* n,
1065 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1066 ULong value, UInt width )
1067 {
1068 // Test if a given value can be encoded in the immediate field of a
1069 // logical instruction.
1070
1071 // If it can be encoded, the function returns true, and values
1072 // pointed to by n, imm_s and imm_r are updated with immediates
1073 // encoded in the format required by the corresponding fields in the
1074 // logical instruction. If it can not be encoded, the function
1075 // returns false, and the values pointed to by n, imm_s and imm_r
1076 // are undefined.
1077 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1078 vassert(width == 32 || width == 64);
1079
1080 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1081 // the following table:
1082 //
1083 // N imms immr size S R
1084 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1085 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1086 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1087 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1088 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1089 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1090 // (s bits must not be all set)
1091 //
1092 // A pattern is constructed of size bits, where the least significant S+1
1093 // bits are set. The pattern is rotated right by R, and repeated across a
1094 // 32 or 64-bit value, depending on destination register width.
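// Worked illustration (not part of the original comment): the value
// 0x0F0F0F0F0F0F0F0F repeats an 8-bit element whose low 4 bits are set,
// with no rotation, so it is encodable as N = 0, imms = 0b110011, immr = 0.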
1095 //
1096 // To test if an arbitrary immediate can be encoded using this scheme, an
1097 // iterative algorithm is used.
1098 //
1099 // TODO: This code does not consider using X/W register overlap to support
1100 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1101 // are an encodable logical immediate.
1102
1103 // 1. If the value has all set or all clear bits, it can't be encoded.
1104 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1105 ((width == 32) && (value == 0xffffffff))) {
1106 return False;
1107 }
1108
1109 UInt lead_zero = CountLeadingZeros(value, width);
1110 UInt lead_one = CountLeadingZeros(~value, width);
1111 UInt trail_zero = CountTrailingZeros(value, width);
1112 UInt trail_one = CountTrailingZeros(~value, width);
1113 UInt set_bits = CountSetBits(value, width);
1114
1115 // The fixed bits in the immediate s field.
1116 // If width == 64 (X reg), start at 0xFFFFFF80.
1117 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1118 // widths won't be executed.
1119 Int imm_s_fixed = (width == 64) ? -128 : -64;
1120 Int imm_s_mask = 0x3F;
1121
1122 for (;;) {
1123 // 2. If the value is two bits wide, it can be encoded.
1124 if (width == 2) {
1125 *n = 0;
1126 *imm_s = 0x3C;
1127 *imm_r = (value & 3) - 1;
1128 return True;
1129 }
1130
1131 *n = (width == 64) ? 1 : 0;
1132 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1133 if ((lead_zero + set_bits) == width) {
1134 *imm_r = 0;
1135 } else {
1136 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1137 }
1138
1139 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1140 // the bit width of the value, it can be encoded.
1141 if (lead_zero + trail_zero + set_bits == width) {
1142 return True;
1143 }
1144
1145 // 4. If the sum of leading ones, trailing ones and unset bits in the
1146 // value is equal to the bit width of the value, it can be encoded.
1147 if (lead_one + trail_one + (width - set_bits) == width) {
1148 return True;
1149 }
1150
1151 // 5. If the most-significant half of the bitwise value is equal to the
1152 // least-significant half, return to step 2 using the least-significant
1153 // half of the value.
1154 ULong mask = (1ULL << (width >> 1)) - 1;
1155 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1156 width >>= 1;
1157 set_bits >>= 1;
1158 imm_s_fixed >>= 1;
1159 continue;
1160 }
1161
1162 // 6. Otherwise, the value can't be encoded.
1163 return False;
1164 }
1165 }
1166
1167
1168 /* Create a RIL for the given immediate, if it is representable, or
1169 return NULL if not. */
1170
1171 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1172 {
1173 UInt n = 0, imm_s = 0, imm_r = 0;
1174 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1175 if (!ok) return NULL;
1176 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1177 return ARM64RIL_I13(n, imm_r, imm_s);
1178 }
1179
1180 /* So, finally .. */
1181
1182 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1183 {
1184 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1185 /* sanity checks ... */
1186 switch (ri->tag) {
1187 case ARM64riL_I13:
1188 vassert(ri->ARM64riL.I13.bitN < 2);
1189 vassert(ri->ARM64riL.I13.immR < 64);
1190 vassert(ri->ARM64riL.I13.immS < 64);
1191 return ri;
1192 case ARM64riL_R:
1193 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1194 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1195 return ri;
1196 default:
1197 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1198 }
1199 }
1200
1201 /* DO NOT CALL THIS DIRECTLY ! */
1202 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1203 {
1204 IRType ty = typeOfIRExpr(env->type_env,e);
1205 vassert(ty == Ity_I64 || ty == Ity_I32);
1206
1207 /* special case: immediate */
1208 if (e->tag == Iex_Const) {
1209 ARM64RIL* maybe = NULL;
1210 if (ty == Ity_I64) {
1211 vassert(e->Iex.Const.con->tag == Ico_U64);
1212 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1213 } else {
1214 vassert(ty == Ity_I32);
1215 vassert(e->Iex.Const.con->tag == Ico_U32);
1216 UInt u32 = e->Iex.Const.con->Ico.U32;
1217 ULong u64 = (ULong)u32;
1218 /* First try with 32 leading zeroes. */
1219 maybe = mb_mkARM64RIL_I(u64);
1220 /* If that doesn't work, try with 2 copies, since it doesn't
1221 matter what winds up in the upper 32 bits. */
1222 if (!maybe) {
1223 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1224 }
1225 }
1226 if (maybe) return maybe;
1227 /* else fail, fall through to default case */
1228 }
1229
1230 /* default case: calculate into a register and return that */
1231 {
1232 HReg r = iselIntExpr_R ( env, e );
1233 return ARM64RIL_R(r);
1234 }
1235 }
1236
1237
1238 /* --------------------- RI6 --------------------- */
1239
1240 /* Select instructions to generate 'e' into a RI6. */
1241
1242 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1243 {
1244 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1245 /* sanity checks ... */
1246 switch (ri->tag) {
1247 case ARM64ri6_I6:
1248 vassert(ri->ARM64ri6.I6.imm6 < 64);
1249 vassert(ri->ARM64ri6.I6.imm6 > 0);
1250 return ri;
1251 case ARM64ri6_R:
1252 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1253 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1254 return ri;
1255 default:
1256 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1257 }
1258 }
1259
1260 /* DO NOT CALL THIS DIRECTLY ! */
1261 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1262 {
1263 IRType ty = typeOfIRExpr(env->type_env,e);
1264 vassert(ty == Ity_I64 || ty == Ity_I8);
1265
1266 /* special case: immediate */
1267 if (e->tag == Iex_Const) {
1268 switch (e->Iex.Const.con->tag) {
1269 case Ico_U8: {
1270 UInt u = e->Iex.Const.con->Ico.U8;
1271 if (u > 0 && u < 64)
1272 return ARM64RI6_I6(u);
1273 break;
1274 }
1275 default:
1276 break;
1277 }
1278 /* else fail, fall through to default case */
1279 }
1280
1281 /* default case: calculate into a register and return that */
1282 {
1283 HReg r = iselIntExpr_R ( env, e );
1284 return ARM64RI6_R(r);
1285 }
1286 }
1287
1288
1289 /* ------------------- CondCode ------------------- */
1290
1291 /* Generate code to evaluate a bit-typed expression, returning the
1292 condition code which would correspond to the expression notionally
1293 returning 1. */
1294
1295 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1296 {
1297 ARM64CondCode cc = iselCondCode_wrk(env,e);
1298 vassert(cc != ARM64cc_NV);
1299 return cc;
1300 }
1301
1302 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1303 {
1304 vassert(e);
1305 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1306
1307 /* var */
1308 if (e->tag == Iex_RdTmp) {
1309 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1310 /* Cmp doesn't modify rTmp; so this is OK. */
1311 ARM64RIL* one = mb_mkARM64RIL_I(1);
1312 vassert(one);
1313 addInstr(env, ARM64Instr_Test(rTmp, one));
1314 return ARM64cc_NE;
1315 }
1316
1317 /* Not1(e) */
1318 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1319 /* Generate code for the arg, and negate the test condition */
1320 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1321 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1322 return ARM64cc_AL;
1323 } else {
1324 return 1 ^ cc;
1325 }
1326 }
1327
1328 /* --- patterns rooted at: 64to1 --- */
1329
1330 if (e->tag == Iex_Unop
1331 && e->Iex.Unop.op == Iop_64to1) {
1332 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1333 ARM64RIL* one = mb_mkARM64RIL_I(1);
1334 vassert(one); /* '1' must be representable */
1335 addInstr(env, ARM64Instr_Test(rTmp, one));
1336 return ARM64cc_NE;
1337 }
1338
1339 /* --- patterns rooted at: CmpNEZ8 --- */
1340
1341 if (e->tag == Iex_Unop
1342 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1343 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1344 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1345 addInstr(env, ARM64Instr_Test(r1, xFF));
1346 return ARM64cc_NE;
1347 }
1348
1349 /* --- patterns rooted at: CmpNEZ16 --- */
1350
1351 if (e->tag == Iex_Unop
1352 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1353 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1354 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1355 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1356 return ARM64cc_NE;
1357 }
1358
1359 /* --- patterns rooted at: CmpNEZ64 --- */
1360
1361 if (e->tag == Iex_Unop
1362 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1363 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1364 ARM64RIA* zero = ARM64RIA_I12(0,0);
1365 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1366 return ARM64cc_NE;
1367 }
1368
1369 /* --- patterns rooted at: CmpNEZ32 --- */
1370
1371 if (e->tag == Iex_Unop
1372 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1373 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1374 ARM64RIA* zero = ARM64RIA_I12(0,0);
1375 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1376 return ARM64cc_NE;
1377 }
1378
1379 /* --- Cmp*64*(x,y) --- */
1380 if (e->tag == Iex_Binop
1381 && (e->Iex.Binop.op == Iop_CmpEQ64
1382 || e->Iex.Binop.op == Iop_CmpNE64
1383 || e->Iex.Binop.op == Iop_CmpLT64S
1384 || e->Iex.Binop.op == Iop_CmpLT64U
1385 || e->Iex.Binop.op == Iop_CmpLE64S
1386 || e->Iex.Binop.op == Iop_CmpLE64U)) {
1387 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1388 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1389 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1390 switch (e->Iex.Binop.op) {
1391 case Iop_CmpEQ64: return ARM64cc_EQ;
1392 case Iop_CmpNE64: return ARM64cc_NE;
1393 case Iop_CmpLT64S: return ARM64cc_LT;
1394 case Iop_CmpLT64U: return ARM64cc_CC;
1395 case Iop_CmpLE64S: return ARM64cc_LE;
1396 case Iop_CmpLE64U: return ARM64cc_LS;
1397 default: vpanic("iselCondCode(arm64): CmpXX64");
1398 }
1399 }
1400
1401 /* --- Cmp*32*(x,y) --- */
1402 if (e->tag == Iex_Binop
1403 && (e->Iex.Binop.op == Iop_CmpEQ32
1404 || e->Iex.Binop.op == Iop_CmpNE32
1405 || e->Iex.Binop.op == Iop_CmpLT32S
1406 || e->Iex.Binop.op == Iop_CmpLT32U
1407 || e->Iex.Binop.op == Iop_CmpLE32S
1408 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1409 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1410 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1411 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1412 switch (e->Iex.Binop.op) {
1413 case Iop_CmpEQ32: return ARM64cc_EQ;
1414 case Iop_CmpNE32: return ARM64cc_NE;
1415 case Iop_CmpLT32S: return ARM64cc_LT;
1416 case Iop_CmpLT32U: return ARM64cc_CC;
1417 case Iop_CmpLE32S: return ARM64cc_LE;
1418 case Iop_CmpLE32U: return ARM64cc_LS;
1419 default: vpanic("iselCondCode(arm64): CmpXX32");
1420 }
1421 }
1422
1423 ppIRExpr(e);
1424 vpanic("iselCondCode");
1425 }
1426
1427
1428 /* --------------------- Reg --------------------- */
1429
1430 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1431 {
1432 HReg r = iselIntExpr_R_wrk(env, e);
1433 /* sanity checks ... */
1434 # if 0
1435 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1436 # endif
1437 vassert(hregClass(r) == HRcInt64);
1438 vassert(hregIsVirtual(r));
1439 return r;
1440 }
1441
1442 /* DO NOT CALL THIS DIRECTLY ! */
1443 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1444 {
1445 IRType ty = typeOfIRExpr(env->type_env,e);
1446 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1447
1448 switch (e->tag) {
1449
1450 /* --------- TEMP --------- */
1451 case Iex_RdTmp: {
1452 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1453 }
1454
1455 /* --------- LOAD --------- */
1456 case Iex_Load: {
1457 HReg dst = newVRegI(env);
1458
1459 if (e->Iex.Load.end != Iend_LE)
1460 goto irreducible;
1461
1462 if (ty == Ity_I64) {
1463 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1464 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1465 return dst;
1466 }
1467 if (ty == Ity_I32) {
1468 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1469 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1470 return dst;
1471 }
1472 if (ty == Ity_I16) {
1473 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1474 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1475 return dst;
1476 }
1477 if (ty == Ity_I8) {
1478 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1479 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1480 return dst;
1481 }
1482 break;
1483 }
1484
1485 /* --------- BINARY OP --------- */
1486 case Iex_Binop: {
1487
1488 ARM64LogicOp lop = 0; /* invalid */
1489 ARM64ShiftOp sop = 0; /* invalid */
1490
1491 /* Special-case 0-x into a Neg instruction. Not because it's
1492 particularly useful but more so as to give value flow using
1493 this instruction, so as to check its assembly correctness for
1494 implementation of Left32/Left64. */
1495 switch (e->Iex.Binop.op) {
1496 case Iop_Sub64:
1497 if (isZeroU64(e->Iex.Binop.arg1)) {
1498 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1499 HReg dst = newVRegI(env);
1500 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1501 return dst;
1502 }
1503 break;
1504 default:
1505 break;
1506 }
1507
1508 /* ADD/SUB */
1509 switch (e->Iex.Binop.op) {
1510 case Iop_Add64: case Iop_Add32:
1511 case Iop_Sub64: case Iop_Sub32: {
1512 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1513 || e->Iex.Binop.op == Iop_Add32;
1514 HReg dst = newVRegI(env);
1515 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1516 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1517 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1518 return dst;
1519 }
1520 default:
1521 break;
1522 }
1523
1524 /* AND/OR/XOR */
1525 switch (e->Iex.Binop.op) {
1526 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1527 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1528 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1529 log_binop: {
1530 HReg dst = newVRegI(env);
1531 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1532 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1533 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1534 return dst;
1535 }
1536 default:
1537 break;
1538 }
1539
1540 /* SHL/SHR/SAR */
1541 switch (e->Iex.Binop.op) {
1542 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1543 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1544 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1545 sh_binop: {
1546 HReg dst = newVRegI(env);
1547 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1548 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1549 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1550 return dst;
1551 }
1552 case Iop_Shr32:
1553 case Iop_Sar32: {
1554 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1555 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1556 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1557 HReg dst = zx ? widen_z_32_to_64(env, argL)
1558 : widen_s_32_to_64(env, argL);
1559 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1560 return dst;
1561 }
1562 default: break;
1563 }
1564
1565 /* MUL */
1566 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1567 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1568 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1569 HReg dst = newVRegI(env);
1570 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1571 return dst;
1572 }
1573
1574 /* MULL */
1575 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1576 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1577 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1578 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1579 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1580 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1581 HReg dst = newVRegI(env);
1582 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1583 return dst;
1584 }
1585
1586 /* Handle misc other ops. */
1587
1588 if (e->Iex.Binop.op == Iop_Max32U) {
1589 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1590 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1591 HReg dst = newVRegI(env);
1592 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1593 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1594 return dst;
1595 }
1596
1597 if (e->Iex.Binop.op == Iop_32HLto64) {
1598 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1599 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1600 HReg lo32 = widen_z_32_to_64(env, lo32s);
1601 HReg hi32 = newVRegI(env);
1602 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1603 ARM64sh_SHL));
1604 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1605 ARM64lo_OR));
1606 return hi32;
1607 }
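   /* Worked example of the 32HLto64 lowering above: with hi32s = 0x1 and
      lo32s = 0x2, widen_z_32_to_64 gives lo32 = 0x2, the SHL by 32 gives
      hi32 = 0x100000000, and the final ORR produces 0x0000000100000002,
      i.e. the 64-bit concatenation hi32s:lo32s. */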
1608
1609 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1610 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1611 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1612 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1613 HReg dst = newVRegI(env);
1614 HReg imm = newVRegI(env);
1615 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1616 create, in dst, the IRCmpF64Result-encoded result. */
1617 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1618 addInstr(env, ARM64Instr_Imm64(dst, 0));
1619 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1620 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1621 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1622 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1623 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1624 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1625 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1626 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1627 return dst;
1628 }
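   /* Summarising the CSel chain above: exactly one of the four conditions
      holds after an FCMP, so with dst initialised to zero the final value is
         EQ (equal)        -> 0x40 (Ircr_EQ)
         MI (less than)    -> 0x01 (Ircr_LT)
         GT (greater than) -> 0x00 (Ircr_GT)
         VS (unordered)    -> 0x45 (Ircr_UN) */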
1629
1630 { /* local scope */
1631 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1632 Bool srcIsD = False;
1633 switch (e->Iex.Binop.op) {
1634 case Iop_F64toI64S:
1635 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1636 case Iop_F64toI64U:
1637 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1638 case Iop_F64toI32S:
1639 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1640 case Iop_F64toI32U:
1641 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1642 case Iop_F32toI32S:
1643 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1644 case Iop_F32toI32U:
1645 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1646 case Iop_F32toI64S:
1647 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1648 case Iop_F32toI64U:
1649 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1650 default:
1651 break;
1652 }
1653 if (cvt_op != ARM64cvt_INVALID) {
1654 /* This is all a bit dodgy, because we can't handle a
1655 non-constant (not-known-at-JIT-time) rounding mode
1656 indication. That's because there's no instruction
1657 AFAICS that does this conversion but rounds according to
1658 FPCR.RM, so we have to bake the rounding mode into the
1659 instruction right now. But that should be OK because
1660 (1) the front end attaches a literal Irrm_ value to the
1661 conversion binop, and (2) iropt will never float that
1662 off via CSE, into a literal. Hence we should always
1663 have an Irrm_ value as the first arg. */
1664 IRExpr* arg1 = e->Iex.Binop.arg1;
1665 if (arg1->tag != Iex_Const) goto irreducible;
1666 IRConst* arg1con = arg1->Iex.Const.con;
1667 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1668 UInt irrm = arg1con->Ico.U32;
1669 /* Find the ARM-encoded equivalent for |irrm|. */
1670 UInt armrm = 4; /* impossible */
1671 switch (irrm) {
1672 case Irrm_NEAREST: armrm = 0; break;
1673 case Irrm_NegINF: armrm = 2; break;
1674 case Irrm_PosINF: armrm = 1; break;
1675 case Irrm_ZERO: armrm = 3; break;
1676 default: goto irreducible;
1677 }
1678 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1679 (env, e->Iex.Binop.arg2);
1680 HReg dst = newVRegI(env);
1681 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1682 return dst;
1683 }
1684 } /* local scope */
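   /* Example of the F-to-I case above: Iop_F64toI32S with a literal
      Irrm_ZERO first argument selects cvt_op = ARM64cvt_F64_I32S and
      armrm = 3 (round towards zero), so a single VCvtF2I instruction is
      emitted; a non-literal or unhandled rounding mode falls through to
      the |irreducible| path instead. */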
1685
1686 /* All cases involving host-side helper calls. */
1687 void* fn = NULL;
1688 switch (e->Iex.Binop.op) {
1689 case Iop_DivU32:
1690 fn = &h_calc_udiv32_w_arm_semantics; break;
1691 case Iop_DivS32:
1692 fn = &h_calc_sdiv32_w_arm_semantics; break;
1693 case Iop_DivU64:
1694 fn = &h_calc_udiv64_w_arm_semantics; break;
1695 case Iop_DivS64:
1696 fn = &h_calc_sdiv64_w_arm_semantics; break;
1697 default:
1698 break;
1699 }
1700
1701 if (fn) {
1702 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1703 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1704 HReg res = newVRegI(env);
1705 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1706 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1707 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1708 2, mk_RetLoc_simple(RLPri_Int) ));
1709 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1710 return res;
1711 }
1712
1713 break;
1714 }
1715
1716 /* --------- UNARY OP --------- */
1717 case Iex_Unop: {
1718
1719 switch (e->Iex.Unop.op) {
1720 case Iop_16Uto64: {
1721 /* This probably doesn't occur often enough to be worth
1722 rolling the extension into the load. */
1723 IRExpr* arg = e->Iex.Unop.arg;
1724 HReg src = iselIntExpr_R(env, arg);
1725 HReg dst = widen_z_16_to_64(env, src);
1726 return dst;
1727 }
1728 case Iop_32Uto64: {
1729 IRExpr* arg = e->Iex.Unop.arg;
1730 if (arg->tag == Iex_Load) {
1731 /* This correctly zero extends because _LdSt32 is
1732 defined to do a zero extending load. */
1733 HReg dst = newVRegI(env);
1734 ARM64AMode* am
1735 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1736 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1737 return dst;
1738 }
1739 /* else be lame and mask it */
1740 HReg src = iselIntExpr_R(env, arg);
1741 HReg dst = widen_z_32_to_64(env, src);
1742 return dst;
1743 }
1744 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1745 case Iop_8Uto64: {
1746 IRExpr* arg = e->Iex.Unop.arg;
1747 if (arg->tag == Iex_Load) {
1748 /* This correctly zero extends because _LdSt8 is
1749 defined to do a zero extending load. */
1750 HReg dst = newVRegI(env);
1751 ARM64AMode* am
1752 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1753 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1754 return dst;
1755 }
1756 /* else be lame and mask it */
1757 HReg src = iselIntExpr_R(env, arg);
1758 HReg dst = widen_z_8_to_64(env, src);
1759 return dst;
1760 }
1761 case Iop_128HIto64: {
1762 HReg rHi, rLo;
1763 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1764 return rHi; /* and abandon rLo */
1765 }
1766 case Iop_8Sto32: case Iop_8Sto64: {
1767 IRExpr* arg = e->Iex.Unop.arg;
1768 HReg src = iselIntExpr_R(env, arg);
1769 HReg dst = widen_s_8_to_64(env, src);
1770 return dst;
1771 }
1772 case Iop_16Sto32: case Iop_16Sto64: {
1773 IRExpr* arg = e->Iex.Unop.arg;
1774 HReg src = iselIntExpr_R(env, arg);
1775 HReg dst = widen_s_16_to_64(env, src);
1776 return dst;
1777 }
1778 case Iop_32Sto64: {
1779 IRExpr* arg = e->Iex.Unop.arg;
1780 HReg src = iselIntExpr_R(env, arg);
1781 HReg dst = widen_s_32_to_64(env, src);
1782 return dst;
1783 }
1784 case Iop_Not32:
1785 case Iop_Not64: {
1786 HReg dst = newVRegI(env);
1787 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1788 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
1789 return dst;
1790 }
1791 case Iop_Clz64: {
1792 HReg dst = newVRegI(env);
1793 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1794 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
1795 return dst;
1796 }
1797 case Iop_Left32:
1798 case Iop_Left64: {
1799 /* Left64(src) = src | -src. Left32 can use the same
1800 implementation since in that case we don't care what
1801 the upper 32 bits become. */
1802 HReg dst = newVRegI(env);
1803 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1804 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1805 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1806 ARM64lo_OR));
1807 return dst;
1808 }
1809 case Iop_CmpwNEZ64: {
1810 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
1811 = Left64(src) >>s 63 */
1812 HReg dst = newVRegI(env);
1813 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1814 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1815 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1816 ARM64lo_OR));
1817 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1818 ARM64sh_SAR));
1819 return dst;
1820 }
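      /* Worked example for CmpwNEZ64: src = 0 gives NEG -> 0, OR -> 0 and
         SAR #63 -> 0; any nonzero src, say 0x8, gives NEG -> -8, an OR
         result with bit 63 set, and SAR #63 -> all ones. */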
1821 case Iop_CmpwNEZ32: {
1822 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
1823 = Left64(src & 0xFFFFFFFF) >>s 63 */
1824 HReg dst = newVRegI(env);
1825 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1826 HReg src = widen_z_32_to_64(env, pre);
1827 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1828 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1829 ARM64lo_OR));
1830 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1831 ARM64sh_SAR));
1832 return dst;
1833 }
1834 case Iop_V128to64: case Iop_V128HIto64: {
1835 HReg dst = newVRegI(env);
1836 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1837 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
1838 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
1839 return dst;
1840 }
1841 case Iop_ReinterpF64asI64: {
1842 HReg dst = newVRegI(env);
1843 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1844 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
1845 return dst;
1846 }
1847 case Iop_ReinterpF32asI32: {
1848 HReg dst = newVRegI(env);
1849 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1850 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
1851 return dst;
1852 }
1853 case Iop_1Sto16:
1854 case Iop_1Sto32:
1855 case Iop_1Sto64: {
1856 /* As with the iselStmt case for 'tmp:I1 = expr', we could
1857 do a lot better here if it ever became necessary. */
1858 HReg zero = newVRegI(env);
1859 HReg one = newVRegI(env);
1860 HReg dst = newVRegI(env);
1861 addInstr(env, ARM64Instr_Imm64(zero, 0));
1862 addInstr(env, ARM64Instr_Imm64(one, 1));
1863 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1864 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1865 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1866 ARM64sh_SHL));
1867 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1868 ARM64sh_SAR));
1869 return dst;
1870 }
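      /* The two shifts above turn the CSel's 0/1 into 0 or all ones:
         SHL #63 moves the condition bit up to bit 63 and the arithmetic
         SAR #63 replicates it into every bit.  The narrower 1Sto16 and
         1Sto32 results just ignore the surplus high bits. */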
1871 case Iop_NarrowUn16to8x8:
1872 case Iop_NarrowUn32to16x4:
1873 case Iop_NarrowUn64to32x2:
1874 case Iop_QNarrowUn16Sto8Sx8:
1875 case Iop_QNarrowUn32Sto16Sx4:
1876 case Iop_QNarrowUn64Sto32Sx2:
1877 case Iop_QNarrowUn16Uto8Ux8:
1878 case Iop_QNarrowUn32Uto16Ux4:
1879 case Iop_QNarrowUn64Uto32Ux2:
1880 case Iop_QNarrowUn16Sto8Ux8:
1881 case Iop_QNarrowUn32Sto16Ux4:
1882 case Iop_QNarrowUn64Sto32Ux2:
1883 {
1884 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1885 HReg tmp = newVRegV(env);
1886 HReg dst = newVRegI(env);
1887 UInt dszBlg2 = 3; /* illegal */
1888 ARM64VecNarrowOp op = ARM64vecna_INVALID;
1889 switch (e->Iex.Unop.op) {
1890 case Iop_NarrowUn16to8x8:
1891 dszBlg2 = 0; op = ARM64vecna_XTN; break;
1892 case Iop_NarrowUn32to16x4:
1893 dszBlg2 = 1; op = ARM64vecna_XTN; break;
1894 case Iop_NarrowUn64to32x2:
1895 dszBlg2 = 2; op = ARM64vecna_XTN; break;
1896 case Iop_QNarrowUn16Sto8Sx8:
1897 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
1898 case Iop_QNarrowUn32Sto16Sx4:
1899 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
1900 case Iop_QNarrowUn64Sto32Sx2:
1901 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
1902 case Iop_QNarrowUn16Uto8Ux8:
1903 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
1904 case Iop_QNarrowUn32Uto16Ux4:
1905 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
1906 case Iop_QNarrowUn64Uto32Ux2:
1907 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
1908 case Iop_QNarrowUn16Sto8Ux8:
1909 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
1910 case Iop_QNarrowUn32Sto16Ux4:
1911 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
1912 case Iop_QNarrowUn64Sto32Ux2:
1913 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
1914 default:
1915 vassert(0);
1916 }
1917 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
1918 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
1919 return dst;
1920 }
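      /* For instance, Iop_QNarrowUn32Sto16Ux4 (signed 32 -> unsigned 16,
         saturating) selects ARM64vecna_SQXTUN with dszBlg2 = 1; VNarrowV
         leaves the narrowed lanes in the low half of |tmp| and VXfromQ
         then copies that 64-bit half into the integer register |dst|. */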
1921 case Iop_1Uto64: {
1922 /* 1Uto64(tmp). */
1923 HReg dst = newVRegI(env);
1924 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1925 ARM64RIL* one = mb_mkARM64RIL_I(1);
1926 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1927 vassert(one);
1928 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
1929 } else {
1930 /* CLONE-01 */
1931 HReg zero = newVRegI(env);
1932 HReg one = newVRegI(env);
1933 addInstr(env, ARM64Instr_Imm64(zero, 0));
1934 addInstr(env, ARM64Instr_Imm64(one, 1));
1935 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1936 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1937 }
1938 return dst;
1939 }
1940 case Iop_64to32:
1941 case Iop_64to16:
1942 case Iop_64to8:
1943 /* These are no-ops. */
1944 return iselIntExpr_R(env, e->Iex.Unop.arg);
1945
1946 default:
1947 break;
1948 }
1949
1950 break;
1951 }
1952
1953 /* --------- GET --------- */
1954 case Iex_Get: {
1955 if (ty == Ity_I64
1956 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
1957 HReg dst = newVRegI(env);
1958 ARM64AMode* am
1959 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
1960 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
1961 return dst;
1962 }
1963 if (ty == Ity_I32
1964 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
1965 HReg dst = newVRegI(env);
1966 ARM64AMode* am
1967 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
1968 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1969 return dst;
1970 }
1971 if (ty == Ity_I16
1972 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
1973 HReg dst = newVRegI(env);
1974 ARM64AMode* am
1975 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
1976 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
1977 return dst;
1978 }
1979 if (ty == Ity_I8
1980 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
1981 HReg dst = newVRegI(env);
1982 ARM64AMode* am
1983 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
1984 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1985 return dst;
1986 }
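      /* The offset guards above keep each access within what the scaled
         12-bit unsigned-immediate addressing form can encode for that
         access size (hence the (N<<12)-N bounds and alignment checks);
         anything else breaks out and ends up at the |irreducible| path. */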
1987 break;
1988 }
1989
1990 /* --------- CCALL --------- */
1991 case Iex_CCall: {
1992 HReg dst = newVRegI(env);
1993 vassert(ty == e->Iex.CCall.retty);
1994
1995 /* be very restrictive for now. Only 64-bit ints allowed for
1996 args, and 64 bits for return type. Don't forget to change
1997 the RetLoc if more types are allowed in future. */
1998 if (e->Iex.CCall.retty != Ity_I64)
1999 goto irreducible;
2000
2001 /* Marshal args, do the call, clear stack. */
2002 UInt addToSp = 0;
2003 RetLoc rloc = mk_RetLoc_INVALID();
2004 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2005 e->Iex.CCall.cee, e->Iex.CCall.retty,
2006 e->Iex.CCall.args );
2007 /* */
2008 if (ok) {
2009 vassert(is_sane_RetLoc(rloc));
2010 vassert(rloc.pri == RLPri_Int);
2011 vassert(addToSp == 0);
2012 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2013 return dst;
2014 }
2015 /* else fall through; will hit the irreducible: label */
2016 }
2017
2018 /* --------- LITERAL --------- */
2019 /* Integer literals (widened to 64 bits) */
2020 case Iex_Const: {
2021 ULong u = 0;
2022 HReg dst = newVRegI(env);
2023 switch (e->Iex.Const.con->tag) {
2024 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2025 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2026 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2027 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2028 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2029 }
2030 addInstr(env, ARM64Instr_Imm64(dst, u));
2031 return dst;
2032 }
2033
2034 /* --------- MULTIPLEX --------- */
2035 case Iex_ITE: {
2036 /* ITE(ccexpr, iftrue, iffalse) */
2037 if (ty == Ity_I64 || ty == Ity_I32) {
2038 ARM64CondCode cc;
2039 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2040 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2041 HReg dst = newVRegI(env);
2042 cc = iselCondCode(env, e->Iex.ITE.cond);
2043 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2044 return dst;
2045 }
2046 break;
2047 }
2048
2049 default:
2050 break;
2051 } /* switch (e->tag) */
2052
2053 /* We get here if no pattern matched. */
2054 irreducible:
2055 ppIRExpr(e);
2056 vpanic("iselIntExpr_R: cannot reduce tree");
2057 }
2058
2059
2060 /*---------------------------------------------------------*/
2061 /*--- ISEL: Integer expressions (128 bit) ---*/
2062 /*---------------------------------------------------------*/
2063
2064 /* Compute a 128-bit value into a register pair, which is returned as
2065 the first two parameters. As with iselIntExpr_R, these may be
2066 either real or virtual regs; in any case they must not be changed
2067 by subsequent code emitted by the caller. */
2068
2069 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2070 ISelEnv* env, IRExpr* e )
2071 {
2072 iselInt128Expr_wrk(rHi, rLo, env, e);
2073 # if 0
2074 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2075 # endif
2076 vassert(hregClass(*rHi) == HRcInt64);
2077 vassert(hregIsVirtual(*rHi));
2078 vassert(hregClass(*rLo) == HRcInt64);
2079 vassert(hregIsVirtual(*rLo));
2080 }
2081
2082 /* DO NOT CALL THIS DIRECTLY ! */
2083 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2084 ISelEnv* env, IRExpr* e )
2085 {
2086 vassert(e);
2087 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2088
2089 /* --------- BINARY ops --------- */
2090 if (e->tag == Iex_Binop) {
2091 switch (e->Iex.Binop.op) {
2092 /* 64 x 64 -> 128 multiply */
2093 case Iop_MullU64:
2094 case Iop_MullS64: {
2095 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2096 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2097 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2098 HReg dstLo = newVRegI(env);
2099 HReg dstHi = newVRegI(env);
2100 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2101 ARM64mul_PLAIN));
2102 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2103 syned ? ARM64mul_SX : ARM64mul_ZX));
2104 *rHi = dstHi;
2105 *rLo = dstLo;
2106 return;
2107 }
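         /* The low 64 bits of a 64x64 product are identical for signed and
            unsigned operands, hence ARM64mul_PLAIN for |dstLo|; only the
            high half differs, which is why |dstHi| uses the _SX / _ZX
            variants (presumably SMULH / UMULH underneath). */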
2108 /* 64HLto128(e1,e2) */
2109 case Iop_64HLto128:
2110 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2111 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2112 return;
2113 default:
2114 break;
2115 }
2116 } /* if (e->tag == Iex_Binop) */
2117
2118 ppIRExpr(e);
2119 vpanic("iselInt128Expr(arm64)");
2120 }
2121
2122
2123 /*---------------------------------------------------------*/
2124 /*--- ISEL: Vector expressions (128 bit) ---*/
2125 /*---------------------------------------------------------*/
2126
2127 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2128 {
2129 HReg r = iselV128Expr_wrk( env, e );
2130 vassert(hregClass(r) == HRcVec128);
2131 vassert(hregIsVirtual(r));
2132 return r;
2133 }
2134
2135 /* DO NOT CALL THIS DIRECTLY */
2136 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2137 {
2138 IRType ty = typeOfIRExpr(env->type_env, e);
2139 vassert(e);
2140 vassert(ty == Ity_V128);
2141
2142 if (e->tag == Iex_RdTmp) {
2143 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2144 }
2145
2146 if (e->tag == Iex_Const) {
2147 /* Only a very limited range of constants is handled. */
2148 vassert(e->Iex.Const.con->tag == Ico_V128);
2149 UShort con = e->Iex.Const.con->Ico.V128;
2150 HReg res = newVRegV(env);
2151 switch (con) {
2152 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2153 addInstr(env, ARM64Instr_VImmQ(res, con));
2154 return res;
2155 case 0x00F0:
2156 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2157 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2158 return res;
2159 case 0x0F00:
2160 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2161 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2162 return res;
2163 case 0x0FF0:
2164 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2165 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2166 return res;
2167 case 0x0FFF:
2168 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2169 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2170 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2171 return res;
2172 case 0xF000:
2173 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2174 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2175 return res;
2176 case 0xFF00:
2177 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2178 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2179 return res;
2180 default:
2181 break;
2182 }
2183 /* Unhandled */
2184 goto v128_expr_bad;
2185 }
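   /* A note on the constant-building idiom above: ARM64Instr_VImmQ(r, m)
      appears to materialise a vector in which bit i of the 16-bit mask m
      selects byte value 0xFF (else 0x00) for byte i, at least for the
      handful of masks used here; and ARM64Instr_VExtV(dst, lo, hi, n) is
      the AArch64 EXT operation, taking bytes n..15 of |lo| followed by
      bytes 0..n-1 of |hi|.  With both sources equal that is a rotate, so
      e.g. 0x00F0 is built as 0x000F (bytes 0..3 = 0xFF) rotated down by
      12, which lands those four 0xFF bytes at byte positions 4..7. */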
2186
2187 if (e->tag == Iex_Load) {
2188 HReg res = newVRegV(env);
2189 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2190 vassert(ty == Ity_V128);
2191 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2192 return res;
2193 }
2194
2195 if (e->tag == Iex_Get) {
2196 UInt offs = (UInt)e->Iex.Get.offset;
2197 if (offs < (1<<12)) {
2198 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2199 HReg res = newVRegV(env);
2200 vassert(ty == Ity_V128);
2201 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2202 return res;
2203 }
2204 goto v128_expr_bad;
2205 }
2206
2207 if (e->tag == Iex_Unop) {
2208
2209 /* Iop_ZeroHIXXofV128 cases */
2210 UShort imm16 = 0;
2211 switch (e->Iex.Unop.op) {
2212 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2213 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2214 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2215 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2216 default: break;
2217 }
2218 if (imm16 != 0) {
2219 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2220 HReg imm = newVRegV(env);
2221 HReg res = newVRegV(env);
2222 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2223 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2224 return res;
2225 }
2226
2227 /* Other cases */
2228 switch (e->Iex.Unop.op) {
2229 case Iop_NotV128:
2230 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2231 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2232 case Iop_Abs64x2: case Iop_Abs32x4:
2233 case Iop_Abs16x8: case Iop_Abs8x16:
2234 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2235 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2236 case Iop_Cnt8x16:
2237 case Iop_Reverse1sIn8_x16:
2238 case Iop_Reverse8sIn16_x8:
2239 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2240 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2241 case Iop_Reverse32sIn64_x2:
2242 case Iop_RecipEst32Ux4:
2243 case Iop_RSqrtEst32Ux4:
2244 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2245 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2246 {
2247 HReg res = newVRegV(env);
2248 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2249 Bool setRM = False;
2250 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2251 switch (e->Iex.Unop.op) {
2252 case Iop_NotV128: op = ARM64vecu_NOT; break;
2253 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2254 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2255 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2256 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2257 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2258 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2259 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2260 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2261 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2262 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2263 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2264 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2265 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2266 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2267 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2268 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2269 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2270 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2271 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2272 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2273 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2274 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2275 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2276 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2277 case Iop_RecipEst64Fx2: setRM = True;
2278 op = ARM64vecu_FRECPE64x2; break;
2279 case Iop_RecipEst32Fx4: setRM = True;
2280 op = ARM64vecu_FRECPE32x4; break;
2281 case Iop_RSqrtEst64Fx2: setRM = True;
2282 op = ARM64vecu_FRSQRTE64x2; break;
2283 case Iop_RSqrtEst32Fx4: setRM = True;
2284 op = ARM64vecu_FRSQRTE32x4; break;
2285 default: vassert(0);
2286 }
2287 if (setRM) {
2288 // This is a bit of a kludge. We should do rm properly for
2289 // these recip-est insns, but that would require changing the
2290 // primop's type to take an rmode.
2291 set_FPCR_rounding_mode(env, IRExpr_Const(
2292 IRConst_U32(Irrm_NEAREST)));
2293 }
2294 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2295 return res;
2296 }
2297 case Iop_CmpNEZ8x16:
2298 case Iop_CmpNEZ16x8:
2299 case Iop_CmpNEZ32x4:
2300 case Iop_CmpNEZ64x2: {
2301 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2302 HReg zero = newVRegV(env);
2303 HReg res = newVRegV(env);
2304 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2305 switch (e->Iex.Unop.op) {
2306 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2307 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2308 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2309 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2310 default: vassert(0);
2311 }
2312 // This is pretty feeble. Better: use CMP against zero
2313 // and avoid the extra instruction and extra register.
2314 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2315 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2316 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2317 return res;
2318 }
2319 case Iop_V256toV128_0:
2320 case Iop_V256toV128_1: {
2321 HReg vHi, vLo;
2322 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2323 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2324 }
2325 case Iop_64UtoV128: {
2326 HReg res = newVRegV(env);
2327 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2328 addInstr(env, ARM64Instr_VQfromX(res, arg));
2329 return res;
2330 }
2331 case Iop_Widen8Sto16x8: {
2332 HReg res = newVRegV(env);
2333 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2334 addInstr(env, ARM64Instr_VQfromX(res, arg));
2335 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2336 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2337 res, res, 8));
2338 return res;
2339 }
2340 case Iop_Widen16Sto32x4: {
2341 HReg res = newVRegV(env);
2342 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2343 addInstr(env, ARM64Instr_VQfromX(res, arg));
2344 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2345 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2346 res, res, 16));
2347 return res;
2348 }
2349 case Iop_Widen32Sto64x2: {
2350 HReg res = newVRegV(env);
2351 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2352 addInstr(env, ARM64Instr_VQfromX(res, arg));
2353 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2354 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2355 res, res, 32));
2356 return res;
2357 }
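      /* How the Widen*Sto* idiom above works, taking Widen8Sto16x8 as the
         example: VQfromX puts the 64-bit source in the low half of |res|;
         ZIP1 of |res| with itself interleaves each source byte with a copy
         of itself, so every 16-bit lane holds (b << 8) | b; the signed
         shift right by 8 then leaves the sign-extension of b in each lane.
         The 16->32 and 32->64 cases are the same trick at wider lanes. */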
2358 /* ... */
2359 default:
2360 break;
2361 } /* switch on the unop */
2362 } /* if (e->tag == Iex_Unop) */
2363
2364 if (e->tag == Iex_Binop) {
2365 switch (e->Iex.Binop.op) {
2366 case Iop_Sqrt32Fx4:
2367 case Iop_Sqrt64Fx2: {
2368 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2369 HReg res = newVRegV(env);
2370 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2371 ARM64VecUnaryOp op
2372 = e->Iex.Binop.op == Iop_Sqrt32Fx4
2373 ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2374 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2375 return res;
2376 }
2377 case Iop_64HLtoV128: {
2378 HReg res = newVRegV(env);
2379 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2380 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2381 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2382 return res;
2383 }
2384 /* -- Cases where we can generate a simple three-reg instruction. -- */
2385 case Iop_AndV128:
2386 case Iop_OrV128:
2387 case Iop_XorV128:
2388 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2389 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2390 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2391 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2392 case Iop_Add64x2: case Iop_Add32x4:
2393 case Iop_Add16x8: case Iop_Add8x16:
2394 case Iop_Sub64x2: case Iop_Sub32x4:
2395 case Iop_Sub16x8: case Iop_Sub8x16:
2396 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2397 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2398 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2399 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2400 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2401 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2402 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2403 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2404 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2405 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2406 case Iop_Perm8x16:
2407 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2408 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2409 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2410 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2411 case Iop_InterleaveHI32x4:
2412 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2413 case Iop_InterleaveLO32x4:
2414 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2415 case Iop_PolynomialMul8x16:
2416 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2417 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2418 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2419 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2420 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2421 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2422 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2423 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2424 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2425 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2426 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2427 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2428 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2429 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2430 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2431 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2432 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2433 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2434 case Iop_Max64Fx2: case Iop_Max32Fx4:
2435 case Iop_Min64Fx2: case Iop_Min32Fx4:
2436 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2437 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2438 {
2439 HReg res = newVRegV(env);
2440 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2441 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2442 Bool sw = False;
2443 Bool setRM = False;
2444 ARM64VecBinOp op = ARM64vecb_INVALID;
2445 switch (e->Iex.Binop.op) {
2446 case Iop_AndV128: op = ARM64vecb_AND; break;
2447 case Iop_OrV128: op = ARM64vecb_ORR; break;
2448 case Iop_XorV128: op = ARM64vecb_XOR; break;
2449 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2450 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2451 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2452 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2453 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2454 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2455 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2456 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2457 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2458 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2459 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2460 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2461 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2462 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2463 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2464 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2465 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2466 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2467 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2468 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2469 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2470 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2471 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2472 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2473 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2474 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2475 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2476 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2477 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2478 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2479 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2480 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2481 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2482 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2483 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2484 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2485 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2486 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2487 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2488 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2489 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2490 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2491 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2492 break;
2493 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2494 break;
2495 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2496 break;
2497 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2498 break;
2499 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2500 break;
2501 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2502 break;
2503 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2504 break;
2505 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2506 break;
2507 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2508 break;
2509 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2510 break;
2511 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2512 break;
2513 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2514 break;
2515 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2516 break;
2517 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2518 break;
2519 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2520 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2521 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2522 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2523 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2524 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2525 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2526 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2527 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2528 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2529 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2530 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2531 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2532 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2533 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2534 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2535 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2536 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2537 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2538 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2539 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2540 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2541 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2542 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2543 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2544 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2545 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2546 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2547 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2548 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2549 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2550 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2551 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2552 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2553 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2554 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2555 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2556 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2557 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2558 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2559 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2560 case Iop_RecipStep64Fx2: setRM = True;
2561 op = ARM64vecb_FRECPS64x2; break;
2562 case Iop_RecipStep32Fx4: setRM = True;
2563 op = ARM64vecb_FRECPS32x4; break;
2564 case Iop_RSqrtStep64Fx2: setRM = True;
2565 op = ARM64vecb_FRSQRTS64x2; break;
2566 case Iop_RSqrtStep32Fx4: setRM = True;
2567 op = ARM64vecb_FRSQRTS32x4; break;
2568 default: vassert(0);
2569 }
2570 if (setRM) {
2571 // This is a bit of a kludge. We should do rm properly for
2572 // these recip-step insns, but that would require changing the
2573 // primop's type to take an rmode.
2574 set_FPCR_rounding_mode(env, IRExpr_Const(
2575 IRConst_U32(Irrm_NEAREST)));
2576 }
2577 if (sw) {
2578 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2579 } else {
2580 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2581 }
2582 return res;
2583 }
2584 /* -- These only have 2 operand instructions, so we have to first move
2585 the first argument into a new register, for modification. -- */
2586 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2587 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2588 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2589 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2590 {
2591 HReg res = newVRegV(env);
2592 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2593 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2594 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2595 switch (e->Iex.Binop.op) {
2596 /* In the following 8 cases, the US - SU switching is intended.
2597 See the comments in libvex_ir.h for details, and also the
2598 ARM64 front end, where these primops are generated. */
2599 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2600 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2601 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2602 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2603 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2604 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2605 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2606 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2607 default: vassert(0);
2608 }
2609 /* The order of the operands is important. Although this is
2610 basically addition, the two operands are extended differently,
2611 making it important to get them into the correct registers in
2612 the instruction. */
2613 addInstr(env, ARM64Instr_VMov(16, res, argR));
2614 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2615 return res;
2616 }
2617 /* -- Shifts by an immediate. -- */
2618 case Iop_ShrN64x2: case Iop_ShrN32x4:
2619 case Iop_ShrN16x8: case Iop_ShrN8x16:
2620 case Iop_SarN64x2: case Iop_SarN32x4:
2621 case Iop_SarN16x8: case Iop_SarN8x16:
2622 case Iop_ShlN64x2: case Iop_ShlN32x4:
2623 case Iop_ShlN16x8: case Iop_ShlN8x16:
2624 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2625 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2626 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2627 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2628 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2629 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2630 {
2631 IRExpr* argL = e->Iex.Binop.arg1;
2632 IRExpr* argR = e->Iex.Binop.arg2;
2633 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2634 UInt amt = argR->Iex.Const.con->Ico.U8;
2635 UInt limLo = 0;
2636 UInt limHi = 0;
2637 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2638 /* Establish the instruction to use. */
2639 switch (e->Iex.Binop.op) {
2640 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2641 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2642 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2643 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2644 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2645 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2646 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2647 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2648 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2649 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2650 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2651 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2652 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2653 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2654 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2655 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2656 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2657 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2658 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2659 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2660 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2661 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2662 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2663 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2664 default: vassert(0);
2665 }
2666 /* Establish the shift limits, for sanity check purposes only. */
2667 switch (e->Iex.Binop.op) {
2668 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2669 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2670 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2671 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2672 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2673 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2674 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2675 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2676 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2677 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2678 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2679 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2680 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2681 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2682 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2683 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2684 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2685 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2686 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2687 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
2688 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2689 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2690 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2691 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
2692 default: vassert(0);
2693 }
2694 /* For left shifts, the allowable amt values are
2695 0 .. lane_bits-1. For right shifts the allowable
2696 values are 1 .. lane_bits. */
2697 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2698 HReg src = iselV128Expr(env, argL);
2699 HReg dst = newVRegV(env);
2700 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2701 return dst;
2702 }
2703 /* Special case some no-op shifts that the arm64 front end
2704 throws at us. We can't generate any instructions for these,
2705 but we don't need to either. */
2706 switch (e->Iex.Binop.op) {
2707 case Iop_ShrN64x2: case Iop_ShrN32x4:
2708 case Iop_ShrN16x8: case Iop_ShrN8x16:
2709 if (amt == 0) {
2710 return iselV128Expr(env, argL);
2711 }
2712 break;
2713 default:
2714 break;
2715 }
2716 /* otherwise unhandled */
2717 }
2718 /* else fall out; this is unhandled */
2719 break;
2720 }
2721 /* -- Saturating narrowing by an immediate -- */
2722 /* uu */
2723 case Iop_QandQShrNnarrow16Uto8Ux8:
2724 case Iop_QandQShrNnarrow32Uto16Ux4:
2725 case Iop_QandQShrNnarrow64Uto32Ux2:
2726 /* ss */
2727 case Iop_QandQSarNnarrow16Sto8Sx8:
2728 case Iop_QandQSarNnarrow32Sto16Sx4:
2729 case Iop_QandQSarNnarrow64Sto32Sx2:
2730 /* su */
2731 case Iop_QandQSarNnarrow16Sto8Ux8:
2732 case Iop_QandQSarNnarrow32Sto16Ux4:
2733 case Iop_QandQSarNnarrow64Sto32Ux2:
2734 /* ruu */
2735 case Iop_QandQRShrNnarrow16Uto8Ux8:
2736 case Iop_QandQRShrNnarrow32Uto16Ux4:
2737 case Iop_QandQRShrNnarrow64Uto32Ux2:
2738 /* rss */
2739 case Iop_QandQRSarNnarrow16Sto8Sx8:
2740 case Iop_QandQRSarNnarrow32Sto16Sx4:
2741 case Iop_QandQRSarNnarrow64Sto32Sx2:
2742 /* rsu */
2743 case Iop_QandQRSarNnarrow16Sto8Ux8:
2744 case Iop_QandQRSarNnarrow32Sto16Ux4:
2745 case Iop_QandQRSarNnarrow64Sto32Ux2:
2746 {
2747 IRExpr* argL = e->Iex.Binop.arg1;
2748 IRExpr* argR = e->Iex.Binop.arg2;
2749 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2750 UInt amt = argR->Iex.Const.con->Ico.U8;
2751 UInt limit = 0;
2752 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2753 switch (e->Iex.Binop.op) {
2754 /* uu */
2755 case Iop_QandQShrNnarrow64Uto32Ux2:
2756 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2757 case Iop_QandQShrNnarrow32Uto16Ux4:
2758 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2759 case Iop_QandQShrNnarrow16Uto8Ux8:
2760 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2761 /* ss */
2762 case Iop_QandQSarNnarrow64Sto32Sx2:
2763 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2764 case Iop_QandQSarNnarrow32Sto16Sx4:
2765 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2766 case Iop_QandQSarNnarrow16Sto8Sx8:
2767 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2768 /* su */
2769 case Iop_QandQSarNnarrow64Sto32Ux2:
2770 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2771 case Iop_QandQSarNnarrow32Sto16Ux4:
2772 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2773 case Iop_QandQSarNnarrow16Sto8Ux8:
2774 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2775 /* ruu */
2776 case Iop_QandQRShrNnarrow64Uto32Ux2:
2777 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2778 case Iop_QandQRShrNnarrow32Uto16Ux4:
2779 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2780 case Iop_QandQRShrNnarrow16Uto8Ux8:
2781 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2782 /* rss */
2783 case Iop_QandQRSarNnarrow64Sto32Sx2:
2784 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2785 case Iop_QandQRSarNnarrow32Sto16Sx4:
2786 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2787 case Iop_QandQRSarNnarrow16Sto8Sx8:
2788 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2789 /* rsu */
2790 case Iop_QandQRSarNnarrow64Sto32Ux2:
2791 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2792 case Iop_QandQRSarNnarrow32Sto16Ux4:
2793 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2794 case Iop_QandQRSarNnarrow16Sto8Ux8:
2795 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2796 /**/
2797 default:
2798 vassert(0);
2799 }
2800 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2801 HReg src = iselV128Expr(env, argL);
2802 HReg dst = newVRegV(env);
2803 HReg fpsr = newVRegI(env);
2804 /* Clear FPSR.Q, do the operation, and return both its
2805 result and the new value of FPSR.Q. We can simply
2806 zero out FPSR since all the other bits have no relevance
2807 in VEX generated code. */
2808 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2809 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2810 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2811 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
2812 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2813 ARM64sh_SHR));
2814 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2815 vassert(ril_one);
2816 addInstr(env, ARM64Instr_Logic(fpsr,
2817 fpsr, ril_one, ARM64lo_AND));
2818 /* Now we have: the main (shift) result in the bottom half
2819 of |dst|, and the Q bit at the bottom of |fpsr|.
2820 Combining them with an "InterleaveLO64x2" style operation
2821 produces a 128-bit value, dst[63:0]:fpsr[63:0],
2822 which is what we want. */
2823 HReg scratch = newVRegV(env);
2824 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2825 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2826 dst, dst, scratch));
2827 return dst;
2828 }
2829 }
2830 /* else fall out; this is unhandled */
2831 break;
2832 }
2833
2834 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2835 // as it is in some ways more general and often leads to better
2836 // code overall.
2837 case Iop_ShlV128:
2838 case Iop_ShrV128: {
2839 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2840 /* This is tricky. Generate an EXT instruction with zeroes in
2841 the high operand (shift right) or low operand (shift left).
2842 Note that we can only slice in the EXT instruction at a byte
2843 level of granularity, so the shift amount needs careful
2844 checking. */
2845 IRExpr* argL = e->Iex.Binop.arg1;
2846 IRExpr* argR = e->Iex.Binop.arg2;
2847 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2848 UInt amt = argR->Iex.Const.con->Ico.U8;
2849 Bool amtOK = False;
2850 switch (amt) {
2851 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2852 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2853 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2854 amtOK = True; break;
2855 }
2856 /* We could also deal with amt==0 by copying the source to
2857 the destination, but there's no need for that so far. */
2858 if (amtOK) {
2859 HReg src = iselV128Expr(env, argL);
2860 HReg srcZ = newVRegV(env);
2861 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2862 UInt immB = amt / 8;
2863 vassert(immB >= 1 && immB <= 15);
2864 HReg dst = newVRegV(env);
2865 if (isSHR) {
2866 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2867 immB));
2868 } else {
2869 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2870 16 - immB));
2871 }
2872 return dst;
2873 }
2874 }
2875 /* else fall out; this is unhandled */
2876 break;
2877 }
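      /* Example: ShrV128(x, 0x20) uses immB = 4 with a zero vector as the
         high operand, so the EXT result is bytes 4..15 of x followed by
         four zero bytes -- a 32-bit right shift of the whole vector.
         ShlV128 uses the mirrored arrangement with 16 - immB. */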
2878
2879 case Iop_PolynomialMull8x8:
2880 case Iop_Mull32Ux2:
2881 case Iop_Mull16Ux4:
2882 case Iop_Mull8Ux8:
2883 case Iop_Mull32Sx2:
2884 case Iop_Mull16Sx4:
2885 case Iop_Mull8Sx8:
2886 case Iop_QDMull32Sx2:
2887 case Iop_QDMull16Sx4:
2888 {
2889 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2890 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2891 HReg vSrcL = newVRegV(env);
2892 HReg vSrcR = newVRegV(env);
2893 HReg dst = newVRegV(env);
2894 ARM64VecBinOp op = ARM64vecb_INVALID;
2895 switch (e->Iex.Binop.op) {
2896 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
2897 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
2898 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
2899 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
2900 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
2901 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
2902 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
2903 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
2904 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
2905 default: vassert(0);
2906 }
2907 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2908 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
2909 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2910 return dst;
2911 }
2912
2913 /* ... */
2914 default:
2915 break;
2916 } /* switch on the binop */
2917 } /* if (e->tag == Iex_Binop) */
2918
2919 if (e->tag == Iex_Triop) {
2920 IRTriop* triop = e->Iex.Triop.details;
2921 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2922 switch (triop->op) {
2923 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2924 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2925 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2926 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2927 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
2928 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
2929 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
2930 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
2931 default: break;
2932 }
2933 if (vecbop != ARM64vecb_INVALID) {
2934 HReg argL = iselV128Expr(env, triop->arg2);
2935 HReg argR = iselV128Expr(env, triop->arg3);
2936 HReg dst = newVRegV(env);
2937 set_FPCR_rounding_mode(env, triop->arg1);
2938 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
2939 return dst;
2940 }
2941
2942 if (triop->op == Iop_SliceV128) {
2943 /* Note that, compared to ShlV128/ShrV128 just above, the shift
2944 amount here is in bytes, not bits. */
2945 IRExpr* argHi = triop->arg1;
2946 IRExpr* argLo = triop->arg2;
2947 IRExpr* argAmt = triop->arg3;
2948 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
2949 UInt amt = argAmt->Iex.Const.con->Ico.U8;
2950 Bool amtOK = amt >= 1 && amt <= 15;
2951 /* We could also deal with amt==0 by copying argLo to
2952 the destination, but there's no need for that so far. */
2953 if (amtOK) {
2954 HReg srcHi = iselV128Expr(env, argHi);
2955 HReg srcLo = iselV128Expr(env, argLo);
2956 HReg dst = newVRegV(env);
2957 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
2958 return dst;
2959 }
2960 }
2961 /* else fall out; this is unhandled */
2962 }
2963
2964 } /* if (e->tag == Iex_Triop) */
2965
2966 v128_expr_bad:
2967 ppIRExpr(e);
2968 vpanic("iselV128Expr_wrk");
2969 }
2970
2971
2972 /*---------------------------------------------------------*/
2973 /*--- ISEL: Floating point expressions (64 bit) ---*/
2974 /*---------------------------------------------------------*/
2975
2976 /* Compute a 64-bit floating point value into a register, the identity
2977 of which is returned. As with iselIntExpr_R, the reg may be either
2978 real or virtual; in any case it must not be changed by subsequent
2979 code emitted by the caller. */
2980
2981 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2982 {
2983 HReg r = iselDblExpr_wrk( env, e );
2984 # if 0
2985 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2986 # endif
2987 vassert(hregClass(r) == HRcFlt64);
2988 vassert(hregIsVirtual(r));
2989 return r;
2990 }
2991
2992 /* DO NOT CALL THIS DIRECTLY */
2993 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2994 {
2995 IRType ty = typeOfIRExpr(env->type_env,e);
2996 vassert(e);
2997 vassert(ty == Ity_F64);
2998
2999 if (e->tag == Iex_RdTmp) {
3000 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3001 }
3002
3003 if (e->tag == Iex_Const) {
3004 IRConst* con = e->Iex.Const.con;
3005 if (con->tag == Ico_F64i) {
3006 HReg src = newVRegI(env);
3007 HReg dst = newVRegD(env);
3008 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3009 addInstr(env, ARM64Instr_VDfromX(dst, src));
3010 return dst;
3011 }
3012 if (con->tag == Ico_F64) {
3013 HReg src = newVRegI(env);
3014 HReg dst = newVRegD(env);
3015 union { Double d64; ULong u64; } u;
3016 vassert(sizeof(u) == 8);
3017 u.d64 = con->Ico.F64;
3018 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3019 addInstr(env, ARM64Instr_VDfromX(dst, src));
3020 return dst;
3021 }
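      /* The union above simply reinterprets the F64 constant's IEEE754 bit
         pattern as a ULong, so it can be materialised with Imm64 and then
         moved sideways into a D register via VDfromX. */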
3022 }
3023
3024 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3025 vassert(e->Iex.Load.ty == Ity_F64);
3026 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3027 HReg res = newVRegD(env);
3028 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3029 return res;
3030 }
3031
3032 if (e->tag == Iex_Get) {
3033 Int offs = e->Iex.Get.offset;
3034 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3035 HReg rD = newVRegD(env);
3036 HReg rN = get_baseblock_register();
3037 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3038 return rD;
3039 }
3040 }
3041
3042 if (e->tag == Iex_Unop) {
3043 switch (e->Iex.Unop.op) {
3044 case Iop_NegF64: {
3045 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3046 HReg dst = newVRegD(env);
3047 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3048 return dst;
3049 }
3050 case Iop_AbsF64: {
3051 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3052 HReg dst = newVRegD(env);
3053 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3054 return dst;
3055 }
3056 case Iop_F32toF64: {
3057 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3058 HReg dst = newVRegD(env);
3059 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3060 return dst;
3061 }
3062 case Iop_F16toF64: {
3063 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3064 HReg dst = newVRegD(env);
3065 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3066 return dst;
3067 }
3068 case Iop_I32UtoF64:
3069 case Iop_I32StoF64: {
3070 /* Rounding mode is not involved here, since the
3071 conversion can always be done without loss of
3072 precision. */
3073 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3074 HReg dst = newVRegD(env);
3075 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3076 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3077 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3078 return dst;
3079 }
3080 default:
3081 break;
3082 }
3083 }
3084
3085 if (e->tag == Iex_Binop) {
3086 switch (e->Iex.Binop.op) {
3087 case Iop_RoundF64toInt:
3088 case Iop_SqrtF64:
3089 case Iop_RecpExpF64: {
3090 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3091 HReg dst = newVRegD(env);
3092 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3093 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3094 switch (e->Iex.Binop.op) {
3095 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3096 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3097 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3098 default: vassert(0);
3099 }
3100 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3101 return dst;
3102 }
3103 case Iop_I64StoF64:
3104 case Iop_I64UtoF64: {
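         /* Unlike the 32-bit cases above, a 64-bit integer need not be
            exactly representable in an F64 (53-bit significand), so the
            rounding mode supplied in arg1 must be installed first. */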
3105 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3106 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3107 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3108 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3109 HReg dstS = newVRegD(env);
3110 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3111 return dstS;
3112 }
3113 default:
3114 break;
3115 }
3116 }
3117
3118 if (e->tag == Iex_Triop) {
3119 IRTriop* triop = e->Iex.Triop.details;
3120 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3121 switch (triop->op) {
3122 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3123 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3124 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3125 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3126 default: break;
3127 }
3128 if (dblop != ARM64fpb_INVALID) {
3129 HReg argL = iselDblExpr(env, triop->arg2);
3130 HReg argR = iselDblExpr(env, triop->arg3);
3131 HReg dst = newVRegD(env);
3132 set_FPCR_rounding_mode(env, triop->arg1);
3133 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3134 return dst;
3135 }
3136 }
3137
3138 if (e->tag == Iex_ITE) {
3139 /* ITE(ccexpr, iftrue, iffalse) */
3140 ARM64CondCode cc;
3141 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3142 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3143 HReg dst = newVRegD(env);
3144 cc = iselCondCode(env, e->Iex.ITE.cond);
3145 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3146 return dst;
3147 }
3148
3149 ppIRExpr(e);
3150 vpanic("iselDblExpr_wrk");
3151 }
3152
3153
3154 /*---------------------------------------------------------*/
3155 /*--- ISEL: Floating point expressions (32 bit) ---*/
3156 /*---------------------------------------------------------*/
3157
3158 /* Compute a 32-bit floating point value into a register, the identity
3159 of which is returned. As with iselIntExpr_R, the reg may be either
3160 real or virtual; in any case it must not be changed by subsequent
3161 code emitted by the caller. Values are generated into HRcFlt64
3162 registers despite the values themselves being Ity_F32s. */
3163
3164 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3165 {
3166 HReg r = iselFltExpr_wrk( env, e );
3167 # if 0
3168 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3169 # endif
3170 vassert(hregClass(r) == HRcFlt64);
3171 vassert(hregIsVirtual(r));
3172 return r;
3173 }
3174
3175 /* DO NOT CALL THIS DIRECTLY */
3176 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3177 {
3178 IRType ty = typeOfIRExpr(env->type_env,e);
3179 vassert(e);
3180 vassert(ty == Ity_F32);
3181
3182 if (e->tag == Iex_RdTmp) {
3183 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3184 }
3185
3186 if (e->tag == Iex_Const) {
3187       /* This is something of a kludge.  Since a 32 bit floating point
3188          zero is just all zeroes, create a 64 bit zero word and
3189          transfer it.  This avoids having to create an SfromW
3190          instruction for this specific case. */
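      /* (This presumably relies on the fact that, architecturally, the
         S view of a vector register is its low 32 bits, so an
         all-zeroes D value is also a valid F32 zero.) */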
3191 IRConst* con = e->Iex.Const.con;
3192 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3193 HReg src = newVRegI(env);
3194 HReg dst = newVRegD(env);
3195 addInstr(env, ARM64Instr_Imm64(src, 0));
3196 addInstr(env, ARM64Instr_VDfromX(dst, src));
3197 return dst;
3198 }
3199 if (con->tag == Ico_F32) {
3200 HReg src = newVRegI(env);
3201 HReg dst = newVRegD(env);
3202 union { Float f32; UInt u32; } u;
3203 vassert(sizeof(u) == 4);
3204 u.f32 = con->Ico.F32;
3205 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3206 addInstr(env, ARM64Instr_VDfromX(dst, src));
3207 return dst;
3208 }
3209 }
3210
3211 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3212 vassert(e->Iex.Load.ty == Ity_F32);
3213 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3214 HReg res = newVRegD(env);
3215 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3216 return res;
3217 }
3218
3219 if (e->tag == Iex_Get) {
3220 Int offs = e->Iex.Get.offset;
3221 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3222 HReg rD = newVRegD(env);
3223 HReg rN = get_baseblock_register();
3224 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3225 return rD;
3226 }
3227 }
3228
3229 if (e->tag == Iex_Unop) {
3230 switch (e->Iex.Unop.op) {
3231 case Iop_NegF32: {
3232 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3233 HReg dst = newVRegD(env);
3234 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3235 return dst;
3236 }
3237 case Iop_AbsF32: {
3238 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3239 HReg dst = newVRegD(env);
3240 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3241 return dst;
3242 }
3243 case Iop_F16toF32: {
3244 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3245 HReg dst = newVRegD(env);
3246 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3247 return dst;
3248 }
3249 default:
3250 break;
3251 }
3252 }
3253
3254 if (e->tag == Iex_Binop) {
3255 switch (e->Iex.Binop.op) {
3256 case Iop_RoundF32toInt:
3257 case Iop_SqrtF32:
3258 case Iop_RecpExpF32: {
3259 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3260 HReg dst = newVRegD(env);
3261 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3262 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3263 switch (e->Iex.Binop.op) {
3264 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3265 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3266 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3267 default: vassert(0);
3268 }
3269 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3270 return dst;
3271 }
3272 case Iop_F64toF32: {
3273 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3274 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3275 HReg dstS = newVRegD(env);
3276 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3277 return dstS;
3278 }
3279 case Iop_I32UtoF32:
3280 case Iop_I32StoF32:
3281 case Iop_I64UtoF32:
3282 case Iop_I64StoF32: {
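            /* Unlike the 32-bit-int-to-F64 case, these conversions can
               lose precision (an F32 has only a 24-bit significand), so
               the rounding mode in arg1 must be installed first. */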
3283 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3284 switch (e->Iex.Binop.op) {
3285 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3286 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3287 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3288 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3289 default: vassert(0);
3290 }
3291 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3292 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3293 HReg dstS = newVRegD(env);
3294 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3295 return dstS;
3296 }
3297 default:
3298 break;
3299 }
3300 }
3301
3302 if (e->tag == Iex_Triop) {
3303 IRTriop* triop = e->Iex.Triop.details;
3304 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3305 switch (triop->op) {
3306 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3307 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3308 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3309 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3310 default: break;
3311 }
3312 if (sglop != ARM64fpb_INVALID) {
3313 HReg argL = iselFltExpr(env, triop->arg2);
3314 HReg argR = iselFltExpr(env, triop->arg3);
3315 HReg dst = newVRegD(env);
3316 set_FPCR_rounding_mode(env, triop->arg1);
3317 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3318 return dst;
3319 }
3320 }
3321
3322 if (e->tag == Iex_ITE) {
3323 /* ITE(ccexpr, iftrue, iffalse) */
3324 ARM64CondCode cc;
3325 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3326 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3327 HReg dst = newVRegD(env);
3328 cc = iselCondCode(env, e->Iex.ITE.cond);
3329 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3330 return dst;
3331 }
3332
3333 ppIRExpr(e);
3334 vpanic("iselFltExpr_wrk");
3335 }
3336
3337
3338 /*---------------------------------------------------------*/
3339 /*--- ISEL: Floating point expressions (16 bit) ---*/
3340 /*---------------------------------------------------------*/
3341
3342 /* Compute a 16-bit floating point value into a register, the identity
3343 of which is returned. As with iselIntExpr_R, the reg may be either
3344 real or virtual; in any case it must not be changed by subsequent
3345 code emitted by the caller. Values are generated into HRcFlt64
3346 registers despite the values themselves being Ity_F16s. */
3347
3348 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3349 {
3350 HReg r = iselF16Expr_wrk( env, e );
3351 # if 0
3352 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3353 # endif
3354 vassert(hregClass(r) == HRcFlt64);
3355 vassert(hregIsVirtual(r));
3356 return r;
3357 }
3358
3359 /* DO NOT CALL THIS DIRECTLY */
3360 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3361 {
3362 IRType ty = typeOfIRExpr(env->type_env,e);
3363 vassert(e);
3364 vassert(ty == Ity_F16);
3365
3366 if (e->tag == Iex_Get) {
3367 Int offs = e->Iex.Get.offset;
3368 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3369 HReg rD = newVRegD(env);
3370 HReg rN = get_baseblock_register();
3371 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3372 return rD;
3373 }
3374 }
3375
3376 if (e->tag == Iex_Binop) {
3377 switch (e->Iex.Binop.op) {
3378 case Iop_F32toF16: {
3379 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3380 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3381 HReg dstH = newVRegD(env);
3382 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3383 return dstH;
3384 }
3385 case Iop_F64toF16: {
3386 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3387 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3388 HReg dstH = newVRegD(env);
3389 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3390 return dstH;
3391 }
3392 default:
3393 break;
3394 }
3395 }
3396
3397 ppIRExpr(e);
3398 vpanic("iselF16Expr_wrk");
3399 }
3400
3401
3402 /*---------------------------------------------------------*/
3403 /*--- ISEL: Vector expressions (256 bit) ---*/
3404 /*---------------------------------------------------------*/
3405
3406 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3407 ISelEnv* env, IRExpr* e )
3408 {
3409 iselV256Expr_wrk( rHi, rLo, env, e );
3410 vassert(hregClass(*rHi) == HRcVec128);
3411 vassert(hregClass(*rLo) == HRcVec128);
3412 vassert(hregIsVirtual(*rHi));
3413 vassert(hregIsVirtual(*rLo));
3414 }
3415
3416 /* DO NOT CALL THIS DIRECTLY */
3417 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3418 ISelEnv* env, IRExpr* e )
3419 {
3420 vassert(e);
3421 IRType ty = typeOfIRExpr(env->type_env,e);
3422 vassert(ty == Ity_V256);
3423
3424 /* read 256-bit IRTemp */
3425 if (e->tag == Iex_RdTmp) {
3426 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3427 return;
3428 }
3429
3430 if (e->tag == Iex_Binop) {
3431 switch (e->Iex.Binop.op) {
3432 case Iop_V128HLtoV256: {
3433 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3434 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3435 return;
3436 }
3437 case Iop_QandSQsh64x2:
3438 case Iop_QandSQsh32x4:
3439 case Iop_QandSQsh16x8:
3440 case Iop_QandSQsh8x16:
3441 case Iop_QandUQsh64x2:
3442 case Iop_QandUQsh32x4:
3443 case Iop_QandUQsh16x8:
3444 case Iop_QandUQsh8x16:
3445 case Iop_QandSQRsh64x2:
3446 case Iop_QandSQRsh32x4:
3447 case Iop_QandSQRsh16x8:
3448 case Iop_QandSQRsh8x16:
3449 case Iop_QandUQRsh64x2:
3450 case Iop_QandUQRsh32x4:
3451 case Iop_QandUQRsh16x8:
3452 case Iop_QandUQRsh8x16:
3453 {
3454 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3455 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3456 HReg fpsr = newVRegI(env);
3457 HReg resHi = newVRegV(env);
3458 HReg resLo = newVRegV(env);
3459 ARM64VecBinOp op = ARM64vecb_INVALID;
3460 switch (e->Iex.Binop.op) {
3461 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3462 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3463 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3464 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3465 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3466 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3467 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3468 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3469 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3470 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3471 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3472 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3473 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3474 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3475 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3476 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3477 default: vassert(0);
3478 }
3479 /* Clear FPSR.Q, do the operation, and return both its result
3480 and the new value of FPSR.Q. We can simply zero out FPSR
3481 since all the other bits have no relevance in VEX generated
3482 code. */
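            /* (FPSR.QC, the cumulative saturation flag, is bit 27; hence
               the right shift by 27 and the AND with 1 below, which leave
               just that flag in the bottom bit of |fpsr|.) */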
3483 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3484 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3485 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3486 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3487 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3488 ARM64sh_SHR));
3489 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3490 vassert(ril_one);
3491 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3492 /* Now we have: the main (shift) result in |resLo|, and the
3493 Q bit at the bottom of |fpsr|. */
3494 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3495 *rHi = resHi;
3496 *rLo = resLo;
3497 return;
3498 }
3499
3500 /* ... */
3501 default:
3502 break;
3503 } /* switch on the binop */
3504 } /* if (e->tag == Iex_Binop) */
3505
3506 ppIRExpr(e);
3507 vpanic("iselV256Expr_wrk");
3508 }
3509
3510
3511 /*---------------------------------------------------------*/
3512 /*--- ISEL: Statements ---*/
3513 /*---------------------------------------------------------*/
3514
3515 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3516 {
3517 if (vex_traceflags & VEX_TRACE_VCODE) {
3518 vex_printf("\n-- ");
3519 ppIRStmt(stmt);
3520 vex_printf("\n");
3521 }
3522 switch (stmt->tag) {
3523
3524 /* --------- STORE --------- */
3525 /* little-endian write to memory */
3526 case Ist_Store: {
3527 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3528 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3529 IREndness end = stmt->Ist.Store.end;
3530
3531 if (tya != Ity_I64 || end != Iend_LE)
3532 goto stmt_fail;
3533
3534 if (tyd == Ity_I64) {
3535 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3536 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3537 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3538 return;
3539 }
3540 if (tyd == Ity_I32) {
3541 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3542 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3543 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3544 return;
3545 }
3546 if (tyd == Ity_I16) {
3547 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3548 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3549 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3550 return;
3551 }
3552 if (tyd == Ity_I8) {
3553 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3554 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3555 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3556 return;
3557 }
3558 if (tyd == Ity_V128) {
3559 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
3560 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3561 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3562 return;
3563 }
3564 if (tyd == Ity_F64) {
3565 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
3566 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3567 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3568 return;
3569 }
3570 if (tyd == Ity_F32) {
3571 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
3572 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3573 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3574 return;
3575 }
3576 break;
3577 }
3578
3579 /* --------- PUT --------- */
3580 /* write guest state, fixed offset */
3581 case Ist_Put: {
3582 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3583 UInt offs = (UInt)stmt->Ist.Put.offset;
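         /* For the integer and FP cases below, the offset must be
            naturally aligned and must fit a scaled unsigned 12-bit
            immediate, hence the (size << 12) limits; presumably these
            stores are emitted with immediate-offset addressing. */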
3584 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3585 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3586 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3587 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3588 return;
3589 }
3590 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3591 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3592 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3593 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3594 return;
3595 }
3596 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3597 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3598 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3599 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3600 return;
3601 }
3602 if (tyd == Ity_I8 && offs < (1<<12)) {
3603 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3604 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3605 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3606 return;
3607 }
3608 if (tyd == Ity_V128 && offs < (1<<12)) {
3609 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
3610 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3611 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3612 return;
3613 }
3614 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3615 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
3616 HReg bbp = get_baseblock_register();
3617 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3618 return;
3619 }
3620 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3621 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
3622 HReg bbp = get_baseblock_register();
3623 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3624 return;
3625 }
3626 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3627 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
3628 HReg bbp = get_baseblock_register();
3629 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3630 return;
3631 }
3632
3633 break;
3634 }
3635
3636 /* --------- TMP --------- */
3637 /* assign value to temporary */
3638 case Ist_WrTmp: {
3639 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3640 IRType ty = typeOfIRTemp(env->type_env, tmp);
3641
3642 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3643 /* We could do a lot better here. But for the time being: */
3644 HReg dst = lookupIRTemp(env, tmp);
3645 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3646 addInstr(env, ARM64Instr_MovI(dst, rD));
3647 return;
3648 }
3649 if (ty == Ity_I1) {
3650          /* Here, we are generating an I1 value into a 64 bit register.
3651             Make sure the value in the register is only ever zero or
3652             one, and nothing else.  This allows optimisation of the
3653             1Uto64(tmp:I1) case, by making it simply a copy of the
3654             register holding 'tmp', the point being that the value in
3655             that register can only have been created here.  LATER: that
3656             seems dangerous; it would be safer to do 'tmp & 1' in that
3657             case.  Also, this could be done with just a single CINC
3658             insn. */
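         /* The emitted sequence is: materialise the constants 0 and 1,
            evaluate the condition, then CSel dst = cc ? one : zero, so
            |dst| is guaranteed to hold exactly 0 or 1. */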
3659 /* CLONE-01 */
3660 HReg zero = newVRegI(env);
3661 HReg one = newVRegI(env);
3662 HReg dst = lookupIRTemp(env, tmp);
3663 addInstr(env, ARM64Instr_Imm64(zero, 0));
3664 addInstr(env, ARM64Instr_Imm64(one, 1));
3665 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3666 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3667 return;
3668 }
3669 if (ty == Ity_F64) {
3670 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3671 HReg dst = lookupIRTemp(env, tmp);
3672 addInstr(env, ARM64Instr_VMov(8, dst, src));
3673 return;
3674 }
3675 if (ty == Ity_F32) {
3676 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3677 HReg dst = lookupIRTemp(env, tmp);
3678 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3679 return;
3680 }
3681 if (ty == Ity_V128) {
3682 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3683 HReg dst = lookupIRTemp(env, tmp);
3684 addInstr(env, ARM64Instr_VMov(16, dst, src));
3685 return;
3686 }
3687 if (ty == Ity_V256) {
3688 HReg srcHi, srcLo, dstHi, dstLo;
3689 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3690 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3691 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3692 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3693 return;
3694 }
3695 break;
3696 }
3697
3698 /* --------- Call to DIRTY helper --------- */
3699 /* call complex ("dirty") helper function */
3700 case Ist_Dirty: {
3701 IRDirty* d = stmt->Ist.Dirty.details;
3702
3703 /* Figure out the return type, if any. */
3704 IRType retty = Ity_INVALID;
3705 if (d->tmp != IRTemp_INVALID)
3706 retty = typeOfIRTemp(env->type_env, d->tmp);
3707
3708 Bool retty_ok = False;
3709 switch (retty) {
3710 case Ity_INVALID: /* function doesn't return anything */
3711 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3712 case Ity_V128:
3713 retty_ok = True; break;
3714 default:
3715 break;
3716 }
3717 if (!retty_ok)
3718 break; /* will go to stmt_fail: */
3719
3720 /* Marshal args, do the call, and set the return value to 0x555..555
3721 if this is a conditional call that returns a value and the
3722 call is skipped. */
3723 UInt addToSp = 0;
3724 RetLoc rloc = mk_RetLoc_INVALID();
3725 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3726 vassert(is_sane_RetLoc(rloc));
3727
3728 /* Now figure out what to do with the returned value, if any. */
3729 switch (retty) {
3730 case Ity_INVALID: {
3731 /* No return value. Nothing to do. */
3732 vassert(d->tmp == IRTemp_INVALID);
3733 vassert(rloc.pri == RLPri_None);
3734 vassert(addToSp == 0);
3735 return;
3736 }
3737 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3738 vassert(rloc.pri == RLPri_Int);
3739 vassert(addToSp == 0);
3740 /* The returned value is in x0. Park it in the register
3741 associated with tmp. */
3742 HReg dst = lookupIRTemp(env, d->tmp);
3743 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3744 return;
3745 }
3746 case Ity_V128: {
3747 /* The returned value is on the stack, and *retloc tells
3748             /* The returned value is on the stack, and |rloc| tells
3749 stack pointer upwards to clear it, as directed by
3750 doHelperCall. */
3751 vassert(rloc.pri == RLPri_V128SpRel);
3752 vassert(rloc.spOff < 256); // stay sane
3753 vassert(addToSp >= 16); // ditto
3754 vassert(addToSp < 256); // ditto
3755 HReg dst = lookupIRTemp(env, d->tmp);
3756 HReg tmp = newVRegI(env); // the address of the returned value
3757 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3758 addInstr(env, ARM64Instr_Arith(tmp, tmp,
3759 ARM64RIA_I12((UShort)rloc.spOff, 0),
3760 True/*isAdd*/ ));
3761 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3762 addInstr(env, ARM64Instr_AddToSP(addToSp));
3763 return;
3764 }
3765 default:
3766 /*NOTREACHED*/
3767 vassert(0);
3768 }
3769 break;
3770 }
3771
3772 /* --------- Load Linked and Store Conditional --------- */
3773 case Ist_LLSC: {
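      /* Note: the LdrEX/StrEX pseudo-insns in this backend appear to
         use a fixed-register convention -- address in X4, data in X2,
         and (for StrEX) the success/failure status in X0 -- which is
         why the code below shuffles values through those real
         registers. */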
3774 if (stmt->Ist.LLSC.storedata == NULL) {
3775 /* LL */
3776 IRTemp res = stmt->Ist.LLSC.result;
3777 IRType ty = typeOfIRTemp(env->type_env, res);
3778 if (ty == Ity_I64 || ty == Ity_I32
3779 || ty == Ity_I16 || ty == Ity_I8) {
3780 Int szB = 0;
3781 HReg r_dst = lookupIRTemp(env, res);
3782 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3783 switch (ty) {
3784 case Ity_I8: szB = 1; break;
3785 case Ity_I16: szB = 2; break;
3786 case Ity_I32: szB = 4; break;
3787 case Ity_I64: szB = 8; break;
3788 default: vassert(0);
3789 }
3790 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
3791 addInstr(env, ARM64Instr_LdrEX(szB));
3792 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
3793 return;
3794 }
3795 goto stmt_fail;
3796 } else {
3797 /* SC */
3798 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
3799 if (tyd == Ity_I64 || tyd == Ity_I32
3800 || tyd == Ity_I16 || tyd == Ity_I8) {
3801 Int szB = 0;
3802 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
3803 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3804 switch (tyd) {
3805 case Ity_I8: szB = 1; break;
3806 case Ity_I16: szB = 2; break;
3807 case Ity_I32: szB = 4; break;
3808 case Ity_I64: szB = 8; break;
3809 default: vassert(0);
3810 }
3811 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
3812 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
3813 addInstr(env, ARM64Instr_StrEX(szB));
3814 } else {
3815 goto stmt_fail;
3816 }
3817          /* Now x0 is 1 if the store failed and 0 if it succeeded.
3818             Change to IR conventions (0 is fail, 1 is success).  Also
3819             transfer the result to r_res. */
3820 IRTemp res = stmt->Ist.LLSC.result;
3821 IRType ty = typeOfIRTemp(env->type_env, res);
3822 HReg r_res = lookupIRTemp(env, res);
3823 ARM64RIL* one = mb_mkARM64RIL_I(1);
3824 vassert(ty == Ity_I1);
3825 vassert(one);
3826 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
3827 ARM64lo_XOR));
3828 /* And be conservative -- mask off all but the lowest bit. */
3829 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
3830 ARM64lo_AND));
3831 return;
3832 }
3833 break;
3834 }
3835
3836 /* --------- MEM FENCE --------- */
3837 case Ist_MBE:
3838 switch (stmt->Ist.MBE.event) {
3839 case Imbe_Fence:
3840 addInstr(env, ARM64Instr_MFence());
3841 return;
3842 default:
3843 break;
3844 }
3845 break;
3846
3847 /* --------- INSTR MARK --------- */
3848 /* Doesn't generate any executable code ... */
3849 case Ist_IMark:
3850 return;
3851
3852 /* --------- ABI HINT --------- */
3853       /* These have no meaning (denotation) in the IR and so we ignore
3854          them ... if any actually made it this far. */
3855 case Ist_AbiHint:
3856 return;
3857
3858 /* --------- NO-OP --------- */
3859 case Ist_NoOp:
3860 return;
3861
3862 /* --------- EXIT --------- */
3863 case Ist_Exit: {
3864 if (stmt->Ist.Exit.dst->tag != Ico_U64)
3865             vpanic("isel_arm64: Ist_Exit: dst is not a 64-bit value");
3866
3867 ARM64CondCode cc
3868 = iselCondCode(env, stmt->Ist.Exit.guard);
3869 ARM64AMode* amPC
3870 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
3871
3872 /* Case: boring transfer to known address */
3873 if (stmt->Ist.Exit.jk == Ijk_Boring
3874 /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
3875 /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
3876 if (env->chainingAllowed) {
3877 /* .. almost always true .. */
3878 /* Skip the event check at the dst if this is a forwards
3879 edge. */
3880 Bool toFastEP
3881 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
3882 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
3883 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
3884 amPC, cc, toFastEP));
3885 } else {
3886 /* .. very occasionally .. */
3887 /* We can't use chaining, so ask for an assisted transfer,
3888 as that's the only alternative that is allowable. */
3889 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
3890 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
3891 }
3892 return;
3893 }
3894
3895 /* Do we ever expect to see any other kind? */
3896 goto stmt_fail;
3897 }
3898
3899 default: break;
3900 }
3901 stmt_fail:
3902 ppIRStmt(stmt);
3903 vpanic("iselStmt");
3904 }
3905
3906
3907 /*---------------------------------------------------------*/
3908 /*--- ISEL: Basic block terminators (Nexts) ---*/
3909 /*---------------------------------------------------------*/
3910
3911 static void iselNext ( ISelEnv* env,
3912 IRExpr* next, IRJumpKind jk, Int offsIP )
3913 {
3914 if (vex_traceflags & VEX_TRACE_VCODE) {
3915 vex_printf( "\n-- PUT(%d) = ", offsIP);
3916 ppIRExpr( next );
3917 vex_printf( "; exit-");
3918 ppIRJumpKind(jk);
3919 vex_printf( "\n");
3920 }
3921
3922 /* Case: boring transfer to known address */
3923 if (next->tag == Iex_Const) {
3924 IRConst* cdst = next->Iex.Const.con;
3925 vassert(cdst->tag == Ico_U64);
3926 if (jk == Ijk_Boring || jk == Ijk_Call) {
3927 /* Boring transfer to known address */
3928 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
3929 if (env->chainingAllowed) {
3930 /* .. almost always true .. */
3931 /* Skip the event check at the dst if this is a forwards
3932 edge. */
3933 Bool toFastEP
3934 = ((Addr64)cdst->Ico.U64) > env->max_ga;
3935 if (0) vex_printf("%s", toFastEP ? "X" : ".");
3936 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
3937 amPC, ARM64cc_AL,
3938 toFastEP));
3939 } else {
3940 /* .. very occasionally .. */
3941 /* We can't use chaining, so ask for an assisted transfer,
3942 as that's the only alternative that is allowable. */
3943 HReg r = iselIntExpr_R(env, next);
3944 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
3945 Ijk_Boring));
3946 }
3947 return;
3948 }
3949 }
3950
3951 /* Case: call/return (==boring) transfer to any address */
3952 switch (jk) {
3953 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
3954 HReg r = iselIntExpr_R(env, next);
3955 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
3956 if (env->chainingAllowed) {
3957 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
3958 } else {
3959 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
3960 Ijk_Boring));
3961 }
3962 return;
3963 }
3964 default:
3965 break;
3966 }
3967
3968 /* Case: assisted transfer to arbitrary address */
3969 switch (jk) {
3970 /* Keep this list in sync with that for Ist_Exit above */
3971 case Ijk_ClientReq:
3972 case Ijk_NoDecode:
3973 case Ijk_NoRedir:
3974 case Ijk_Sys_syscall:
3975 case Ijk_InvalICache:
3976 case Ijk_FlushDCache:
3977 case Ijk_SigTRAP:
3978 {
3979 HReg r = iselIntExpr_R(env, next);
3980 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
3981 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
3982 return;
3983 }
3984 default:
3985 break;
3986 }
3987
3988 vex_printf( "\n-- PUT(%d) = ", offsIP);
3989 ppIRExpr( next );
3990 vex_printf( "; exit-");
3991 ppIRJumpKind(jk);
3992 vex_printf( "\n");
3993 vassert(0); // are we expecting any other kind?
3994 }
3995
3996
3997 /*---------------------------------------------------------*/
3998 /*--- Insn selector top-level ---*/
3999 /*---------------------------------------------------------*/
4000
4001 /* Translate an entire SB to arm64 code. */
4002
4003 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4004 VexArch arch_host,
4005 const VexArchInfo* archinfo_host,
4006 const VexAbiInfo* vbi/*UNUSED*/,
4007 Int offs_Host_EvC_Counter,
4008 Int offs_Host_EvC_FailAddr,
4009 Bool chainingAllowed,
4010 Bool addProfInc,
4011 Addr max_ga )
4012 {
4013 Int i, j;
4014 HReg hreg, hregHI;
4015 ISelEnv* env;
4016 UInt hwcaps_host = archinfo_host->hwcaps;
4017 ARM64AMode *amCounter, *amFailAddr;
4018
4019 /* sanity ... */
4020 vassert(arch_host == VexArchARM64);
4021
4022 /* Check that the host's endianness is as expected. */
4023 vassert(archinfo_host->endness == VexEndnessLE);
4024
4025 /* guard against unexpected space regressions */
4026 vassert(sizeof(ARM64Instr) <= 32);
4027
4028 /* Make up an initial environment to use. */
4029 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4030 env->vreg_ctr = 0;
4031
4032 /* Set up output code array. */
4033 env->code = newHInstrArray();
4034
4035 /* Copy BB's type env. */
4036 env->type_env = bb->tyenv;
4037
4038 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4039 change as we go along. */
4040 env->n_vregmap = bb->tyenv->types_used;
4041 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4042 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4043
4044 /* and finally ... */
4045 env->chainingAllowed = chainingAllowed;
4046 env->hwcaps = hwcaps_host;
4047 env->previous_rm = NULL;
4048 env->max_ga = max_ga;
4049
4050 /* For each IR temporary, allocate a suitably-kinded virtual
4051 register. */
4052 j = 0;
4053 for (i = 0; i < env->n_vregmap; i++) {
4054 hregHI = hreg = INVALID_HREG;
4055 switch (bb->tyenv->types[i]) {
4056 case Ity_I1:
4057 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4058 hreg = mkHReg(True, HRcInt64, 0, j++);
4059 break;
4060 case Ity_I128:
4061 hreg = mkHReg(True, HRcInt64, 0, j++);
4062 hregHI = mkHReg(True, HRcInt64, 0, j++);
4063 break;
4064 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4065 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4066 case Ity_F64:
4067 hreg = mkHReg(True, HRcFlt64, 0, j++);
4068 break;
4069 case Ity_V128:
4070 hreg = mkHReg(True, HRcVec128, 0, j++);
4071 break;
4072 case Ity_V256:
4073 hreg = mkHReg(True, HRcVec128, 0, j++);
4074 hregHI = mkHReg(True, HRcVec128, 0, j++);
4075 break;
4076 default:
4077 ppIRType(bb->tyenv->types[i]);
4078 vpanic("iselBB(arm64): IRTemp type");
4079 }
4080 env->vregmap[i] = hreg;
4081 env->vregmapHI[i] = hregHI;
4082 }
4083 env->vreg_ctr = j;
4084
4085 /* The very first instruction must be an event check. */
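   /* Both amodes are X21-relative; judging from its use for guest-state
      offsets here, X21 is the baseblock (guest state) pointer in this
      backend. */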
4086 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4087 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4088 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4089
4090 /* Possibly a block counter increment (for profiling). At this
4091 point we don't know the address of the counter, so just pretend
4092 it is zero. It will have to be patched later, but before this
4093 translation is used, by a call to LibVEX_patchProfCtr. */
4094 if (addProfInc) {
4095 addInstr(env, ARM64Instr_ProfInc());
4096 }
4097
4098 /* Ok, finally we can iterate over the statements. */
4099 for (i = 0; i < bb->stmts_used; i++)
4100 iselStmt(env, bb->stmts[i]);
4101
4102 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4103
4104 /* record the number of vregs we used. */
4105 env->code->n_vregs = env->vreg_ctr;
4106 return env->code;
4107 }
4108
4109
4110 /*---------------------------------------------------------------*/
4111 /*--- end host_arm64_isel.c ---*/
4112 /*---------------------------------------------------------------*/
4113