/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also FP comparison "unordered" .. is implemented as normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence sometimes the
     least significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged .. they just round to nearest.  No special
     handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  No idea what "round to odd" means.  This implementation
     just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following four is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
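
/* Typical use (illustrative; the exact operands vary per decode
   case): DIP("add %s, %s, %s\n", ...) prints the disassembled
   instruction, but only when VEX_TRACE_FE is set in vex_traceflags. */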


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
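
/* Example (illustrative): for input bytes { 0x78, 0x56, 0x34, 0x12 }
   this returns 0x12345678 on any host, since the bytes are assembled
   least-significant-first. */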

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
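
/* Example (illustrative): sx_to_64(0x9F, 8) == 0xFFFFFFFFFFFFFF9FULL,
   since bit 7 of the input is set; sx_to_64(0x5F, 8) == 0x5F. */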

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)
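
/* Example (illustrative): BITS4(1,0,1,1) == 0xB and
   BITS8(1,0,0,0, 0,0,0,1) == 0x81; each argument supplies one bit,
   most significant first. */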

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
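
/* Example (illustrative): SLICE_UInt(0xABCD, 15, 12) == 0xA; it
   extracts the bit field at positions 15..12 of the word. */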


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
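
/* Note (illustrative): in these mkVec* selectors, 'size' is log2 of
   the lane size in bytes, so mkVecADD(0) == Iop_Add8x16 and
   mkVecADD(3) == Iop_Add64x2. */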

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
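
/* Example (illustrative): with ty == Ity_I32 and imm == 1, an 'arg'
   holding 0x80000001 produces 0xC0000000, i.e. (x << 31) | (x >> 1). */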

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
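
/* Example (illustrative): with ty == Ity_I32 and imm == 3, an 'arg'
   holding 0x00000008 (bit 3 set) produces 0xFFFFFFFF, whereas
   0x00000007 produces 0x00000000. */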

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}
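
/* Example (illustrative): widenUto64(Ity_I16, e) zero-extends, so a
   16-bit value 0x8000 widens to 0x0000000000008000; narrowFrom64
   with Ity_I16 then recovers the original low 16 bits. */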


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
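
/* Example (illustrative): offsetQRegLane(1, Ity_I32, 2) is
   offsetQReg128(1) + 8, i.e. the third 32-bit lane of q1 on the
   (little-endian) host. */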

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
1655    IRTemp armEncd = newTemp(Ity_I32);
1656    IRTemp swapped = newTemp(Ity_I32);
1657    /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
1658       we don't zero out bits 24 and above, since the assignment to
1659       'swapped' will mask them out anyway. */
1660    assign(armEncd,
1661           binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1662    /* Now swap them. */
1663    assign(swapped,
1664           binop(Iop_Or32,
1665                 binop(Iop_And32,
1666                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1667                       mkU32(2)),
1668                 binop(Iop_And32,
1669                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1670                       mkU32(1))
1671          ));
1672    return swapped;
1673 }
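
/* For reference, the same bit swap expressed in plain C: a minimal
   sketch, compiled out, and not used by the decoder.  'armEncd' is
   assumed to already hold FPCR[23:22] in its low two bits. */
#if 0
static UInt ref_swap_rmode_bits ( UInt armEncd )
{
   /* Swap bits 0 and 1: ARM 01 (+inf) <-> IR 10, and ARM 10 (-inf)
      <-> IR 01; 00 (nearest) and 11 (zero) are fixed points. */
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}
#endif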


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("nameARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
  /* First arg is "(cond << 4) | stored-operation".  This requires
     that the ARM64_CC_OP_ values all fit in 4 bits.  Hence we are
     passing a (COND, OP) pair in the lowest 8 bits of the first
     argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
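
/* A sketch of the (COND, OP) packing handed to the helper: bits 7:4
   carry the condition, bits 3:0 the thunk's stored cc_op.  Compiled
   out; illustrative only. */
#if 0
static ULong ref_pack_cond_op ( UInt cond /* 0..15 */, UInt cc_op /* 0..15 */ )
{
   return (ULong)((cond << 4) | cc_op);
}
#endif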


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
                 )
           );
   return res;
}

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
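
/* The classic bit-reversal recurrence which math_BITSWAP64 mirrors in
   IR: swap adjacent bits, then pairs, then nibbles, then byteswap.
   A compiled-out reference sketch, not used by the decoder. */
#if 0
static ULong ref_bitswap64 ( ULong x )
{
   x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1) | ((x << 1) & 0xAAAAAAAAAAAAAAAAULL);
   x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2) | ((x << 2) & 0xCCCCCCCCCCCCCCCCULL);
   x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4) | ((x << 4) & 0xF0F0F0F0F0F0F0F0ULL);
   x = ((x & 0xFF00FF00FF00FF00ULL) >> 8) | ((x << 8) & 0xFF00FF00FF00FF00ULL);
   x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
   x = (x >> 32) | (x << 32);
   return x;
}
#endif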

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
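
/* Plain-C equivalent of the replication above (a compiled-out sketch):
   each doubling step copies the lane alongside itself, e.g.
   ref_dup_to_64(0x7FULL, 1) == 0x7F7F7F7F7F7F7F7FULL. */
#if 0
static ULong ref_dup_to_64 ( ULong src, UInt laneSzB /* 1, 2, 4 or 8 */ )
{
   if (laneSzB == 1) { src |= src << 8;  laneSzB = 2; }
   if (laneSzB == 2) { src |= src << 16; laneSzB = 4; }
   if (laneSzB == 4) { src |= src << 32; laneSzB = 8; }
   return src;
}
#endif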


/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}


/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
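
/* A compiled-out cross-check of the termL/termR trick: enumerate the
   four possible 'ix' values (GT=0, LT=1, EQ=2, UN=3) and confirm the
   expected NZCV nibbles (GT->0010, LT->1000, EQ->0110, UN->0011). */
#if 0
static void ref_check_FCMP_nzcv ( void )
{
   static const ULong expected[4] = { 0x2, 0x8, 0x6, 0x3 };
   ULong ix;
   for (ix = 0; ix < 4; ix++) {
      ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
      ULong termR = (ix & (ix >> 1)) & 1;
      vassert((termL - termR) == expected[ix]);
   }
}
#endif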


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
     res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64 ( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
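
/* A compiled-out usage sketch for dbm_DecodeBitMasks.  N=0 and
   imms=0x27 (binary 100111) select esize 16 with a run of 8 ones, and
   immr=0 applies no rotation, so the decoded 64-bit logical immediate
   should be 0x00FF00FF00FF00FF. */
#if 0
static void ref_check_DecodeBitMasks ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 0/*immN*/, 0x27/*imms*/,
                                0/*immr*/, True/*immediate*/, 64);
   vassert(ok && wmask == 0x00FF00FF00FF00FFULL);
}
#endif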


static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
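
/* A plain-C model of 64-bit EXTR for reference (compiled out): the
   result is bits [imm6+63 : imm6] of the 128-bit concatenation Xn:Xm,
   e.g. ref_extr64(0x1111111111111111ULL, 0x2222222222222222ULL, 8)
   == 0x1122222222222222ULL.  imm6 == 0 is excluded here because the
   decoder above special-cases it (a shift by 64 would be undefined). */
#if 0
static ULong ref_extr64 ( ULong xn, ULong xm, UInt imm6 /* 1..63 */ )
{
   return (xn << (64 - imm6)) | (xm >> imm6);
}
#endif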


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}


static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      |  |  |  |     |  |  |  |      |  |
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */

   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res  = newTemp(ty);
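      /* SBC computes argL - argR - (1 - oldC), which is the same as
         argL + ~argR + oldC; the XOR with 1 below converts the carry
         flag into the borrow term that gets subtracted. */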
2875       if (isSUB) {
2876          IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2877          IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2878          assign(res,
2879                 binop(op,
2880                       binop(op, mkexpr(argL), mkexpr(argR)),
2881                       binop(xorOp, mkexpr(oldC), one)));
2882       } else {
2883          assign(res,
2884                 binop(op,
2885                       binop(op, mkexpr(argL), mkexpr(argR)),
2886                       mkexpr(oldC)));
2887       }
2888 
2889       if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2890 
2891       if (bS) {
2892          setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2893       }
2894 
2895       DIP("%s%s %s, %s, %s\n",
2896           bOP ? "sbc" : "adc", bS ? "s" : "",
2897           nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2898           nameIRegOrZR(is64, rM));
2899       return True;
2900    }
2901 
2902    /* -------------------- LOGIC(reg) -------------------- */
2903    /* x==0 => 32 bit op      x==1 => 64 bit op
2904       N==0 => inv? is no-op (no inversion)
2905       N==1 => inv? is NOT
2906       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2907 
2908       31 30 28    23 21 20 15   9  4
2909       |  |  |     |  |  |  |    |  |
2910       x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
2911       x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
2912       x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
2913       x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
2914       With N=1, the names are: BIC ORN EON BICS
2915    */
2916    if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2917       UInt   bX   = INSN(31,31);
2918       UInt   sh   = INSN(23,22);
2919       UInt   bN   = INSN(21,21);
2920       UInt   rM   = INSN(20,16);
2921       UInt   imm6 = INSN(15,10);
2922       UInt   rN   = INSN(9,5);
2923       UInt   rD   = INSN(4,0);
2924       Bool   is64 = bX == 1;
2925       IRType ty   = is64 ? Ity_I64 : Ity_I32;
2926       if (!is64 && imm6 > 31) {
2927          /* invalid; fall though */
2928       } else {
2929          IRTemp argL = newTemp(ty);
2930          assign(argL, getIRegOrZR(is64, rN));
2931          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2932          IROp   op   = Iop_INVALID;
2933          switch (INSN(30,29)) {
2934             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2935             case BITS2(0,1):                  op = mkOR(ty);  break;
2936             case BITS2(1,0):                  op = mkXOR(ty); break;
2937             default: vassert(0);
2938          }
2939          IRTemp res = newTemp(ty);
2940          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2941          if (INSN(30,29) == BITS2(1,1)) {
2942             setFlags_LOGIC(is64, res);
2943          }
2944          putIRegOrZR(is64, rD, mkexpr(res));
2945 
2946          static const HChar* names_op[8]
2947             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2948          vassert(((bN << 2) | INSN(30,29)) < 8);
2949          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2950          /* Special-case the printing of "MOV" */
2951          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2952             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2953                                 nameIRegOrZR(is64, rM));
2954          } else {
2955             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2956                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2957                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2958          }
2959          return True;
2960       }
2961    }
2962 
2963    /* -------------------- {U,S}MULH -------------------- */
2964    /* 31       23 22 20 15     9   4
2965       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
2966       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
2967    */
2968    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2969        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2970       Bool isU = INSN(23,23) == 1;
2971       UInt mm  = INSN(20,16);
2972       UInt nn  = INSN(9,5);
2973       UInt dd  = INSN(4,0);
2974       putIReg64orZR(dd, unop(Iop_128HIto64,
2975                              binop(isU ? Iop_MullU64 : Iop_MullS64,
2976                                    getIReg64orZR(nn), getIReg64orZR(mm))));
2977       DIP("%cmulh %s, %s, %s\n",
2978           isU ? 'u' : 's',
2979           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2980       return True;
2981    }
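   /* Note (illustrative): the widening multiply above forms the full
      64x64->128 product and Iop_128HIto64 keeps the top half.  For
      example, with Xn = 1ULL << 63 and Xm = 4 the unsigned product is
      1 << 65, so UMULH writes 2 to Xd. */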
2982 
2983    /* -------------------- M{ADD,SUB} -------------------- */
2984    /* 31 30           20 15 14 9 4
2985       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
2987    */
2988    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2989       Bool is64  = INSN(31,31) == 1;
2990       UInt mm    = INSN(20,16);
2991       Bool isAdd = INSN(15,15) == 0;
2992       UInt aa    = INSN(14,10);
2993       UInt nn    = INSN(9,5);
2994       UInt dd    = INSN(4,0);
2995       if (is64) {
2996          putIReg64orZR(
2997             dd,
2998             binop(isAdd ? Iop_Add64 : Iop_Sub64,
2999                   getIReg64orZR(aa),
3000                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3001       } else {
3002          putIReg32orZR(
3003             dd,
3004             binop(isAdd ? Iop_Add32 : Iop_Sub32,
3005                   getIReg32orZR(aa),
3006                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3007       }
3008       DIP("%s %s, %s, %s, %s\n",
3009           isAdd ? "madd" : "msub",
3010           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3011           nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3012       return True;
3013    }
3014 
3015    /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3016    /* 31 30 28        20 15   11 9  4
3017       sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
3018       sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
3019       sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
3020       sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
3021       In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3022    */
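   /* For illustration: the standard architectural aliases fall out of
      this scheme with no special handling, e.g. CSET Rd,cond is
      CSINC Rd,ZR,ZR,invert(cond), and CINC Rd,Rn,cond is
      CSINC Rd,Rn,Rn,invert(cond). */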
3023    if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3024       Bool    is64 = INSN(31,31) == 1;
3025       UInt    b30  = INSN(30,30);
3026       UInt    mm   = INSN(20,16);
3027       UInt    cond = INSN(15,12);
3028       UInt    b10  = INSN(10,10);
3029       UInt    nn   = INSN(9,5);
3030       UInt    dd   = INSN(4,0);
3031       UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3032       IRType  ty   = is64 ? Ity_I64 : Ity_I32;
3033       IRExpr* argL = getIRegOrZR(is64, nn);
3034       IRExpr* argR = getIRegOrZR(is64, mm);
3035       switch (op) {
3036          case BITS2(0,0):
3037             break;
3038          case BITS2(0,1):
3039             argR = binop(mkADD(ty), argR, mkU(ty,1));
3040             break;
3041          case BITS2(1,0):
3042             argR = unop(mkNOT(ty), argR);
3043             break;
3044          case BITS2(1,1):
3045             argR = binop(mkSUB(ty), mkU(ty,0), argR);
3046             break;
3047          default:
3048             vassert(0);
3049       }
3050       putIRegOrZR(
3051          is64, dd,
3052          IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3053                     argL, argR)
3054       );
3055       const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3056       DIP("%s %s, %s, %s, %s\n", op_nm[op],
3057           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3058           nameIRegOrZR(is64, mm), nameCC(cond));
3059       return True;
3060    }
3061 
3062    /* -------------- ADD/SUB(extended reg) -------------- */
3063    /*     28         20 15  12   9 4
3064       000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
3065       100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
3066 
3067       001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
3068       101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
3069 
3070       010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
3071       110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
3072 
3073       011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
3074       111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
3075 
3076       The 'm' operand is extended per opt, thusly:
3077 
3078         000   Xm & 0xFF           UXTB
3079         001   Xm & 0xFFFF         UXTH
3080         010   Xm & (2^32)-1       UXTW
3081         011   Xm                  UXTX
3082 
3083         100   Xm sx from bit 7    SXTB
3084         101   Xm sx from bit 15   SXTH
3085         110   Xm sx from bit 31   SXTW
3086         111   Xm                  SXTX
3087 
3088       In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3089       operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3090       are the identity operation on Wm.
3091 
3092       After extension, the value is shifted left by imm3 bits, which
3093       may only be in the range 0 .. 4 inclusive.
3094    */
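   /* Worked example (illustrative): ADD X0, X1, W2, SXTW #2 has
      opt=110 imm3=2, so the code below computes

         X0 = X1 + (SignExtend64(W2) << 2)

      -- the usual "base plus scaled int32 index" addressing idiom. */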
3095    if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3096       Bool is64  = INSN(31,31) == 1;
3097       Bool isSub = INSN(30,30) == 1;
3098       Bool setCC = INSN(29,29) == 1;
3099       UInt mm    = INSN(20,16);
3100       UInt opt   = INSN(15,13);
3101       UInt imm3  = INSN(12,10);
3102       UInt nn    = INSN(9,5);
3103       UInt dd    = INSN(4,0);
3104       const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3105                                   "sxtb", "sxth", "sxtw", "sxtx" };
3106       /* Do almost the same thing in the 32- and 64-bit cases. */
3107       IRTemp xN = newTemp(Ity_I64);
3108       IRTemp xM = newTemp(Ity_I64);
3109       assign(xN, getIReg64orSP(nn));
3110       assign(xM, getIReg64orZR(mm));
3111       IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
3112       Int     shSX = 0;
3113       /* widen Xm .. */
3114       switch (opt) {
3115          case BITS3(0,0,0): // UXTB
3116             xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3117          case BITS3(0,0,1): // UXTH
3118             xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3119          case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3120             if (is64) {
3121                xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3122             }
3123             break;
3124          case BITS3(0,1,1): // UXTX -- always a noop
3125             break;
3126          case BITS3(1,0,0): // SXTB
3127             shSX = 56; goto sxTo64;
3128          case BITS3(1,0,1): // SXTH
3129             shSX = 48; goto sxTo64;
3130          case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3131             if (is64) {
3132                shSX = 32; goto sxTo64;
3133             }
3134             break;
3135          case BITS3(1,1,1): // SXTX -- always a noop
3136             break;
3137          sxTo64:
3138             vassert(shSX >= 32);
3139             xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3140                         mkU8(shSX));
3141             break;
3142          default:
3143             vassert(0);
3144       }
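      /* Note: the sxTo64 path sign-extends purely with shifts: for
         SXTB, (x << 56) >>signed 56 replicates bit 7 of x into bits
         63:8, and similarly for SXTH and SXTW with shifts of 48 and
         32 respectively. */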
3145       /* and now shift */
3146       IRTemp argL = xN;
3147       IRTemp argR = newTemp(Ity_I64);
3148       assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3149       IRTemp res = newTemp(Ity_I64);
3150       assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3151                         mkexpr(argL), mkexpr(argR)));
3152       if (is64) {
3153          if (setCC) {
3154             putIReg64orZR(dd, mkexpr(res));
3155             setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3156          } else {
3157             putIReg64orSP(dd, mkexpr(res));
3158          }
3159       } else {
3160          if (setCC) {
3161             IRTemp argL32 = newTemp(Ity_I32);
3162             IRTemp argR32 = newTemp(Ity_I32);
3163             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3164             assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3165             assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3166             setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3167          } else {
3168             putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3169          }
3170       }
3171       DIP("%s%s %s, %s, %s %s lsl %u\n",
3172           isSub ? "sub" : "add", setCC ? "s" : "",
3173           setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3174           nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3175           nameExt[opt], imm3);
3176       return True;
3177    }
3178 
3179    /* ---------------- CCMP/CCMN(imm) ---------------- */
3180    /* Bizarrely, these appear in the "data processing register"
3181       category, even though they are operations against an
3182       immediate. */
3183    /* 31   29        20   15   11 9    3
3184       sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
3185       sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
3186 
3187       Operation is:
3188          (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3189          (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3190    */
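   /* Illustrative use (a sketch, not from this file): a compiler can
      evaluate (a == 0 && b > 5) without branches as

         cmp  x0, #0
         ccmp x1, #5, #4, eq    // if !eq, force NZCV = 0100
         b.gt then_path

      If the first compare fails, the CCMP substitutes nzcv = 4 (Z set),
      making GT false; otherwise GT reflects x1 - 5 as usual. */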
3191    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3192        && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3193       Bool is64  = INSN(31,31) == 1;
3194       Bool isSUB = INSN(30,30) == 1;
3195       UInt imm5  = INSN(20,16);
3196       UInt cond  = INSN(15,12);
3197       UInt nn    = INSN(9,5);
3198       UInt nzcv  = INSN(3,0);
3199 
3200       IRTemp condT = newTemp(Ity_I1);
3201       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3202 
3203       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3204       IRTemp argL = newTemp(ty);
3205       IRTemp argR = newTemp(ty);
3206 
3207       if (is64) {
3208          assign(argL, getIReg64orZR(nn));
3209          assign(argR, mkU64(imm5));
3210       } else {
3211          assign(argL, getIReg32orZR(nn));
3212          assign(argR, mkU32(imm5));
3213       }
3214       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3215 
3216       DIP("ccm%c %s, #%u, #%u, %s\n",
3217           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3218           imm5, nzcv, nameCC(cond));
3219       return True;
3220    }
3221 
3222    /* ---------------- CCMP/CCMN(reg) ---------------- */
3223    /* 31   29        20 15   11 9    3
3224       sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
3225       sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
3226       Operation is:
3227          (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3228          (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3229    */
3230    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3231        && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3232       Bool is64  = INSN(31,31) == 1;
3233       Bool isSUB = INSN(30,30) == 1;
3234       UInt mm    = INSN(20,16);
3235       UInt cond  = INSN(15,12);
3236       UInt nn    = INSN(9,5);
3237       UInt nzcv  = INSN(3,0);
3238 
3239       IRTemp condT = newTemp(Ity_I1);
3240       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3241 
3242       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3243       IRTemp argL = newTemp(ty);
3244       IRTemp argR = newTemp(ty);
3245 
3246       if (is64) {
3247          assign(argL, getIReg64orZR(nn));
3248          assign(argR, getIReg64orZR(mm));
3249       } else {
3250          assign(argL, getIReg32orZR(nn));
3251          assign(argR, getIReg32orZR(mm));
3252       }
3253       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3254 
3255       DIP("ccm%c %s, %s, #%u, %s\n",
3256           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3257           nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3258       return True;
3259    }
3260 
3261 
3262    /* -------------- REV/REV16/REV32/RBIT -------------- */
3263    /* 31 30 28       20    15   11 9 4
3264 
3265       1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
3266       0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
3267 
3268       1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
3269       0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
3270 
3271       1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
3272       0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
3273 
3274       1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
3275    */
3276    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3277        && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3278       UInt b31 = INSN(31,31);
3279       UInt opc = INSN(11,10);
3280 
3281       UInt ix = 0;
3282       /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3283       else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3284       else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3285       else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3286       else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3287       else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3288       else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3289       if (ix >= 1 && ix <= 7) {
3290          Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3291          UInt   nn    = INSN(9,5);
3292          UInt   dd    = INSN(4,0);
3293          IRTemp src   = newTemp(Ity_I64);
3294          IRTemp dst   = IRTemp_INVALID;
3295          IRTemp (*math)(IRTemp) = NULL;
3296          switch (ix) {
3297             case 1: case 2: math = math_BYTESWAP64;   break;
3298             case 3: case 4: math = math_BITSWAP64;    break;
3299             case 5: case 6: math = math_USHORTSWAP64; break;
3300             case 7:         math = math_UINTSWAP64;   break;
3301             default: vassert(0);
3302          }
3303          const HChar* names[7]
3304            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3305          const HChar* nm = names[ix-1];
3306          vassert(math);
3307          if (ix == 6) {
3308             /* This has to be special cased, since the logic below doesn't
3309                handle it correctly. */
3310             assign(src, getIReg64orZR(nn));
3311             dst = math(src);
3312             putIReg64orZR(dd,
3313                           unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3314          } else if (is64) {
3315             assign(src, getIReg64orZR(nn));
3316             dst = math(src);
3317             putIReg64orZR(dd, mkexpr(dst));
3318          } else {
3319             assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3320             dst = math(src);
3321             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3322          }
3323          DIP("%s %s, %s\n", nm,
3324              nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3325          return True;
3326       }
3327       /* else fall through */
3328    }
3329 
3330    /* -------------------- CLZ/CLS -------------------- */
3331    /*    30 28   24   20    15      9 4
3332       sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
3333       sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
3334    */
3335    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3336        && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3337       Bool   is64  = INSN(31,31) == 1;
3338       Bool   isCLS = INSN(10,10) == 1;
3339       UInt   nn    = INSN(9,5);
3340       UInt   dd    = INSN(4,0);
3341       IRTemp src   = newTemp(Ity_I64);
3342       IRTemp srcZ  = newTemp(Ity_I64);
3343       IRTemp dst   = newTemp(Ity_I64);
3344       /* Get the argument, widened out to 64 bit */
3345       if (is64) {
3346          assign(src, getIReg64orZR(nn));
3347       } else {
3348          assign(src, binop(Iop_Shl64,
3349                            unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3350       }
3351       /* If this is CLS, mash the arg around accordingly */
3352       if (isCLS) {
3353          IRExpr* one = mkU8(1);
3354          assign(srcZ,
3355          binop(Iop_Xor64,
3356                binop(Iop_Shl64, mkexpr(src), one),
3357                binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3358       } else {
3359          assign(srcZ, mkexpr(src));
3360       }
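      /* Why this works (illustrative): srcZ[i] = src[i] ^ src[i-1] for
         i >= 1, with bit 0 forced to zero, so the leading-zero count of
         srcZ equals the number of leading copies of the sign bit, not
         counting the sign bit itself.  E.g. src = 0xFFFFFFFF00000000
         gives srcZ whose top set bit is bit 32, so Clz64 = 31 = CLS.
         The all-bits-equal case makes srcZ zero and is picked off by
         the ITE below. */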
3361       /* And compute CLZ. */
3362       if (is64) {
3363          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3364                                 mkU64(isCLS ? 63 : 64),
3365                                 unop(Iop_Clz64, mkexpr(srcZ))));
3366          putIReg64orZR(dd, mkexpr(dst));
3367       } else {
3368          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369                                 mkU64(isCLS ? 31 : 32),
3370                                 unop(Iop_Clz64, mkexpr(srcZ))));
3371          putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3372       }
3373       DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3374           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3375       return True;
3376    }
3377 
3378    /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3379    /*    30 28        20 15   11 9 4
3380       sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
3381       sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
3382       sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
3383       sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
3384    */
3385    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3386        && INSN(15,12) == BITS4(0,0,1,0)) {
3387       Bool   is64 = INSN(31,31) == 1;
3388       UInt   mm   = INSN(20,16);
3389       UInt   op   = INSN(11,10);
3390       UInt   nn   = INSN(9,5);
3391       UInt   dd   = INSN(4,0);
3392       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3393       IRTemp srcL = newTemp(ty);
3394       IRTemp srcR = newTemp(Ity_I64);
3395       IRTemp res  = newTemp(ty);
3396       IROp   iop  = Iop_INVALID;
3397       assign(srcL, getIRegOrZR(is64, nn));
3398       assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3399                                     mkU64(is64 ? 63 : 31)));
3400       if (op < 3) {
3401          // LSLV, LSRV, ASRV
3402          switch (op) {
3403             case BITS2(0,0): iop = mkSHL(ty); break;
3404             case BITS2(0,1): iop = mkSHR(ty); break;
3405             case BITS2(1,0): iop = mkSAR(ty); break;
3406             default: vassert(0);
3407          }
3408          assign(res, binop(iop, mkexpr(srcL),
3409                                 unop(Iop_64to8, mkexpr(srcR))));
3410       } else {
3411          // RORV
3412          IROp opSHL = mkSHL(ty);
3413          IROp opSHR = mkSHR(ty);
3414          IROp opOR  = mkOR(ty);
3415          IRExpr* width = mkU64(is64 ? 64: 32);
3416          assign(
3417             res,
3418             IRExpr_ITE(
3419                binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3420                mkexpr(srcL),
3421                binop(opOR,
3422                      binop(opSHL,
3423                            mkexpr(srcL),
3424                            unop(Iop_64to8, binop(Iop_Sub64, width,
3425                                                             mkexpr(srcR)))),
3426                      binop(opSHR,
3427                            mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3428          ));
3429       }
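      /* The RORV expression above is the usual rotate construction; as
         a plain C sketch (illustration only, not part of the decoder):

            uint64_t ror64 ( uint64_t x, uint64_t n ) {
               n &= 63;
               return n == 0 ? x : (x >> n) | (x << (64 - n));
            }

         The amount-zero case is split out via the ITE because shifting
         by the full register width would not be well defined. */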
3430       putIRegOrZR(is64, dd, mkexpr(res));
3431       vassert(op < 4);
3432       const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3433       DIP("%s %s, %s, %s\n",
3434           names[op], nameIRegOrZR(is64,dd),
3435                      nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3436       return True;
3437    }
3438 
3439    /* -------------------- SDIV/UDIV -------------------- */
3440    /*    30 28        20 15    10 9 4
3441       sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
3442       sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
3443    */
3444    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3445        && INSN(15,11) == BITS5(0,0,0,0,1)) {
3446       Bool is64 = INSN(31,31) == 1;
3447       UInt mm   = INSN(20,16);
3448       Bool isS  = INSN(10,10) == 1;
3449       UInt nn   = INSN(9,5);
3450       UInt dd   = INSN(4,0);
3451       if (isS) {
3452          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3453                                      getIRegOrZR(is64, nn),
3454                                      getIRegOrZR(is64, mm)));
3455       } else {
3456          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3457                                      getIRegOrZR(is64, nn),
3458                                      getIRegOrZR(is64, mm)));
3459       }
3460       DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3461           nameIRegOrZR(is64, dd),
3462           nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3463       return True;
3464    }
3465 
3466    /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3467    /* 31        23  20 15 14 9 4
3468       1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
3469       1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
3470       1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
3471       1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
3472       with operation
3473          Xd = Xa +/- (Wn *u/s Wm)
3474    */
3475    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3476       Bool   isU   = INSN(23,23) == 1;
3477       UInt   mm    = INSN(20,16);
3478       Bool   isAdd = INSN(15,15) == 0;
3479       UInt   aa    = INSN(14,10);
3480       UInt   nn    = INSN(9,5);
3481       UInt   dd    = INSN(4,0);
3482       IRTemp wN    = newTemp(Ity_I32);
3483       IRTemp wM    = newTemp(Ity_I32);
3484       IRTemp xA    = newTemp(Ity_I64);
3485       IRTemp muld  = newTemp(Ity_I64);
3486       IRTemp res   = newTemp(Ity_I64);
3487       assign(wN, getIReg32orZR(nn));
3488       assign(wM, getIReg32orZR(mm));
3489       assign(xA, getIReg64orZR(aa));
3490       assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3491                          mkexpr(wN), mkexpr(wM)));
3492       assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3493                         mkexpr(xA), mkexpr(muld)));
3494       putIReg64orZR(dd, mkexpr(res));
3495       DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3496           nameIReg64orZR(dd), nameIReg32orZR(nn),
3497           nameIReg32orZR(mm), nameIReg64orZR(aa));
3498       return True;
3499    }
3500    vex_printf("ARM64 front end: data_processing_register\n");
3501    return False;
3502 #  undef INSN
3503 }
3504 
3505 
3506 /*------------------------------------------------------------*/
3507 /*--- Math helpers for vector interleave/deinterleave      ---*/
3508 /*------------------------------------------------------------*/
3509 
3510 #define EX(_tmp) \
3511            mkexpr(_tmp)
3512 #define SL(_hi128,_lo128,_nbytes) \
3513            ( (_nbytes) == 0 \
3514                 ? (_lo128) \
3515                 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3516 #define ROR(_v128,_nbytes) \
3517            SL((_v128),(_v128),(_nbytes))
3518 #define ROL(_v128,_nbytes) \
3519            SL((_v128),(_v128),16-(_nbytes))
3520 #define SHR(_v128,_nbytes) \
3521            binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3522 #define SHL(_v128,_nbytes) \
3523            binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3524 #define ILO64x2(_argL,_argR) \
3525            binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3526 #define IHI64x2(_argL,_argR) \
3527            binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3528 #define ILO32x4(_argL,_argR) \
3529            binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3530 #define IHI32x4(_argL,_argR) \
3531            binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3532 #define ILO16x8(_argL,_argR) \
3533            binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3534 #define IHI16x8(_argL,_argR) \
3535            binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3536 #define ILO8x16(_argL,_argR) \
3537            binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3538 #define IHI8x16(_argL,_argR) \
3539            binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3540 #define CEV32x4(_argL,_argR) \
3541            binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3542 #define COD32x4(_argL,_argR) \
3543            binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3544 #define COD16x8(_argL,_argR) \
3545            binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3546 #define COD8x16(_argL,_argR) \
3547            binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3548 #define CEV8x16(_argL,_argR) \
3549            binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3550 #define AND(_arg1,_arg2) \
3551            binop(Iop_AndV128,(_arg1),(_arg2))
3552 #define OR2(_arg1,_arg2) \
3553            binop(Iop_OrV128,(_arg1),(_arg2))
3554 #define OR3(_arg1,_arg2,_arg3) \
3555            binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3557            binop(Iop_OrV128, \
3558                  binop(Iop_OrV128,(_arg1),(_arg2)), \
3559                  binop(Iop_OrV128,(_arg3),(_arg4)))
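/* Reading aid for the macros above (illustrative): SL(hi,lo,n) yields
   bytes n .. n+15 of the 256-bit value hi:lo, so ROR(v,n) rotates the
   16 bytes of v right by n byte positions and ROL(v,n) rotates left by
   n; for example, ROR(v,8) swaps the two 64-bit halves of v.  SHR and
   SHL are whole-vector byte shifts that pull in zeroes. */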
3560 
3561 
3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3563 static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3565                            UInt laneSzBlg2, IRTemp u0 )
3566 {
3567    assign(*i0, mkexpr(u0));
3568 }
3569 
3570 
3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3572 static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3574                            UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3575 {
3576    /* This is pretty easy, since we have primitives directly to
3577       hand. */
3578    if (laneSzBlg2 == 3) {
3579       // 64x2
3580       // u1 == B1 B0, u0 == A1 A0
3581       // i1 == B1 A1, i0 == B0 A0
3582       assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3583       assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3584       return;
3585    }
3586    if (laneSzBlg2 == 2) {
3587       // 32x4
3588       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3589       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3590       assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3591       assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3592       return;
3593    }
3594    if (laneSzBlg2 == 1) {
3595       // 16x8
3596       // u1 == B{7..0}, u0 == A{7..0}
3597       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3598       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3599       assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3600       assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3601       return;
3602    }
3603    if (laneSzBlg2 == 0) {
3604       // 8x16
3605       // u1 == B{f..0}, u0 == A{f..0}
3606       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3607       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3608       assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3609       assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3610       return;
3611    }
3612    /*NOTREACHED*/
3613    vassert(0);
3614 }
3615 
3616 
3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3618 static
void math_INTERLEAVE3_128(
3620         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3621         UInt laneSzBlg2,
3622         IRTemp u0, IRTemp u1, IRTemp u2 )
3623 {
3624    if (laneSzBlg2 == 3) {
3625       // 64x2
3626       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3627       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3628       assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3629       assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3630       assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3631       return;
3632    }
3633 
3634    if (laneSzBlg2 == 2) {
3635       // 32x4
3636       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3637       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3638       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3639       IRTemp p0    = newTempV128();
3640       IRTemp p1    = newTempV128();
3641       IRTemp p2    = newTempV128();
3642       IRTemp c1100 = newTempV128();
3643       IRTemp c0011 = newTempV128();
3644       IRTemp c0110 = newTempV128();
3645       assign(c1100, mkV128(0xFF00));
3646       assign(c0011, mkV128(0x00FF));
3647       assign(c0110, mkV128(0x0FF0));
3648       // First interleave them at 64x2 granularity,
3649       // generating partial ("p") values.
3650       math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3651       // And more shuffling around for the final answer
3652       assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3653                        AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3654       assign(*i1, OR3( SHL(EX(p2),12),
3655                        AND(EX(p1),EX(c0110)),
3656                        SHR(EX(p0),12) ));
3657       assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3658                        AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3659       return;
3660    }
3661 
3662    if (laneSzBlg2 == 1) {
3663       // 16x8
3664       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3665       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3666       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3667       //
3668       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3669       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3670       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3671       //
3672       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3674       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3675       IRTemp p0    = newTempV128();
3676       IRTemp p1    = newTempV128();
3677       IRTemp p2    = newTempV128();
3678       IRTemp c1000 = newTempV128();
3679       IRTemp c0100 = newTempV128();
3680       IRTemp c0010 = newTempV128();
3681       IRTemp c0001 = newTempV128();
3682       assign(c1000, mkV128(0xF000));
3683       assign(c0100, mkV128(0x0F00));
3684       assign(c0010, mkV128(0x00F0));
3685       assign(c0001, mkV128(0x000F));
3686       // First interleave them at 32x4 granularity,
3687       // generating partial ("p") values.
3688       math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3689       // And more shuffling around for the final answer
3690       assign(*i2,
3691              OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
3692                   AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
3693                   AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3694                   AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3695       ));
3696       assign(*i1,
3697              OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3698                   AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
3699                   AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3700                   AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3701       ));
3702       assign(*i0,
3703              OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3704                   AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3705                   AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3706                   AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3707       ));
3708       return;
3709    }
3710 
3711    if (laneSzBlg2 == 0) {
3712       // 8x16.  It doesn't seem worth the hassle of first doing a
3713       // 16x8 interleave, so just generate all 24 partial results
3714       // directly :-(
3715       // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3716       // i2 == Cf Bf Af Ce .. Bb Ab Ca
3717       // i1 == Ba Aa C9 B9 .. A6 C5 B5
3718       // i0 == A5 C4 B4 A4 .. C0 B0 A0
3719 
3720       IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3721       IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3722       IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3723       IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3724       IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3725       IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3726       IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3727       IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3728       IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3729 
3730       // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
3731       // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3732       //
3733 #     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3734          IRTemp t_##_tempName = newTempV128(); \
3735          assign(t_##_tempName, \
3736                 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3737                          ROR(EX(_srcVec2),(_srcShift2)) ) )
3738 
3739       // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3740       IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3741 
3742       // The slicing and reassembly are done as interleavedly as possible,
3743       // so as to minimise the demand for registers in the back end, which
3744       // was observed to be a problem in testing.
3745 
3746       XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3747       XXXX(AfCe, AA, 0xf, CC, 0xe);
3748       assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3749 
3750       XXXX(BeAe, BB, 0xe, AA, 0xe);
3751       XXXX(CdBd, CC, 0xd, BB, 0xd);
3752       assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3753       assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3754 
3755       XXXX(AdCc, AA, 0xd, CC, 0xc);
3756       XXXX(BcAc, BB, 0xc, AA, 0xc);
3757       assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3758 
3759       XXXX(CbBb, CC, 0xb, BB, 0xb);
3760       XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3761       assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3762       assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3763       assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3764 
3765       XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3766       XXXX(C9B9, CC, 0x9, BB, 0x9);
3767       assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3768 
3769       XXXX(A9C8, AA, 0x9, CC, 0x8);
3770       XXXX(B8A8, BB, 0x8, AA, 0x8);
3771       assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3772       assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3773 
3774       XXXX(C7B7, CC, 0x7, BB, 0x7);
3775       XXXX(A7C6, AA, 0x7, CC, 0x6);
3776       assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3777 
3778       XXXX(B6A6, BB, 0x6, AA, 0x6);
3779       XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3780       assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3781       assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3782       assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3783 
3784       XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3785       XXXX(B4A4, BB, 0x4, AA, 0x4);
3786       assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3787 
3788       XXXX(C3B3, CC, 0x3, BB, 0x3);
3789       XXXX(A3C2, AA, 0x3, CC, 0x2);
3790       assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3791       assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3792 
3793       XXXX(B2A2, BB, 0x2, AA, 0x2);
3794       XXXX(C1B1, CC, 0x1, BB, 0x1);
3795       assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3796 
3797       XXXX(A1C0, AA, 0x1, CC, 0x0);
3798       XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3799       assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3800       assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3801       assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3802 
3803 #     undef XXXX
3804       return;
3805    }
3806 
3807    /*NOTREACHED*/
3808    vassert(0);
3809 }
3810 
3811 
3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3813 static
void math_INTERLEAVE4_128(
3815         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3816         UInt laneSzBlg2,
3817         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3818 {
3819    if (laneSzBlg2 == 3) {
3820       // 64x2
3821       assign(*i0, ILO64x2(EX(u1), EX(u0)));
3822       assign(*i1, ILO64x2(EX(u3), EX(u2)));
3823       assign(*i2, IHI64x2(EX(u1), EX(u0)));
3824       assign(*i3, IHI64x2(EX(u3), EX(u2)));
3825       return;
3826    }
3827    if (laneSzBlg2 == 2) {
3828       // 32x4
3829       // First, interleave at the 64-bit lane size.
3830       IRTemp p0 = newTempV128();
3831       IRTemp p1 = newTempV128();
3832       IRTemp p2 = newTempV128();
3833       IRTemp p3 = newTempV128();
3834       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3835       // And interleave (cat) at the 32 bit size.
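      // Worked lanes (illustrative): with u0 = A3..A0 through
      // u3 = D3..D0, the 64-bit step gives p0 = B1 B0 A1 A0,
      // p1 = D1 D0 C1 C0, p2 = B3 B2 A3 A2, p3 = D3 D2 C3 C2; the
      // cat-even/odd step below then yields i0 = D0 C0 B0 A0,
      // i1 = D1 C1 B1 A1, i2 = D2 C2 B2 A2, i3 = D3 C3 B3 A3.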
3836       assign(*i0, CEV32x4(EX(p1), EX(p0)));
3837       assign(*i1, COD32x4(EX(p1), EX(p0)));
3838       assign(*i2, CEV32x4(EX(p3), EX(p2)));
3839       assign(*i3, COD32x4(EX(p3), EX(p2)));
3840       return;
3841    }
3842    if (laneSzBlg2 == 1) {
3843       // 16x8
3844       // First, interleave at the 32-bit lane size.
3845       IRTemp p0 = newTempV128();
3846       IRTemp p1 = newTempV128();
3847       IRTemp p2 = newTempV128();
3848       IRTemp p3 = newTempV128();
3849       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3850       // And rearrange within each vector, to get the right 16 bit lanes.
3851       assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3852       assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3853       assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3854       assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3855       return;
3856    }
3857    if (laneSzBlg2 == 0) {
3858       // 8x16
3859       // First, interleave at the 16-bit lane size.
3860       IRTemp p0 = newTempV128();
3861       IRTemp p1 = newTempV128();
3862       IRTemp p2 = newTempV128();
3863       IRTemp p3 = newTempV128();
3864       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3865       // And rearrange within each vector, to get the right 8 bit lanes.
3866       assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3867       assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3868       assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3869       assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3870       return;
3871    }
3872    /*NOTREACHED*/
3873    vassert(0);
3874 }
3875 
3876 
3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3878 static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3880                              UInt laneSzBlg2, IRTemp i0 )
3881 {
3882    assign(*u0, mkexpr(i0));
3883 }
3884 
3885 
3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3887 static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3889                              UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3890 {
3891    /* This is pretty easy, since we have primitives directly to
3892       hand. */
3893    if (laneSzBlg2 == 3) {
3894       // 64x2
3895       // i1 == B1 A1, i0 == B0 A0
3896       // u1 == B1 B0, u0 == A1 A0
3897       assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3898       assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3899       return;
3900    }
3901    if (laneSzBlg2 == 2) {
3902       // 32x4
3903       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3904       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3905       assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3906       assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3907       return;
3908    }
3909    if (laneSzBlg2 == 1) {
3910       // 16x8
3911       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3912       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3913       // u1 == B{7..0}, u0 == A{7..0}
3914       assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3915       assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
3916       return;
3917    }
3918    if (laneSzBlg2 == 0) {
3919       // 8x16
3920       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3921       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3922       // u1 == B{f..0}, u0 == A{f..0}
3923       assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3924       assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
3925       return;
3926    }
3927    /*NOTREACHED*/
3928    vassert(0);
3929 }
3930 
3931 
3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
3933 static
void math_DEINTERLEAVE3_128(
3935         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
3936         UInt laneSzBlg2,
3937         IRTemp i0, IRTemp i1, IRTemp i2 )
3938 {
3939    if (laneSzBlg2 == 3) {
3940       // 64x2
3941       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3942       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3943       assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
3944       assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
3945       assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
3946       return;
3947    }
3948 
3949    if (laneSzBlg2 == 2) {
3950       // 32x4
3951       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3952       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3953       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3954       IRTemp t_a1c0b0a0 = newTempV128();
3955       IRTemp t_a2c1b1a1 = newTempV128();
3956       IRTemp t_a3c2b2a2 = newTempV128();
3957       IRTemp t_a0c3b3a3 = newTempV128();
3958       IRTemp p0 = newTempV128();
3959       IRTemp p1 = newTempV128();
3960       IRTemp p2 = newTempV128();
3961       // Compute some intermediate values.
3962       assign(t_a1c0b0a0, EX(i0));
3963       assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
3964       assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
3965       assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
3966       // First deinterleave into lane-pairs
3967       assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
3968       assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
3969                          IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
3970       assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
3971       // Then deinterleave at 64x2 granularity.
3972       math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
3973       return;
3974    }
3975 
3976    if (laneSzBlg2 == 1) {
3977       // 16x8
3978       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3979       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3980       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3981       //
3982       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3984       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3985       //
3986       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3987       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3988       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3989 
3990       IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
3991       s0 = s1 = s2 = s3
3992          = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
3993       newTempsV128_4(&s0, &s1, &s2, &s3);
3994       newTempsV128_4(&t0, &t1, &t2, &t3);
3995       newTempsV128_4(&p0, &p1, &p2, &c00111111);
3996 
3997       // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
3999       // s2 == b6a6 c5b5a5 c4b4a4
4000       // s3 == b0a0 c7b7a7 c6b6a6
4001       assign(s0, EX(i0));
4002       assign(s1, SL(EX(i1),EX(i0),6*2));
4003       assign(s2, SL(EX(i2),EX(i1),4*2));
4004       assign(s3, SL(EX(i0),EX(i2),2*2));
4005 
4006       // t0 == 0 0 c1c0 b1b0 a1a0
4007       // t1 == 0 0 c3c2 b3b2 a3a2
4008       // t2 == 0 0 c5c4 b5b4 a5a4
4009       // t3 == 0 0 c7c6 b7b6 a7a6
4010       assign(c00111111, mkV128(0x0FFF));
4011       assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4012       assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4013       assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4014       assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4015 
4016       assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
4017       assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4018       assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4019 
4020       // Then deinterleave at 32x4 granularity.
4021       math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4022       return;
4023    }
4024 
4025    if (laneSzBlg2 == 0) {
4026       // 8x16.  This is the same scheme as for 16x8, with twice the
4027       // number of intermediate values.
4028       //
4029       // u2 == C{f..0}
4030       // u1 == B{f..0}
4031       // u0 == A{f..0}
4032       //
4033       // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4034       // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4035       // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4036       //
4037       // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4038       // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4039       // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4040       //
4041       IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4042              t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4043       s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4044          = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4045          = IRTemp_INVALID;
4046       newTempsV128_4(&s0, &s1, &s2, &s3);
4047       newTempsV128_4(&s4, &s5, &s6, &s7);
4048       newTempsV128_4(&t0, &t1, &t2, &t3);
4049       newTempsV128_4(&t4, &t5, &t6, &t7);
4050       newTempsV128_4(&p0, &p1, &p2, &cMASK);
4051 
4052       // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4053       // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4054       // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4055       // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4056       // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4057       // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4058       // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4059       // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4060       assign(s0, SL(EX(i1),EX(i0), 0));
4061       assign(s1, SL(EX(i1),EX(i0), 6));
4062       assign(s2, SL(EX(i1),EX(i0),12));
4063       assign(s3, SL(EX(i2),EX(i1), 2));
4064       assign(s4, SL(EX(i2),EX(i1), 8));
4065       assign(s5, SL(EX(i2),EX(i1),14));
4066       assign(s6, SL(EX(i0),EX(i2), 4));
4067       assign(s7, SL(EX(i0),EX(i2),10));
4068 
4069       // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4070       // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4071       // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4072       // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4073       // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4074       // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4075       // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4076       // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4077       assign(cMASK, mkV128(0x003F));
4078       assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4079       assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4080       assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4081       assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4082       assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4083       assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4084       assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4085       assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4086 
4087       assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4088       assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4089                  SHL(EX(t3),2), SHR(EX(t2),4) ));
4090       assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4091 
4092       // Then deinterleave at 16x8 granularity.
4093       math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4094       return;
4095    }
4096 
4097    /*NOTREACHED*/
4098    vassert(0);
4099 }
4100 
4101 
4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4103 static
void math_DEINTERLEAVE4_128(
4105         /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4106         UInt laneSzBlg2,
4107         IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4108 {
4109    if (laneSzBlg2 == 3) {
4110       // 64x2
4111       assign(*u0, ILO64x2(EX(i2), EX(i0)));
4112       assign(*u1, IHI64x2(EX(i2), EX(i0)));
4113       assign(*u2, ILO64x2(EX(i3), EX(i1)));
4114       assign(*u3, IHI64x2(EX(i3), EX(i1)));
4115       return;
4116    }
4117    if (laneSzBlg2 == 2) {
4118       // 32x4
4119       IRTemp p0 = newTempV128();
4120       IRTemp p2 = newTempV128();
4121       IRTemp p1 = newTempV128();
4122       IRTemp p3 = newTempV128();
4123       assign(p0, ILO32x4(EX(i1), EX(i0)));
4124       assign(p1, IHI32x4(EX(i1), EX(i0)));
4125       assign(p2, ILO32x4(EX(i3), EX(i2)));
4126       assign(p3, IHI32x4(EX(i3), EX(i2)));
4127       // And now do what we did for the 64-bit case.
4128       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4129       return;
4130    }
4131    if (laneSzBlg2 == 1) {
4132       // 16x8
4133       // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4134       IRTemp p0 = newTempV128();
4135       IRTemp p1 = newTempV128();
4136       IRTemp p2 = newTempV128();
4137       IRTemp p3 = newTempV128();
4138       assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4139       assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4140       assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4141       assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4142       // From here on is like the 32 bit case.
4143       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4144       return;
4145    }
4146    if (laneSzBlg2 == 0) {
4147       // 8x16
4148       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4149       IRTemp p0 = newTempV128();
4150       IRTemp p1 = newTempV128();
4151       IRTemp p2 = newTempV128();
4152       IRTemp p3 = newTempV128();
4153       assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4154                           ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4155       assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4156                           ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4157       assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4158                           ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4159       assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4160                           ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4161       // From here on is like the 16 bit case.
4162       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4163       return;
4164    }
4165    /*NOTREACHED*/
4166    vassert(0);
4167 }
4168 
4169 
4170 /* Wrappers that use the full-width (de)interleavers to do half-width
4171    (de)interleaving.  The scheme is to clone each input lane in the
4172    lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
4174    result.  The returned values may have any old junk in the upper
4175    64 bits -- the caller must ignore that. */
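
/* Worked example (illustrative), at 32-bit lanes, for the 2-vector
   interleave below: with u0 = .. A1 A0 and u1 = .. B1 B0 (junk above
   bit 63), the doubler gives du0 = A1 A1 A0 A0 and du1 = B1 B1 B0 B0;
   the full-width 64x2 interleave gives di0 = B0 B0 A0 A0 and
   di1 = B1 B1 A1 A1; and the halver's even lanes leave i0 = .. B0 A0
   and i1 = .. B1 A1 in the low halves, as required. */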
4176 
4177 /* Helper function -- get doubling and narrowing operations. */
4178 static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4180                                    /*OUT*/IROp* halver,
4181                                    UInt laneSzBlg2 )
4182 {
4183    switch (laneSzBlg2) {
4184       case 2:
4185          *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4186          break;
4187       case 1:
4188          *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4189          break;
4190       case 0:
4191          *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4192          break;
4193       default:
4194          vassert(0);
4195    }
4196 }
4197 
4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4199 static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4201                           UInt laneSzBlg2, IRTemp u0 )
4202 {
4203    assign(*i0, mkexpr(u0));
4204 }
4205 
4206 
4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4208 static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4210                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4211 {
4212    if (laneSzBlg2 == 3) {
4213       // 1x64, degenerate case
4214       assign(*i0, EX(u0));
4215       assign(*i1, EX(u1));
4216       return;
4217    }
4218 
4219    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4220    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4221    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4222 
4223    IRTemp du0 = newTempV128();
4224    IRTemp du1 = newTempV128();
4225    assign(du0, binop(doubler, EX(u0), EX(u0)));
4226    assign(du1, binop(doubler, EX(u1), EX(u1)));
4227    IRTemp di0 = newTempV128();
4228    IRTemp di1 = newTempV128();
4229    math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4230    assign(*i0, binop(halver, EX(di0), EX(di0)));
4231    assign(*i1, binop(halver, EX(di1), EX(di1)));
4232 }
4233 
4234 
4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4236 static
void math_INTERLEAVE3_64(
4238         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4239         UInt laneSzBlg2,
4240         IRTemp u0, IRTemp u1, IRTemp u2 )
4241 {
4242    if (laneSzBlg2 == 3) {
4243       // 1x64, degenerate case
4244       assign(*i0, EX(u0));
4245       assign(*i1, EX(u1));
4246       assign(*i2, EX(u2));
4247       return;
4248    }
4249 
4250    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4251    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4252    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4253 
4254    IRTemp du0 = newTempV128();
4255    IRTemp du1 = newTempV128();
4256    IRTemp du2 = newTempV128();
4257    assign(du0, binop(doubler, EX(u0), EX(u0)));
4258    assign(du1, binop(doubler, EX(u1), EX(u1)));
4259    assign(du2, binop(doubler, EX(u2), EX(u2)));
4260    IRTemp di0 = newTempV128();
4261    IRTemp di1 = newTempV128();
4262    IRTemp di2 = newTempV128();
4263    math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4264    assign(*i0, binop(halver, EX(di0), EX(di0)));
4265    assign(*i1, binop(halver, EX(di1), EX(di1)));
4266    assign(*i2, binop(halver, EX(di2), EX(di2)));
4267 }
4268 
4269 
4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4271 static
void math_INTERLEAVE4_64(
4273         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4274         UInt laneSzBlg2,
4275         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4276 {
4277    if (laneSzBlg2 == 3) {
4278       // 1x64, degenerate case
4279       assign(*i0, EX(u0));
4280       assign(*i1, EX(u1));
4281       assign(*i2, EX(u2));
4282       assign(*i3, EX(u3));
4283       return;
4284    }
4285 
4286    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4287    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4288    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4289 
4290    IRTemp du0 = newTempV128();
4291    IRTemp du1 = newTempV128();
4292    IRTemp du2 = newTempV128();
4293    IRTemp du3 = newTempV128();
4294    assign(du0, binop(doubler, EX(u0), EX(u0)));
4295    assign(du1, binop(doubler, EX(u1), EX(u1)));
4296    assign(du2, binop(doubler, EX(u2), EX(u2)));
4297    assign(du3, binop(doubler, EX(u3), EX(u3)));
4298    IRTemp di0 = newTempV128();
4299    IRTemp di1 = newTempV128();
4300    IRTemp di2 = newTempV128();
4301    IRTemp di3 = newTempV128();
4302    math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4303                         laneSzBlg2 + 1, du0, du1, du2, du3);
4304    assign(*i0, binop(halver, EX(di0), EX(di0)));
4305    assign(*i1, binop(halver, EX(di1), EX(di1)));
4306    assign(*i2, binop(halver, EX(di2), EX(di2)));
4307    assign(*i3, binop(halver, EX(di3), EX(di3)));
4308 }


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking's sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
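/* As an illustrative reading of the table above: opt:S == 110:1 with
   a 64 bit transfer (transfer_szB == 8) gives
   EA = Xn|SP + (32Sto64(Wm) << 3), shown in the disassembly produced
   below as "[Xn, Wm sxtx, lsl 3]". */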
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs  = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}


static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      |  |   |     |     |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
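   /* A worked example of the encoding above (one case, as a sanity
      check): 0xF9400441 has sz == 11, bit 22 == 1 (load), imm12 == 1,
      nn == 2 and tt == 1, so it decodes as "ldr x1, [x2, #8]" --
      the immediate is scaled by the 8-byte transfer size. */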
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }

   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /*
      31 29  26      20   11 9  4
      |  |   |       |    |  |  |
      (at-Rn-then-Rn=EA)
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-EA)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      simm9 is unscaled.

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
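   /* For instance (an illustrative case): "str x30, [sp, #-16]!" is
      the at-EA-then-Rn=EA form, with sz == 11, bits [11:10] == 11,
      Rn == 31 and simm9 == -16; the transfer goes to SP-16 and SP is
      then updated to that EA. */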
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               str x30, [sp,#-16]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm9 < 0 && szB == 8
             && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }

   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   /* L==1 => mm==LD
      L==0 => mm==ST
      x==0 => 32 bit transfers, and zero extended loads
      x==1 => 64 bit transfers
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!

      (at-EA)
      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   */
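   /* Offset ranges implied by the scaling (for reference): imm7 is a
      signed 7 bit value, so 64 bit pairs can reach offsets
      -512 .. +504 in steps of 8, and 32 bit pairs -256 .. +252 in
      steps of 4. */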

   UInt insn_30_23 = INSN(30,23);
   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
      UInt bL     = INSN(22,22);
      UInt bX     = INSN(31,31);
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (bL && rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = (bX ? 8 : 4) * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               stp x29, x30, [sp,#-112]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -112 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = bWBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;

         if (bWBack && earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         /**/ if (bL == 1 && bX == 1) {
            // 64 bit load
            putIReg64orZR(rT1, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg64orZR(rT2, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
         } else if (bL == 1 && bX == 0) {
            // 32 bit load
            putIReg32orZR(rT1, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg32orZR(rT2, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
         } else if (bL == 0 && bX == 1) {
            // 64 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg64orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
                    getIReg64orZR(rT2));
         } else {
            vassert(bL == 0 && bX == 0);
            // 32 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg32orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
                    getIReg32orZR(rT2));
         }

         if (bWBack && !earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, bL == 0 ? "st" : "ld",
                      nameIRegOrZR(bX == 1, rT1),
                      nameIRegOrZR(bX == 1, rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }

   /* ---------------- LDR (literal, int reg) ---------------- */
   /* 31 29      23    4
      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
      Just handles the first two cases for now.
   */
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }

   /* -------------- {LD,ST}R (integer register) --------------- */
   /* 31 29        20 15     12 11 9  4
      |  |         |  |      |  |  |  |
      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]

      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
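   /* Scaling example (for reference): with sz == 10 (Q registers),
      simm7 is scaled by 16, giving reachable offsets of
      -1024 .. +1008 in steps of 16. */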
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
  after_LDR_STR_vector_register:

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
  after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
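   /* Note the scaling (for reference): imm12 is multiplied by the
      transfer size, so the Q form reaches offsets up to
      4095 * 16 == 65520 bytes, whereas the B form reaches only up
      to 4095. */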
5506    if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5507        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5508       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5509       Bool   isLD   = INSN(22,22) == 1;
5510       UInt   pimm12 = INSN(21,10) << szLg2;
5511       UInt   nn     = INSN(9,5);
5512       UInt   tt     = INSN(4,0);
5513       IRTemp tEA    = newTemp(Ity_I64);
5514       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5515       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5516       if (isLD) {
5517          if (szLg2 < 4) {
5518             putQReg128(tt, mkV128(0x0000));
5519          }
5520          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5521       } else {
5522          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5523       }
5524       DIP("%s %s, [%s, #%u]\n",
5525           isLD ? "ldr" : "str",
5526           nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5527       return True;
5528    }
5529 
5530    /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5531    /* These are the Post-Index and Pre-Index variants.
5532 
5533       31 29      23   20   11 9 4
5534       (at-Rn-then-Rn=EA)
5535       00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
5536       01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
5537       10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
5538       11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
5539       00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
5540 
5541       (at-EA-then-Rn=EA)
5542       00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
5543       01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
5544       10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
5545       11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
5546       00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
5547 
5548       Stores are the same except with bit 22 set to 0.
5549    */
5550    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5551        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5552        && INSN(21,21) == 0 && INSN(10,10) == 1) {
5553       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5554       Bool   isLD   = INSN(22,22) == 1;
5555       UInt   imm9   = INSN(20,12);
5556       Bool   atRN   = INSN(11,11) == 0;
5557       UInt   nn     = INSN(9,5);
5558       UInt   tt     = INSN(4,0);
5559       IRTemp tRN    = newTemp(Ity_I64);
5560       IRTemp tEA    = newTemp(Ity_I64);
5561       IRTemp tTA    = IRTemp_INVALID;
5562       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5563       ULong  simm9  = sx_to_64(imm9, 9);
5564       assign(tRN, getIReg64orSP(nn));
5565       assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5566       tTA = atRN ? tRN : tEA;
5567       if (isLD) {
5568          if (szLg2 < 4) {
5569             putQReg128(tt, mkV128(0x0000));
5570          }
5571          putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5572       } else {
5573          storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5574       }
5575       putIReg64orSP(nn, mkexpr(tEA));
5576       DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5577           isLD ? "ldr" : "str",
5578           nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
5579       return True;
5580    }
5581 
5582    /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5583    /* 31 29      23   20   11 9 4
5584       00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
5585       01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
5586       10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
5587       11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
5588       00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]
5589 
5590       00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
5591       01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
5592       10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
5593       11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
5594       00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
5595    */
5596    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5597        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5598        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5599       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5600       Bool   isLD   = INSN(22,22) == 1;
5601       UInt   imm9   = INSN(20,12);
5602       UInt   nn     = INSN(9,5);
5603       UInt   tt     = INSN(4,0);
5604       ULong  simm9  = sx_to_64(imm9, 9);
5605       IRTemp tEA    = newTemp(Ity_I64);
5606       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5607       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5608       if (isLD) {
5609          if (szLg2 < 4) {
5610             putQReg128(tt, mkV128(0x0000));
5611          }
5612          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5613       } else {
5614          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5615       }
5616       DIP("%s %s, [%s, #%lld]\n",
5617           isLD ? "ldur" : "stur",
5618           nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5619       return True;
5620    }
5621 
5622    /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5623    /* 31 29      23    4
5624       00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
5625       01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
5626       10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
5627    */
5628    if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5629       UInt   szB   = 4 << INSN(31,30);
5630       UInt   imm19 = INSN(23,5);
5631       UInt   tt    = INSN(4,0);
5632       ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5633       IRType ty    = preferredVectorSubTypeFromSize(szB);
5634       putQReg128(tt, mkV128(0x0000));
5635       putQRegLO(tt, loadLE(ty, mkU64(ea)));
5636       DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5637       return True;
5638    }
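   /* Illustrative example: 0x9C000040 is LDR Q0, <literal at PC+8>.
      INSN(31,30) = 10 gives szB = 16 and imm19 = 2, so the address
      computed above is guest_PC_curr_instr + sxTo64(2 << 2) = PC + 8. */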
5639 
5640    /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg  ------ */
5641    /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5642    /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5643    /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5644    /* 31 29  26   22 21 20    15   11 9 4
5645 
5646       0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
5647       0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step
5648 
5649       0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
5650       0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step
5651 
5652       0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
5653       0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step
5654 
5655       0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
5656       0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step
5657 
5658       T    = defined by Q and sz in the normal way
5659       step = if m == 11111 then transfer-size else Xm
5660       xx   = case L of 1 -> LD ; 0 -> ST
5661    */
5662    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5663        && INSN(21,21) == 0) {
5664       Bool bitQ  = INSN(30,30);
5665       Bool isPX  = INSN(23,23) == 1;
5666       Bool isLD  = INSN(22,22) == 1;
5667       UInt mm    = INSN(20,16);
5668       UInt opc   = INSN(15,12);
5669       UInt sz    = INSN(11,10);
5670       UInt nn    = INSN(9,5);
5671       UInt tt    = INSN(4,0);
5672       Bool isQ   = bitQ == 1;
5673       Bool is1d  = sz == BITS2(1,1) && !isQ;
5674       UInt nRegs = 0;
5675       switch (opc) {
5676          case BITS4(0,0,0,0): nRegs = 4; break;
5677          case BITS4(0,1,0,0): nRegs = 3; break;
5678          case BITS4(1,0,0,0): nRegs = 2; break;
5679          case BITS4(0,1,1,1): nRegs = 1; break;
5680          default: break;
5681       }
5682 
5683       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5684          If we see it, set nRegs to 0 so as to cause the next conditional
5685          to fail. */
5686       if (!isPX && mm != 0)
5687          nRegs = 0;
5688 
5689       if (nRegs == 1                             /* .1d is allowed */
5690           || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5691 
5692          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5693 
5694          /* Generate the transfer address (TA) and if necessary the
5695             writeback address (WB) */
5696          IRTemp tTA = newTemp(Ity_I64);
5697          assign(tTA, getIReg64orSP(nn));
5698          if (nn == 31) { /* FIXME generate stack alignment check */ }
5699          IRTemp tWB = IRTemp_INVALID;
5700          if (isPX) {
5701             tWB = newTemp(Ity_I64);
5702             assign(tWB, binop(Iop_Add64,
5703                               mkexpr(tTA),
5704                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5705                                                      : getIReg64orZR(mm)));
5706          }
5707 
5708          /* -- BEGIN generate the transfers -- */
5709 
5710          IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5711          u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5712          switch (nRegs) {
5713             case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5714             case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5715             case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5716             case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5717             default: vassert(0);
5718          }
5719 
5720          /* -- Multiple 128 or 64 bit stores -- */
5721          if (!isLD) {
5722             switch (nRegs) {
5723                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5724                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5725                case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5726                case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5727                default: vassert(0);
5728             }
5729             switch (nRegs) {
5730                case 4:  (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5731                            (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5732                         break;
5733                case 3:  (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5734                            (&i0, &i1, &i2, sz, u0, u1, u2);
5735                         break;
5736                case 2:  (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5737                            (&i0, &i1, sz, u0, u1);
5738                         break;
5739                case 1:  (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5740                            (&i0, sz, u0);
5741                         break;
5742                default: vassert(0);
5743             }
5744 #           define MAYBE_NARROW_TO_64(_expr) \
5745                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5746             UInt step = isQ ? 16 : 8;
5747             switch (nRegs) {
5748                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5749                                  MAYBE_NARROW_TO_64(mkexpr(i3)) );
5750                         /* fallthru */
5751                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5752                                  MAYBE_NARROW_TO_64(mkexpr(i2)) );
5753                         /* fallthru */
5754                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5755                                  MAYBE_NARROW_TO_64(mkexpr(i1)) );
5756                         /* fallthru */
5757                case 1:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5758                                  MAYBE_NARROW_TO_64(mkexpr(i0)) );
5759                         break;
5760                default: vassert(0);
5761             }
5762 #           undef MAYBE_NARROW_TO_64
5763          }
5764 
5765          /* -- Multiple 128 or 64 bit loads -- */
5766          else /* isLD */ {
5767             UInt   step   = isQ ? 16 : 8;
5768             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5769 #           define MAYBE_WIDEN_FROM_64(_expr) \
5770                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5771             switch (nRegs) {
5772                case 4:
5773                   assign(i3, MAYBE_WIDEN_FROM_64(
5774                                 loadLE(loadTy,
5775                                        binop(Iop_Add64, mkexpr(tTA),
5776                                                         mkU64(3 * step)))));
5777                   /* fallthru */
5778                case 3:
5779                   assign(i2, MAYBE_WIDEN_FROM_64(
5780                                 loadLE(loadTy,
5781                                        binop(Iop_Add64, mkexpr(tTA),
5782                                                         mkU64(2 * step)))));
5783                   /* fallthru */
5784                case 2:
5785                   assign(i1, MAYBE_WIDEN_FROM_64(
5786                                 loadLE(loadTy,
5787                                        binop(Iop_Add64, mkexpr(tTA),
5788                                                         mkU64(1 * step)))));
5789                   /* fallthru */
5790                case 1:
5791                   assign(i0, MAYBE_WIDEN_FROM_64(
5792                                 loadLE(loadTy,
5793                                        binop(Iop_Add64, mkexpr(tTA),
5794                                                         mkU64(0 * step)))));
5795                   break;
5796                default:
5797                   vassert(0);
5798             }
5799 #           undef MAYBE_WIDEN_FROM_64
5800             switch (nRegs) {
5801                case 4:  (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5802                            (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5803                         break;
5804                case 3:  (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5805                            (&u0, &u1, &u2, sz, i0, i1, i2);
5806                         break;
5807                case 2:  (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5808                            (&u0, &u1, sz, i0, i1);
5809                         break;
5810                case 1:  (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5811                            (&u0, sz, i0);
5812                         break;
5813                default: vassert(0);
5814             }
5815             switch (nRegs) {
5816                case 4:  putQReg128( (tt+3) % 32,
5817                                     math_MAYBE_ZERO_HI64(bitQ, u3));
5818                         /* fallthru */
5819                case 3:  putQReg128( (tt+2) % 32,
5820                                     math_MAYBE_ZERO_HI64(bitQ, u2));
5821                         /* fallthru */
5822                case 2:  putQReg128( (tt+1) % 32,
5823                                     math_MAYBE_ZERO_HI64(bitQ, u1));
5824                         /* fallthru */
5825                case 1:  putQReg128( (tt+0) % 32,
5826                                     math_MAYBE_ZERO_HI64(bitQ, u0));
5827                         break;
5828                default: vassert(0);
5829             }
5830          }
5831 
5832          /* -- END generate the transfers -- */
5833 
5834          /* Do the writeback, if necessary */
5835          if (isPX) {
5836             putIReg64orSP(nn, mkexpr(tWB));
5837          }
5838 
5839          HChar pxStr[20];
5840          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5841          if (isPX) {
5842             if (mm == BITS5(1,1,1,1,1))
5843                vex_sprintf(pxStr, ", #%u", xferSzB);
5844             else
5845                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
5846          }
5847          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
5848          DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
5849              isLD ? "ld" : "st", nRegs,
5850              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
5851              pxStr);
5852 
5853          return True;
5854       }
5855       /* else fall through */
5856    }
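   /* A concrete picture of the (de)interleaving above: for ST2
      {v0.4s, v1.4s}, [x0] (nRegs == 2, isQ, sz == 10) the stored
      sequence at ascending addresses is
         v0.s[0] v1.s[0] v0.s[1] v1.s[1] v0.s[2] v1.s[2] v0.s[3] v1.s[3]
      and LD2 performs the inverse de-interleave into v0 and v1. */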
5857 
5858    /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs  ------ */
5859    /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs  ------ */
5860    /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs  ------ */
5861    /* 31 29  26   22 21 20    15   11 9 4
5862 
5863       0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
5864       0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step
5865 
5866       0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
5867       0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step
5868 
5869       0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
5870       0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step
5871 
5872       T    = defined by Q and sz in the normal way
5873       step = if m == 11111 then transfer-size else Xm
5874       xx   = case L of 1 -> LD ; 0 -> ST
5875    */
5876    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5877        && INSN(21,21) == 0) {
5878       Bool bitQ  = INSN(30,30);
5879       Bool isPX  = INSN(23,23) == 1;
5880       Bool isLD  = INSN(22,22) == 1;
5881       UInt mm    = INSN(20,16);
5882       UInt opc   = INSN(15,12);
5883       UInt sz    = INSN(11,10);
5884       UInt nn    = INSN(9,5);
5885       UInt tt    = INSN(4,0);
5886       Bool isQ   = bitQ == 1;
5887       UInt nRegs = 0;
5888       switch (opc) {
5889          case BITS4(0,0,1,0): nRegs = 4; break;
5890          case BITS4(0,1,1,0): nRegs = 3; break;
5891          case BITS4(1,0,1,0): nRegs = 2; break;
5892          default: break;
5893       }
5894 
5895       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5896          If we see it, set nRegs to 0 so as to cause the next conditional
5897          to fail. */
5898       if (!isPX && mm != 0)
5899          nRegs = 0;
5900 
5901       if (nRegs >= 2 && nRegs <= 4) {
5902 
5903          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5904 
5905          /* Generate the transfer address (TA) and if necessary the
5906             writeback address (WB) */
5907          IRTemp tTA = newTemp(Ity_I64);
5908          assign(tTA, getIReg64orSP(nn));
5909          if (nn == 31) { /* FIXME generate stack alignment check */ }
5910          IRTemp tWB = IRTemp_INVALID;
5911          if (isPX) {
5912             tWB = newTemp(Ity_I64);
5913             assign(tWB, binop(Iop_Add64,
5914                               mkexpr(tTA),
5915                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5916                                                      : getIReg64orZR(mm)));
5917          }
5918 
5919          /* -- BEGIN generate the transfers -- */
5920 
5921          IRTemp u0, u1, u2, u3;
5922          u0 = u1 = u2 = u3 = IRTemp_INVALID;
5923          switch (nRegs) {
5924             case 4: u3 = newTempV128(); /* fallthru */
5925             case 3: u2 = newTempV128(); /* fallthru */
5926             case 2: u1 = newTempV128();
5927                     u0 = newTempV128(); break;
5928             default: vassert(0);
5929          }
5930 
5931          /* -- Multiple 128 or 64 bit stores -- */
5932          if (!isLD) {
5933             switch (nRegs) {
5934                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5935                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5936                case 2: assign(u1, getQReg128((tt+1) % 32));
5937                        assign(u0, getQReg128((tt+0) % 32)); break;
5938                default: vassert(0);
5939             }
5940 #           define MAYBE_NARROW_TO_64(_expr) \
5941                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5942             UInt step = isQ ? 16 : 8;
5943             switch (nRegs) {
5944                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5945                                  MAYBE_NARROW_TO_64(mkexpr(u3)) );
5946                         /* fallthru */
5947                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5948                                  MAYBE_NARROW_TO_64(mkexpr(u2)) );
5949                         /* fallthru */
5950                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5951                                  MAYBE_NARROW_TO_64(mkexpr(u1)) );
5952                         storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5953                                  MAYBE_NARROW_TO_64(mkexpr(u0)) );
5954                         break;
5955                default: vassert(0);
5956             }
5957 #           undef MAYBE_NARROW_TO_64
5958          }
5959 
5960          /* -- Multiple 128 or 64 bit loads -- */
5961          else /* isLD */ {
5962             UInt   step   = isQ ? 16 : 8;
5963             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5964 #           define MAYBE_WIDEN_FROM_64(_expr) \
5965                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5966             switch (nRegs) {
5967                case 4:
5968                   assign(u3, MAYBE_WIDEN_FROM_64(
5969                                 loadLE(loadTy,
5970                                        binop(Iop_Add64, mkexpr(tTA),
5971                                                         mkU64(3 * step)))));
5972                   /* fallthru */
5973                case 3:
5974                   assign(u2, MAYBE_WIDEN_FROM_64(
5975                                 loadLE(loadTy,
5976                                        binop(Iop_Add64, mkexpr(tTA),
5977                                                         mkU64(2 * step)))));
5978                   /* fallthru */
5979                case 2:
5980                   assign(u1, MAYBE_WIDEN_FROM_64(
5981                                 loadLE(loadTy,
5982                                        binop(Iop_Add64, mkexpr(tTA),
5983                                                         mkU64(1 * step)))));
5984                   assign(u0, MAYBE_WIDEN_FROM_64(
5985                                 loadLE(loadTy,
5986                                        binop(Iop_Add64, mkexpr(tTA),
5987                                                         mkU64(0 * step)))));
5988                   break;
5989                default:
5990                   vassert(0);
5991             }
5992 #           undef MAYBE_WIDEN_FROM_64
5993             switch (nRegs) {
5994                case 4:  putQReg128( (tt+3) % 32,
5995                                     math_MAYBE_ZERO_HI64(bitQ, u3));
5996                         /* fallthru */
5997                case 3:  putQReg128( (tt+2) % 32,
5998                                     math_MAYBE_ZERO_HI64(bitQ, u2));
5999                         /* fallthru */
6000                case 2:  putQReg128( (tt+1) % 32,
6001                                     math_MAYBE_ZERO_HI64(bitQ, u1));
6002                         putQReg128( (tt+0) % 32,
6003                                     math_MAYBE_ZERO_HI64(bitQ, u0));
6004                         break;
6005                default: vassert(0);
6006             }
6007          }
6008 
6009          /* -- END generate the transfers -- */
6010 
6011          /* Do the writeback, if necessary */
6012          if (isPX) {
6013             putIReg64orSP(nn, mkexpr(tWB));
6014          }
6015 
6016          HChar pxStr[20];
6017          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6018          if (isPX) {
6019             if (mm == BITS5(1,1,1,1,1))
6020                vex_sprintf(pxStr, ", #%u", xferSzB);
6021             else
6022                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6023          }
6024          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6025          DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6026              isLD ? "ld" : "st",
6027              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6028              pxStr);
6029 
6030          return True;
6031       }
6032       /* else fall through */
6033    }
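   /* Note for clarity: unlike LD2/LD3/LD4, LD1/ST1 with several
      registers does no (de)interleaving.  For example LD1 {v0.16b,
      v1.16b}, [x0] simply loads bytes [x0 .. x0+15] into v0 and bytes
      [x0+16 .. x0+31] into v1, which is why the cases above transfer
      each register directly at its own step offset. */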
6034 
6035    /* ---------- LD1R (single structure, replicate) ---------- */
6036    /* ---------- LD2R (single structure, replicate) ---------- */
6037    /* ---------- LD3R (single structure, replicate) ---------- */
6038    /* ---------- LD4R (single structure, replicate) ---------- */
6039    /* 31 29       22 20    15    11 9 4
6040       0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
6041       0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step
6042 
6043       0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
6044       0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step
6045 
6046       0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
6047       0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step
6048 
6049       0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
6050       0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step
6051 
6052       step = if m == 11111 then transfer-size else Xm
6053    */
6054    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6055        && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6056        && INSN(12,12) == 0) {
6057       UInt   bitQ  = INSN(30,30);
6058       Bool   isPX  = INSN(23,23) == 1;
6059       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6060       UInt   mm    = INSN(20,16);
6061       UInt   sz    = INSN(11,10);
6062       UInt   nn    = INSN(9,5);
6063       UInt   tt    = INSN(4,0);
6064 
6065       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6066       if (isPX || mm == 0) {
6067 
6068          IRType ty    = integerIRTypeOfSize(1 << sz);
6069 
6070          UInt laneSzB = 1 << sz;
6071          UInt xferSzB = laneSzB * nRegs;
6072 
6073          /* Generate the transfer address (TA) and if necessary the
6074             writeback address (WB) */
6075          IRTemp tTA = newTemp(Ity_I64);
6076          assign(tTA, getIReg64orSP(nn));
6077          if (nn == 31) { /* FIXME generate stack alignment check */ }
6078          IRTemp tWB = IRTemp_INVALID;
6079          if (isPX) {
6080             tWB = newTemp(Ity_I64);
6081             assign(tWB, binop(Iop_Add64,
6082                               mkexpr(tTA),
6083                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6084                                                      : getIReg64orZR(mm)));
6085          }
6086 
6087          /* Do the writeback, if necessary */
6088          if (isPX) {
6089             putIReg64orSP(nn, mkexpr(tWB));
6090          }
6091 
6092          IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6093          e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6094          switch (nRegs) {
6095             case 4:
6096                e3 = newTemp(ty);
6097                assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6098                                                       mkU64(3 * laneSzB))));
6099                v3 = math_DUP_TO_V128(e3, ty);
6100                putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6101                /* fallthrough */
6102             case 3:
6103                e2 = newTemp(ty);
6104                assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6105                                                       mkU64(2 * laneSzB))));
6106                v2 = math_DUP_TO_V128(e2, ty);
6107                putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6108                /* fallthrough */
6109             case 2:
6110                e1 = newTemp(ty);
6111                assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6112                                                       mkU64(1 * laneSzB))));
6113                v1 = math_DUP_TO_V128(e1, ty);
6114                putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6115                /* fallthrough */
6116             case 1:
6117                e0 = newTemp(ty);
6118                assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6119                                                       mkU64(0 * laneSzB))));
6120                v0 = math_DUP_TO_V128(e0, ty);
6121                putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6122                break;
6123             default:
6124                vassert(0);
6125          }
6126 
6127          HChar pxStr[20];
6128          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6129          if (isPX) {
6130             if (mm == BITS5(1,1,1,1,1))
6131                vex_sprintf(pxStr, ", #%u", xferSzB);
6132             else
6133                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6134          }
6135          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6136          DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6137              nRegs,
6138              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6139              pxStr);
6140 
6141          return True;
6142       }
6143       /* else fall through */
6144    }
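   /* Example of the replication above: LD1R {v0.4s}, [x0] (bitQ == 1,
      sz == 10) loads one 32-bit element from [x0] and, via
      math_DUP_TO_V128, copies it into all four lanes of v0. */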
6145 
6146    /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6147    /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6148    /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6149    /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6150    /* 31 29       22 21 20    15    11 9 4
6151       0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
6152       0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step
6153 
6154       0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
6155       0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step
6156 
6157       0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
6158       0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step
6159 
6160       0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
6161       0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step
6162 
6163       step = if m == 11111 then transfer-size else Xm
6164       op   = case L of 1 -> LD ; 0 -> ST
6165 
6166       laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6167                                      01:b:b:b0 -> 2, bbb
6168                                      10:b:b:00 -> 4, bb
6169                                      10:b:0:01 -> 8, b
6170    */
6171    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6172       UInt   bitQ  = INSN(30,30);
6173       Bool   isPX  = INSN(23,23) == 1;
6174       Bool   isLD  = INSN(22,22) == 1;
6175       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6176       UInt   mm    = INSN(20,16);
6177       UInt   xx    = INSN(15,14);
6178       UInt   bitS  = INSN(12,12);
6179       UInt   sz    = INSN(11,10);
6180       UInt   nn    = INSN(9,5);
6181       UInt   tt    = INSN(4,0);
6182 
6183       Bool valid = True;
6184 
6185       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6186       if (!isPX && mm != 0)
6187          valid = False;
6188 
6189       UInt laneSzB = 0;  /* invalid */
6190       UInt ix      = 16; /* invalid */
6191 
6192       UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6193       switch (xx_q_S_sz) {
6194          case 0x00: case 0x01: case 0x02: case 0x03:
6195          case 0x04: case 0x05: case 0x06: case 0x07:
6196          case 0x08: case 0x09: case 0x0A: case 0x0B:
6197          case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6198             laneSzB = 1; ix = xx_q_S_sz & 0xF;
6199             break;
6200          case 0x10: case 0x12: case 0x14: case 0x16:
6201          case 0x18: case 0x1A: case 0x1C: case 0x1E:
6202             laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6203             break;
6204          case 0x20: case 0x24: case 0x28: case 0x2C:
6205             laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6206             break;
6207          case 0x21: case 0x29:
6208             laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6209             break;
6210          default:
6211             break;
6212       }
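      /* Worked example of the packing above: xx = 01, q = 1, S = 0,
         sz = 10 gives xx_q_S_sz = 0x1A, hence laneSzB = 2 and
         ix = (0x1A >> 1) & 7 = 5, i.e. the H[5] lane. */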
6213 
6214       if (valid && laneSzB != 0) {
6215 
6216          IRType ty      = integerIRTypeOfSize(laneSzB);
6217          UInt   xferSzB = laneSzB * nRegs;
6218 
6219          /* Generate the transfer address (TA) and if necessary the
6220             writeback address (WB) */
6221          IRTemp tTA = newTemp(Ity_I64);
6222          assign(tTA, getIReg64orSP(nn));
6223          if (nn == 31) { /* FIXME generate stack alignment check */ }
6224          IRTemp tWB = IRTemp_INVALID;
6225          if (isPX) {
6226             tWB = newTemp(Ity_I64);
6227             assign(tWB, binop(Iop_Add64,
6228                               mkexpr(tTA),
6229                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6230                                                      : getIReg64orZR(mm)));
6231          }
6232 
6233          /* Do the writeback, if necessary */
6234          if (isPX) {
6235             putIReg64orSP(nn, mkexpr(tWB));
6236          }
6237 
6238          switch (nRegs) {
6239             case 4: {
6240                IRExpr* addr
6241                   = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6242                if (isLD) {
6243                   putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6244                } else {
6245                   storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6246                }
6247                /* fallthrough */
6248             }
6249             case 3: {
6250                IRExpr* addr
6251                   = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6252                if (isLD) {
6253                   putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6254                } else {
6255                   storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6256                }
6257                /* fallthrough */
6258             }
6259             case 2: {
6260                IRExpr* addr
6261                   = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6262                if (isLD) {
6263                   putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6264                } else {
6265                   storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6266                }
6267                /* fallthrough */
6268             }
6269             case 1: {
6270                IRExpr* addr
6271                   = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6272                if (isLD) {
6273                   putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6274                } else {
6275                   storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6276                }
6277                break;
6278             }
6279             default:
6280                vassert(0);
6281          }
6282 
6283          HChar pxStr[20];
6284          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6285          if (isPX) {
6286             if (mm == BITS5(1,1,1,1,1))
6287                vex_sprintf(pxStr, ", #%u", xferSzB);
6288             else
6289                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6290          }
6291          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6292          DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6293              isLD ? "ld" : "st", nRegs,
6294              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6295              ix, nameIReg64orSP(nn), pxStr);
6296 
6297          return True;
6298       }
6299       /* else fall through */
6300    }
6301 
6302    /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6303    /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6304    /* 31 29     23  20      14    9 4
6305       sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
6306       sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
6307       sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
6308       sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6309    */
6310    if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6311        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6312        && INSN(14,10) == BITS5(1,1,1,1,1)) {
6313       UInt szBlg2     = INSN(31,30);
6314       Bool isLD       = INSN(22,22) == 1;
6315       Bool isAcqOrRel = INSN(15,15) == 1;
6316       UInt ss         = INSN(20,16);
6317       UInt nn         = INSN(9,5);
6318       UInt tt         = INSN(4,0);
6319 
6320       vassert(szBlg2 < 4);
6321       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6322       IRType ty  = integerIRTypeOfSize(szB);
6323       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6324 
6325       IRTemp ea = newTemp(Ity_I64);
6326       assign(ea, getIReg64orSP(nn));
6327       /* FIXME generate check that ea is szB-aligned */
6328 
6329       if (isLD && ss == BITS5(1,1,1,1,1)) {
6330          IRTemp res = newTemp(ty);
6331          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6332          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6333          if (isAcqOrRel) {
6334             stmt(IRStmt_MBE(Imbe_Fence));
6335          }
6336          DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6337              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6338          return True;
6339       }
6340       if (!isLD) {
6341          if (isAcqOrRel) {
6342             stmt(IRStmt_MBE(Imbe_Fence));
6343          }
6344          IRTemp  res  = newTemp(Ity_I1);
6345          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6346          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6347          /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6348             Need to set rS to 1 on failure, 0 on success. */
6349          putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6350                                             mkU64(1)));
6351          DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6352              nameIRegOrZR(False, ss),
6353              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6354          return True;
6355       }
6356       /* else fall through */
6357    }
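   /* The LL/SC pair above models guest sequences such as the
      canonical atomic-increment loop (illustrative):
         retry: ldxr w1, [x0]
                add  w1, w1, #1
                stxr w2, w1, [x0]
                cbnz w2, retry
      ARM's status register (w2 here) is 0 on success, whereas the IR
      LLSC result is 1 on success -- hence the Xor with 1 above. */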
6358 
6359    /* ------------------ LDA{R,RH,RB} ------------------ */
6360    /* ------------------ STL{R,RH,RB} ------------------ */
6361    /* 31 29     23  20      14    9 4
6362       sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
6363       sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
6364    */
6365    if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6366        && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6367       UInt szBlg2 = INSN(31,30);
6368       Bool isLD   = INSN(22,22) == 1;
6369       UInt nn     = INSN(9,5);
6370       UInt tt     = INSN(4,0);
6371 
6372       vassert(szBlg2 < 4);
6373       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6374       IRType ty  = integerIRTypeOfSize(szB);
6375       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6376 
6377       IRTemp ea = newTemp(Ity_I64);
6378       assign(ea, getIReg64orSP(nn));
6379       /* FIXME generate check that ea is szB-aligned */
6380 
6381       if (isLD) {
6382          IRTemp res = newTemp(ty);
6383          assign(res, loadLE(ty, mkexpr(ea)));
6384          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6385          stmt(IRStmt_MBE(Imbe_Fence));
6386          DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6387              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6388       } else {
6389          stmt(IRStmt_MBE(Imbe_Fence));
6390          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6391          storeLE(mkexpr(ea), data);
6392          DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6393              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6394       }
6395       return True;
6396    }
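   /* Note the fence placement above: the acquire case (LDAR) fences
      after the load, and the release case (STLR) fences before the
      store, conservatively modelling the one-way barriers with a
      full Imbe_Fence. */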
6397 
6398    /* ------------------ PRFM (immediate) ------------------ */
6399    /* 31           21    9 4
6400       11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
6401    */
6402    if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6403       UInt imm12 = INSN(21,10);
6404       UInt nn    = INSN(9,5);
6405       UInt tt    = INSN(4,0);
6406       /* Generating any IR here is pointless, except for documentation
6407          purposes, as it will get optimised away later. */
6408       IRTemp ea = newTemp(Ity_I64);
6409       assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6410       DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6411       return True;
6412    }
6413 
6414    vex_printf("ARM64 front end: load_store\n");
6415    return False;
6416 #  undef INSN
6417 }
6418 
6419 
6420 /*------------------------------------------------------------*/
6421 /*--- Control flow and misc instructions                   ---*/
6422 /*------------------------------------------------------------*/
6423 
6424 static
6425 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6426                           const VexArchInfo* archinfo)
6427 {
6428 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
6429 
6430    /* ---------------------- B cond ----------------------- */
6431    /* 31        24    4 3
6432       0101010 0 imm19 0 cond */
6433    if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6434       UInt  cond   = INSN(3,0);
6435       ULong uimm64 = INSN(23,5) << 2;
6436       Long  simm64 = (Long)sx_to_64(uimm64, 21);
6437       vassert(dres->whatNext    == Dis_Continue);
6438       vassert(dres->len         == 4);
6439       vassert(dres->continueAt  == 0);
6440       vassert(dres->jk_StopHere == Ijk_INVALID);
6441       stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6442                         Ijk_Boring,
6443                         IRConst_U64(guest_PC_curr_instr + simm64),
6444                         OFFB_PC) );
6445       putPC(mkU64(guest_PC_curr_instr + 4));
6446       dres->whatNext    = Dis_StopHere;
6447       dres->jk_StopHere = Ijk_Boring;
6448       DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6449       return True;
6450    }
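   /* So a conditional branch becomes a conditional side-exit to the
      taken target plus an unconditional fall-through to PC + 4. */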
6451 
6452    /* -------------------- B{L} uncond -------------------- */
6453    if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6454       /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
6455          100101 imm26  BL (PC + sxTo64(imm26 << 2))
6456       */
6457       UInt  bLink  = INSN(31,31);
6458       ULong uimm64 = INSN(25,0) << 2;
6459       Long  simm64 = (Long)sx_to_64(uimm64, 28);
6460       if (bLink) {
6461          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6462       }
6463       putPC(mkU64(guest_PC_curr_instr + simm64));
6464       dres->whatNext = Dis_StopHere;
6465       dres->jk_StopHere = Ijk_Call;
6466       DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6467                           guest_PC_curr_instr + simm64);
6468       return True;
6469    }
6470 
6471    /* --------------------- B{L} reg --------------------- */
6472    /* 31      24 22 20    15     9  4
6473       1101011 00 10 11111 000000 nn 00000  RET  Rn
6474       1101011 00 01 11111 000000 nn 00000  CALL Rn
6475       1101011 00 00 11111 000000 nn 00000  JMP  Rn
6476    */
6477    if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6478        && INSN(20,16) == BITS5(1,1,1,1,1)
6479        && INSN(15,10) == BITS6(0,0,0,0,0,0)
6480        && INSN(4,0) == BITS5(0,0,0,0,0)) {
6481       UInt branch_type = INSN(22,21);
6482       UInt nn          = INSN(9,5);
6483       if (branch_type == BITS2(1,0) /* RET */) {
6484          putPC(getIReg64orZR(nn));
6485          dres->whatNext = Dis_StopHere;
6486          dres->jk_StopHere = Ijk_Ret;
6487          DIP("ret %s\n", nameIReg64orZR(nn));
6488          return True;
6489       }
6490       if (branch_type == BITS2(0,1) /* CALL */) {
6491          IRTemp dst = newTemp(Ity_I64);
6492          assign(dst, getIReg64orZR(nn));
6493          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6494          putPC(mkexpr(dst));
6495          dres->whatNext = Dis_StopHere;
6496          dres->jk_StopHere = Ijk_Call;
6497          DIP("blr %s\n", nameIReg64orZR(nn));
6498          return True;
6499       }
6500       if (branch_type == BITS2(0,0) /* JMP */) {
6501          putPC(getIReg64orZR(nn));
6502          dres->whatNext = Dis_StopHere;
6503          dres->jk_StopHere = Ijk_Boring;
6504          DIP("jmp %s\n", nameIReg64orZR(nn));
6505          return True;
6506       }
6507    }
6508 
6509    /* -------------------- CB{N}Z -------------------- */
6510    /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6511       sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
6512    */
6513    if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6514       Bool    is64   = INSN(31,31) == 1;
6515       Bool    bIfZ   = INSN(24,24) == 0;
6516       ULong   uimm64 = INSN(23,5) << 2;
6517       UInt    rT     = INSN(4,0);
6518       Long    simm64 = (Long)sx_to_64(uimm64, 21);
6519       IRExpr* cond   = NULL;
6520       if (is64) {
6521          cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6522                       getIReg64orZR(rT), mkU64(0));
6523       } else {
6524          cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6525                       getIReg32orZR(rT), mkU32(0));
6526       }
6527       stmt( IRStmt_Exit(cond,
6528                         Ijk_Boring,
6529                         IRConst_U64(guest_PC_curr_instr + simm64),
6530                         OFFB_PC) );
6531       putPC(mkU64(guest_PC_curr_instr + 4));
6532       dres->whatNext    = Dis_StopHere;
6533       dres->jk_StopHere = Ijk_Boring;
6534       DIP("cb%sz %s, 0x%llx\n",
6535           bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6536           guest_PC_curr_instr + simm64);
6537       return True;
6538    }
6539 
6540    /* -------------------- TB{N}Z -------------------- */
6541    /* 31 30      24 23  18  5 4
6542       b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6543       b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6544    */
6545    if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6546       UInt    b5     = INSN(31,31);
6547       Bool    bIfZ   = INSN(24,24) == 0;
6548       UInt    b40    = INSN(23,19);
6549       UInt    imm14  = INSN(18,5);
6550       UInt    tt     = INSN(4,0);
6551       UInt    bitNo  = (b5 << 5) | b40;
6552       ULong   uimm64 = imm14 << 2;
6553       Long    simm64 = sx_to_64(uimm64, 16);
6554       IRExpr* cond
6555          = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6556                  binop(Iop_And64,
6557                        binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6558                        mkU64(1)),
6559                  mkU64(0));
6560       stmt( IRStmt_Exit(cond,
6561                         Ijk_Boring,
6562                         IRConst_U64(guest_PC_curr_instr + simm64),
6563                         OFFB_PC) );
6564       putPC(mkU64(guest_PC_curr_instr + 4));
6565       dres->whatNext    = Dis_StopHere;
6566       dres->jk_StopHere = Ijk_Boring;
6567       DIP("tb%sz %s, #%u, 0x%llx\n",
6568           bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6569           guest_PC_curr_instr + simm64);
6570       return True;
6571    }
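   /* Example: TBNZ X3, #33, <target> has b5 = 1 and b40 = 00001, so
      bitNo = (1 << 5) | 1 = 33, and the IR above tests
      (X3 >> 33) & 1. */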
6572 
6573    /* -------------------- SVC -------------------- */
6574    /* 11010100 000 imm16 000 01
6575       Don't bother with anything except the imm16==0 case.
6576    */
6577    if (INSN(31,0) == 0xD4000001) {
6578       putPC(mkU64(guest_PC_curr_instr + 4));
6579       dres->whatNext    = Dis_StopHere;
6580       dres->jk_StopHere = Ijk_Sys_syscall;
6581       DIP("svc #0\n");
6582       return True;
6583    }
6584 
6585    /* ------------------ M{SR,RS} ------------------ */
6586    /* ---- Cases for TPIDR_EL0 ----
6587       0xD51BD0 010 Rt   MSR tpidr_el0, rT
6588       0xD53BD0 010 Rt   MRS rT, tpidr_el0
6589    */
6590    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6591        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6592       Bool toSys = INSN(21,21) == 0;
6593       UInt tt    = INSN(4,0);
6594       if (toSys) {
6595          stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6596          DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6597       } else {
6598          putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6599          DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6600       }
6601       return True;
6602    }
6603    /* ---- Cases for FPCR ----
6604       0xD51B44 000 Rt  MSR fpcr, rT
6605       0xD53B44 000 Rt  MRS rT, fpcr
6606    */
6607    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6608        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6609       Bool toSys = INSN(21,21) == 0;
6610       UInt tt    = INSN(4,0);
6611       if (toSys) {
6612          stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6613          DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6614       } else {
6615          putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6616          DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6617       }
6618       return True;
6619    }
6620    /* ---- Cases for FPSR ----
6621       0xD51B44 001 Rt  MSR fpsr, rT
6622       0xD53B44 001 Rt  MRS rT, fpsr
6623       The only part of this we model is FPSR.QC.  All other bits
6624       are ignored when writing to it and RAZ when reading from it.
6625    */
6626    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6627        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6628       Bool toSys = INSN(21,21) == 0;
6629       UInt tt    = INSN(4,0);
6630       if (toSys) {
6631          /* Just deal with FPSR.QC.  Make up a V128 value which is
6632             zero if Xt[27] is zero and any other value if Xt[27] is
6633             nonzero. */
6634          IRTemp qc64 = newTemp(Ity_I64);
6635          assign(qc64, binop(Iop_And64,
6636                             binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6637                             mkU64(1)));
6638          IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6639          stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
6640          DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6641       } else {
6642          /* Generate a value which is all zeroes except for bit 27,
6643             which must be zero if QCFLAG is all zeroes and one otherwise. */
6644          IRTemp qcV128 = newTempV128();
6645          assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6646          IRTemp qc64 = newTemp(Ity_I64);
6647          assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6648                                       unop(Iop_V128to64,   mkexpr(qcV128))));
6649          IRExpr* res = binop(Iop_Shl64,
6650                              unop(Iop_1Uto64,
6651                                   binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6652                              mkU8(27));
6653          putIReg64orZR(tt, res);
6654          DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6655       }
6656       return True;
6657    }
6658    /* ---- Cases for NZCV ----
6659       D51B42 000 Rt  MSR nzcv, rT
6660       D53B42 000 Rt  MRS rT, nzcv
6661       The only parts of NZCV that actually exist are bits 31:28, which
6662       are the N Z C and V bits themselves.  Hence the flags thunk provides
6663       all the state we need.
6664    */
6665    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6666        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6667       Bool  toSys = INSN(21,21) == 0;
6668       UInt  tt    = INSN(4,0);
6669       if (toSys) {
6670          IRTemp t = newTemp(Ity_I64);
6671          assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6672          setFlags_COPY(t);
6673          DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
6674       } else {
6675          IRTemp res = newTemp(Ity_I64);
6676          assign(res, mk_arm64g_calculate_flags_nzcv());
6677          putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6678          DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6679       }
6680       return True;
6681    }
6682    /* ---- Cases for DCZID_EL0 ----
6683       Don't support arbitrary reads and writes to this register.  Just
6684       return the value 16, which indicates that the DC ZVA instruction
6685       is not permitted, so we don't have to emulate it.
6686       D5 3B 00 111 Rt  MRS rT, dczid_el0
6687    */
6688    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6689       UInt tt = INSN(4,0);
6690       putIReg64orZR(tt, mkU64(1<<4));
6691       DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6692       return True;
6693    }
6694    /* ---- Cases for CTR_EL0 ----
6695       We just handle reads, and make up a value from the D and I line
6696       sizes in the VexArchInfo we are given, and patch in the following
6697       fields that the Foundation model gives ("natively"):
6698       CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6699       D5 3B 00 001 Rt  MRS rT, ctr_el0
6700    */
6701    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6702       UInt tt = INSN(4,0);
6703       /* Need to generate a value from dMinLine_lg2_szB and
6704          iMinLine_lg2_szB.  The value in the register is in 32-bit
6705          units, so need to subtract 2 from the values in the
6706          VexArchInfo.  We can assume that the values here are valid --
6707          disInstr_ARM64 checks them -- so there's no need to deal with
6708          out-of-range cases. */
6709       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6710               && archinfo->arm64_dMinLine_lg2_szB <= 17
6711               && archinfo->arm64_iMinLine_lg2_szB >= 2
6712               && archinfo->arm64_iMinLine_lg2_szB <= 17);
6713       UInt val
6714          = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6715                       | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
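      /* For example, 64-byte D and I lines (lg2 szB == 6) give
         val = 0x8440c000 | (4 << 16) | 4 = 0x8444c004. */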
6716       putIReg64orZR(tt, mkU64(val));
6717       DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6718       return True;
6719    }
6720    /* ---- Cases for CNTVCT_EL0 ----
6721       This is the generic timer's virtual count register.  Support reads of it only
6722       by passing through to the host.
6723       D5 3B E0 010 Rt  MRS Xt, cntvct_el0
6724    */
6725    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6726       UInt     tt   = INSN(4,0);
6727       IRTemp   val  = newTemp(Ity_I64);
6728       IRExpr** args = mkIRExprVec_0();
6729       IRDirty* d    = unsafeIRDirty_1_N (
6730                          val,
6731                          0/*regparms*/,
6732                          "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6733                          &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6734                          args
6735                       );
6736       /* execute the dirty call, dumping the result in val. */
6737       stmt( IRStmt_Dirty(d) );
6738       putIReg64orZR(tt, mkexpr(val));
6739       DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6740       return True;
6741    }
6742 
6743    /* ------------------ IC_IVAU ------------------ */
6744    /* D5 0B 75 001 Rt  ic ivau, rT
6745    */
6746    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6747       /* We will always be provided with a valid iMinLine value. */
6748       vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6749               && archinfo->arm64_iMinLine_lg2_szB <= 17);
6750       /* Round the requested address, in rT, down to the start of the
6751          containing block. */
6752       UInt   tt      = INSN(4,0);
6753       ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6754       IRTemp addr    = newTemp(Ity_I64);
6755       assign( addr, binop( Iop_And64,
6756                            getIReg64orZR(tt),
6757                            mkU64(~(lineszB - 1))) );
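      /* E.g. with 64-byte lines (iMinLine_lg2_szB == 6), a request
         for address 0x12345 rounds down to 0x12340. */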
6758       /* Set the invalidation range, request exit-and-invalidate, with
6759          continuation at the next instruction. */
6760       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6761       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
6762       /* be paranoid ... */
6763       stmt( IRStmt_MBE(Imbe_Fence) );
6764       putPC(mkU64( guest_PC_curr_instr + 4 ));
6765       dres->whatNext    = Dis_StopHere;
6766       dres->jk_StopHere = Ijk_InvalICache;
6767       DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6768       return True;
6769    }
6770 
6771    /* ------------------ DC_CVAU ------------------ */
6772    /* D5 0B 7B 001 Rt  dc cvau, rT
6773    */
6774    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6775       /* Exactly the same scheme as for IC IVAU, except we observe the
6776          dMinLine size, and request an Ijk_FlushDCache instead of
6777          Ijk_InvalICache. */
6778       /* We will always be provided with a valid dMinLine value. */
6779       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6780               && archinfo->arm64_dMinLine_lg2_szB <= 17);
6781       /* Round the requested address, in rT, down to the start of the
6782          containing block. */
6783       UInt   tt      = INSN(4,0);
6784       ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6785       IRTemp addr    = newTemp(Ity_I64);
6786       assign( addr, binop( Iop_And64,
6787                            getIReg64orZR(tt),
6788                            mkU64(~(lineszB - 1))) );
6789       /* Set the flush range, request exit-and-flush, with
6790          continuation at the next instruction. */
6791       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6792       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
6793       /* be paranoid ... */
6794       stmt( IRStmt_MBE(Imbe_Fence) );
6795       putPC(mkU64( guest_PC_curr_instr + 4 ));
6796       dres->whatNext    = Dis_StopHere;
6797       dres->jk_StopHere = Ijk_FlushDCache;
6798       DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6799       return True;
6800    }
6801 
6802    /* ------------------ ISB, DMB, DSB ------------------ */
6803    /* 31          21            11  7 6  4
6804       11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
6805       11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
6806       11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
6807    */
6808    if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6809        && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6810        && INSN(7,7) == 1
6811        && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6812       UInt opc = INSN(6,5);
6813       UInt CRm = INSN(11,8);
6814       vassert(opc <= 2 && CRm <= 15);
6815       stmt(IRStmt_MBE(Imbe_Fence));
6816       const HChar* opNames[3]
6817          = { "dsb", "dmb", "isb" };
6818       const HChar* howNames[16]
6819          = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6820              "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6821       DIP("%s %s\n", opNames[opc], howNames[CRm]);
6822       return True;
6823    }
6824 
6825    /* -------------------- NOP -------------------- */
6826    if (INSN(31,0) == 0xD503201F) {
6827       DIP("nop\n");
6828       return True;
6829    }
6830 
6831    /* -------------------- BRK -------------------- */
6832    /* 31        23  20    4
6833       1101 0100 001 imm16 00000  BRK #imm16
6834    */
6835    if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6836        && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6837       UInt imm16 = INSN(20,5);
6838       /* Request SIGTRAP and then restart of this insn. */
6839       putPC(mkU64(guest_PC_curr_instr + 0));
6840       dres->whatNext    = Dis_StopHere;
6841       dres->jk_StopHere = Ijk_SigTRAP;
6842       DIP("brk #%u\n", imm16);
6843       return True;
6844    }
6845 
6846   //fail:
6847    vex_printf("ARM64 front end: branch_etc\n");
6848    return False;
6849 #  undef INSN
6850 }
6851 
6852 
6853 /*------------------------------------------------------------*/
6854 /*--- SIMD and FP instructions: helper functions           ---*/
6855 /*------------------------------------------------------------*/
6856 
6857 /* Some constructors for interleave/deinterleave expressions. */
6858 
6859 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6860    // returns a0 b0
6861    return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6862 }
6863 
6864 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6865    // returns a1 b1
6866    return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6867 }
6868 
6869 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6870    // returns a2 a0 b2 b0
6871    return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6872 }
6873 
6874 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6875    // returns a3 a1 b3 b1
6876    return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6877 }
6878 
6879 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6880    // returns a1 b1 a0 b0
6881    return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6882 }
6883 
6884 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6885    // returns a3 b3 a2 b2
6886    return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6887 }
6888 
6889 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6890    // returns a6 a4 a2 a0 b6 b4 b2 b0
6891    return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6892 }
6893 
6894 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6895    // returns a7 a5 a3 a1 b7 b5 b3 b1
6896    return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6897 }
6898 
6899 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6900    // returns a3 b3 a2 b2 a1 b1 a0 b0
6901    return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6902 }
6903 
6904 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6905    // returns a7 b7 a6 b6 a5 b5 a4 b4
6906    return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6907 }
6908 
mk_CatEvenLanes8x16(IRTemp aFEDCBA9876543210,IRTemp bFEDCBA9876543210)6909 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6910                                      IRTemp bFEDCBA9876543210 ) {
6911    // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6912    return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6913                                       mkexpr(bFEDCBA9876543210));
6914 }
6915 
mk_CatOddLanes8x16(IRTemp aFEDCBA9876543210,IRTemp bFEDCBA9876543210)6916 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6917                                     IRTemp bFEDCBA9876543210 ) {
6918    // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6919    return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6920                                      mkexpr(bFEDCBA9876543210));
6921 }
6922 
mk_InterleaveLO8x16(IRTemp aFEDCBA9876543210,IRTemp bFEDCBA9876543210)6923 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6924                                      IRTemp bFEDCBA9876543210 ) {
6925    // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6926    return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6927                                       mkexpr(bFEDCBA9876543210));
6928 }
6929 
mk_InterleaveHI8x16(IRTemp aFEDCBA9876543210,IRTemp bFEDCBA9876543210)6930 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6931                                      IRTemp bFEDCBA9876543210 ) {
6932    // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6933    return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6934                                       mkexpr(bFEDCBA9876543210));
6935 }

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}
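
/* Worked examples for the replication helpers, computed by hand from
   the definitions above:
      Replicate(1, 4)       == 0xF
      Replicate(0, 9)       == 0
      Replicate8x8(0xAB)    == 0xABABABABABABABABULL
      Replicate16x4(0x12CD) == 0x12CD12CD12CD12CDULL
*/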

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
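
/* Worked example: imm8 == 0x70 has sign == 0, imm8<6> == 1 and
   imm8<5:0> == 0b110000, which expands to the encoding of 1.0:
      VFPExpandImm(0x70, 32) == 0x3F800000            (1.0 as F32)
      VFPExpandImm(0x70, 64) == 0x3FF0000000000000    (1.0 as F64)
*/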

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
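
/* Worked example: op == 0 with cmode == 0b0010 selects "imm8 << 8,
   replicated to both 32-bit halves", so
      AdvSIMDExpandImm(&r, 0, 2, 0x7F)
   sets r to 0x00007F0000007F00 and returns True.  With imm8 == 0 the
   same op/cmode returns False, since testimm8 is set for that case. */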

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}

/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
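
/* Worked example: immh == 0b0010, immb == 0b001 gives immhb == 17.
   The highest set bit of immh is bit 1, so the lanes are 16 bits wide
   (szBlg2 == 1) and the shift amount is 32 - 17 == 15.  immh == 0
   matches no case and so is rejected. */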

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Max32Fx4: case Iop_Min32Fx4:
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}
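
/* Worked trace of the 32x4 folding, for op == Iop_Add32x4 and
   src == [d c b a] (highest lane leftmost):
      x3232 == [d c d c]   x1010 == [b a b a]
      x3333 == [d d d d]   x2222 == [c c c c]
      x1111 == [b b b b]   x0000 == [a a a a]
      max32 == [d+c ...]   max10 == [b+a ...]
   so lane 0 of max3210 is a+b+c+d, and Iop_ZeroHI96ofV128 leaves
   [0 0 0 a+b+c+d]. */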


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}
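
/* Behavioural sketch: with len == 0 (one table register), a source
   byte of 5 selects byte 5 of tab[0], while a source byte of 16 or
   more is out of range; such lanes end up as the corresponding lane
   of |oor_values|, which callers are expected to supply as zero for
   TBL and as the old destination value for TBX. */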


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}


/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
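
/* Worked example at size == 0 (8-bit lanes), unsigned, with lane
   values argL == 3 and argR == 10: the CmpGT mask lane is 0, so the
   result selects (argR - argL) & ~0 == 7 == |3 - 10|.  The other
   difference wraps around, but the mask never selects it. */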


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
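
/* The shift trick, at sizeNarrow == X01 (16 -> 32 bits): for even
   lanes, (lane << 16) >> 16 within each 32-bit lane isolates the even
   16-bit element, with the right shift (logical or arithmetic, per
   |zWiden|) doing the zero/sign extension; for odd lanes the element
   already occupies the top half, so a single right shift suffices. */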


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
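
/* Worked trace: size == 2 (S lanes), laneNo == 3, hence ix == 12,
   giving ops[2] == Iop_CatOddLanes32x4 and ops[3] ==
   Iop_InterleaveHI64x2.  Applying ops[3] to src == [s3 s2 s1 s0]
   yields [s3 s2 s3 s2]; applying ops[2] then yields [s3 s3 s3 s3],
   the required dup. */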


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}
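
/* Decode example: imm5 == 0b01010 has bit 0 clear and bit 1 set, so
   it selects .h[(imm5 >> 2) & 7] == .h[2].  imm5 == 0b10000 has none
   of bits 3:0 set and is rejected as invalid. */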


/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Keep the lower half of Vdd unchanged, and OR the lower half
         of |new64| into its upper half. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}


/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp via *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                  : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}


/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
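
/* Saturation-detection example: "uqshl" at size == 0 (8-bit lanes)
   with shift == 3 and a source lane of 0x25.  The true result 0x128
   does not fit in 8 bits, so the saturating shift produces 0xFF;
   meanwhile qDiff1 == 0x25 >> 5 == 1 differs from qDiff2 == 0, and it
   is that difference which later sets QCFLAG. */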


/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))
                                  ),
                                  mkexpr(vecOne)
                            ),
                            mkexpr(scaOne)
                      )
                )
          )
   );
   return res;
}
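
/* Worked equation: the expression computes (A + B + 1) >> 1 without
   intermediate overflow.  For unsigned 8-bit lanes A == 255, B == 1:
      (255 >> 1) + (1 >> 1) + (((255 & 1) + (1 & 1) + 1) >> 1)
      == 127 + 0 + 1 == 128
   which matches URHADD, even though A + B + 1 == 257 would have
   wrapped in 8 bits. */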


/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}


/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 .. 64 */
static Double two_to_the_plus ( Int n )
{
   if (n == 1) return 2.0;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_plus(half) * two_to_the_plus(n - half);
}
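
/* Editorial note: both helpers above compute powers of two by
   recursive halving of the exponent.  Every intermediate value is
   itself a power of two with exponent magnitude <= 64, hence exactly
   representable as an IEEE754 double, so no rounding error can
   accumulate.  For example:
      two_to_the_plus(7) = two_to_the_plus(3) * two_to_the_plus(4)
                         = 8.0 * 16.0
                         = 128.0  (exact)
*/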


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
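
/* Editorial worked example for the decoder above:
   "ext v0.16b, v1.16b, v2.16b, #3" arrives with dd=0, nn=1 (sLo),
   mm=2 (sHi) and imm4=3, and generates

      triop(Iop_SliceV128, mkexpr(sHi), mkexpr(sLo), mkU8(3))

   i.e. the low 128 bits of (v2:v1) >> 3*8: bytes 3..15 of v1
   followed by bytes 0..2 of v2, which is the architected EXT
   result. */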


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}
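
/* Editorial note on the TBL/TBX case above: math_TBL_TBX selects, for
   each byte lane i of |src|, byte number src[i] of the 16*(len+1)
   byte table formed by tab[0..len]; an index past the end of the
   table selects the corresponding byte of |oor_values| instead.  So
   with len=0 and a source index byte of 0x13 (>= 16), TBL
   (oor_values = zero) produces 0x00 in that lane, whereas TBX
   (oor_values = the old Vd) leaves that lane of Vd unchanged. */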


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isUZP1 = opcode == BITS3(0,0,1);
      IROp   op     = isUZP1 ? mkVecCATEVENLANES(size)
                             : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isTRN1 = opcode == BITS3(0,1,0);
      IROp   op1    = isTRN1 ? mkVecCATEVENLANES(size)
                             : mkVecCATODDLANES(size);
      IROp op2 = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isZIP1 = opcode == BITS3(0,1,1);
      IROp   op     = isZIP1 ? mkVecINTERLEAVELO(size)
                             : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));

      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}
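
/* Editorial worked example for the UZP arm above: for
   "uzp1 v0.4s, v1.4s, v2.4s" (bitQ=1) the generated IR is

      binop(Iop_CatEvenLanes32x4, getQReg128(2), getQReg128(1))

   which concatenates the even-numbered lanes as [m2 m0 n2 n0] --
   exactly UZP1's result; UZP2 uses Iop_CatOddLanes32x4 for the odd
   lanes.  The bitQ=0 variants first pack the two 64-bit operands
   into a single V128 so that the same full-width ops apply. */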


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16,  Iop_Add16x8,  Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, v%u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

   return False;
#  undef INSN
}
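
/* Editorial sketch of the SADDLV/UADDLV plan above, for
   "saddlv h0, v1.16b" (bitQ=1, size=X00): the low 8 byte lanes are
   sign-widened to 16 bits, the high 8 likewise, and the two widened
   vectors are summed with Iop_Add16x8 (= mkVecADD(size+1)).
   math_FOLDV then repeatedly folds the vector onto itself with the
   same add, so that lane 0 of the result ends up holding the sum of
   all 16 source bytes and all other lanes are zero. */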


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res       = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                             getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
           nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU  = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res    = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20     14   9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
               = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                              xxx10 -> H, xxx,  imm4[3:1]
                              xx100 -> S, xx,   imm4[3:2]
                              x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar   ts  = '?';
      IRType  ity = Ity_INVALID;
      UInt    ix1 = 16;
      UInt    ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}
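
/* Editorial example of the imm5 decoding used throughout the copy
   group above: the position of the lowest set bit of imm5 selects
   the lane size, and the bits above it give the lane number.  For
   instance imm5 = 0b01110 has bit 1 as its lowest set bit, so the
   lane type is H (16 bits) and laneNo = (imm5 >> 2) & 7 = 3; hence
   "ins v0.h[3], w1" writes lane 3 of v0 from the low 16 bits of
   w1. */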


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
      case BITS5(0,1,1,1,1): // 0:1111
         ok = True; isFMOV = True; break;

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
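
/* Editorial note on the ORR/BIC arm above: a single XOR mask folds
   the two cases together.  With inv = 0 (ORR) the expanded immediate
   is used as-is; with inv = ~0 (BIC) it is complemented, so the
   generated "dd AND (inv ^ imm64lo)" computes dd & ~imm, which is
   BIC's bit-clear semantics.  For example imm64lo =
   0x00FF00FF00FF00FF under BIC becomes the AND mask
   0xFF00FF00FF00FF00. */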


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0     = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 FADDP s_2s -------- */
      /* -------- 1,01,01101 FADDP d_2d -------- */
      Bool   isD   = sz == X01;
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (sz & 1) == 1;
      Bool   isMIN = (sz & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
      HChar c = isD ? 'd' : 's';
      DIP("%s%sp %c%u, v%u.2%c\n",
           isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
      return True;
   }

   return False;
#  undef INSN
}
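
/* Editorial note: both FP scalar-pairwise cases above line the two
   source lanes up with the same trick: argL zeroes everything above
   lane 0, while

      triop(Iop_SliceV128, mkexpr(src), mkexpr(src), mkU8(laneBytes))

   (laneBytes = 8 for D, 4 for S) rotates the register right by one
   lane so that lane 1 lands in lane 0, giving argR.  The pairwise
   operation is then an ordinary lane-0 binop/triop, with opZHI
   clearing the unused upper bits of the result. */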


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (sh == 64 && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
      }
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-sh)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf  = newTempV128();
      IRTemp  res  = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
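
   /* Editorial check of the SRI mask above: the arithmetic right
      shift replicates the sign bit, so for sh = 8

         nmask = (ULong)(((Long)0x8000000000000000ULL) >> 7)
               = 0xFF00000000000000

      i.e. the top sh bits -- exactly the bits of |dd| that SRI must
      preserve; the low 64-sh bits come from nn >> sh. */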

   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res = IRTemp_INVALID;
      IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }

   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                           : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyI);
      IRTemp res = newTemp(tyF);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(src, getQRegLane(nn, 0, tyI));
      assign(res, triop(opMUL, mkexpr(rm),
                               binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
          ch, dd, ch, nn, fbits);
      return True;
   }

   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                           : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      IRTemp rm  = newTemp(Ity_I32);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(rm,  mkU32(Irrm_ZERO));
      assign(res, binop(opCVT, mkexpr(rm),
                               triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          ch, dd, ch, nn, fbits);
      return True;
   }

   return False;
#  undef INSN
}
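
/* Editorial summary of the immh:immb shift encodings decoded above,
   for the d-sized (immh = 1xxx) cases:

      right shifts: sh = 128 - immhb   (immhb in 64..127, sh in 1..64)
      left shifts:  sh = immhb - 64    (sh in 0..63)

   For example "sshr d0, d1, #1" and "shl d0, d1, #63" are both
   encoded with immhb = 127; the opcode field selects which of the
   two readings applies. */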


static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     11 9 4
      01 U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL -------- */ // 1
      /* -------- 0,1011  SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
9498       DIP("%s %c%u, %c%u, %c%u\n",
9499           nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9500       return True;
9501    }
9502 
9503    return False;
9504 #  undef INSN
9505 }
9506 
9507 
9508 static
9509 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9510 {
9511    /* 31 29 28    23   21 20 15     10 9 4
9512       01 U  11110 size 1  m  opcode 1  n d
9513       Decode fields: u,size,opcode
9514    */
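   /* A worked example of the layout above: 0x5EE28420 has U=0, size=11,
      m=2, opcode=10000, n=1, d=0, which (per the ADD case below) is
      "add d0, d1, d2". */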
9515 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9516    if (INSN(31,30) != BITS2(0,1)
9517        || INSN(28,24) != BITS5(1,1,1,1,0)
9518        || INSN(21,21) != 1
9519        || INSN(10,10) != 1) {
9520       return False;
9521    }
9522    UInt bitU   = INSN(29,29);
9523    UInt size   = INSN(23,22);
9524    UInt mm     = INSN(20,16);
9525    UInt opcode = INSN(15,11);
9526    UInt nn     = INSN(9,5);
9527    UInt dd     = INSN(4,0);
9528    vassert(size < 4);
9529 
9530    if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9531       /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9532       /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9533       /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9534       /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9535       Bool isADD = opcode == BITS5(0,0,0,0,1);
9536       Bool isU   = bitU == 1;
9537       IROp qop   = Iop_INVALID;
9538       IROp nop   = Iop_INVALID;
9539       if (isADD) {
9540          qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9541          nop = mkVecADD(size);
9542       } else {
9543          qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9544          nop = mkVecSUB(size);
9545       }
9546       IRTemp argL = newTempV128();
9547       IRTemp argR = newTempV128();
9548       IRTemp qres = newTempV128();
9549       IRTemp nres = newTempV128();
9550       assign(argL, getQReg128(nn));
9551       assign(argR, getQReg128(mm));
9552       assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9553                              size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9554       assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9555                              size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9556       putQReg128(dd, mkexpr(qres));
9557       updateQCFLAGwithDifference(qres, nres);
9558       const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
9559                                : (isU ? "uqsub" : "sqsub");
9560       const HChar  arr = "bhsd"[size];
9561       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9562       return True;
9563    }
9564 
9565    if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9566       /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9567       /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9568       Bool    isGT = bitU == 0;
9569       IRExpr* argL = getQReg128(nn);
9570       IRExpr* argR = getQReg128(mm);
9571       IRTemp  res  = newTempV128();
9572       assign(res,
9573              isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9574                   : binop(Iop_CmpGT64Ux2, argL, argR));
9575       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9576       DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
9577           nameQRegLO(dd, Ity_I64),
9578           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9579       return True;
9580    }
9581 
9582    if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9583       /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9584       /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9585       Bool    isGE = bitU == 0;
9586       IRExpr* argL = getQReg128(nn);
9587       IRExpr* argR = getQReg128(mm);
9588       IRTemp  res  = newTempV128();
9589       assign(res,
9590              isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9591                   : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9592       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9593       DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9594           nameQRegLO(dd, Ity_I64),
9595           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9596       return True;
9597    }
9598 
9599    if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9600                        || opcode == BITS5(0,1,0,1,0))) {
9601       /* -------- 0,xx,01000 SSHL  d_d_d -------- */
9602       /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9603       /* -------- 1,xx,01000 USHL  d_d_d -------- */
9604       /* -------- 1,xx,01010 URSHL d_d_d -------- */
9605       Bool isU = bitU == 1;
9606       Bool isR = opcode == BITS5(0,1,0,1,0);
9607       IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9608                      : (isU ? mkVecSHU(size)  : mkVecSHS(size));
9609       IRTemp res = newTempV128();
9610       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9611       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9612       const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
9613                              : (isU ? "ushl"  : "sshl");
9614       DIP("%s %s, %s, %s\n", nm,
9615           nameQRegLO(dd, Ity_I64),
9616           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9617       return True;
9618    }
9619 
9620    if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9621       /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
9622       /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9623       /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
9624       /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9625       Bool isU = bitU == 1;
9626       Bool isR = opcode == BITS5(0,1,0,1,1);
9627       IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9628                      : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
9629       /* This is a bit tricky.  Since we're only interested in the lowest
9630          lane of the result, we zero out all the rest in the operands, so
9631          as to ensure that other lanes don't pollute the returned Q value.
9632          This works because it means, for the lanes we don't care about, we
9633          are shifting zero by zero, which can never saturate. */
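      /* The QANDxxSH primops used here are assumed to return a pair of
         V128s packed into a V256: the low half holds the shifted lanes,
         and the high half is nonzero wherever a lane saturated. */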
9634       IRTemp res256 = newTemp(Ity_V256);
9635       IRTemp resSH  = newTempV128();
9636       IRTemp resQ   = newTempV128();
9637       IRTemp zero   = newTempV128();
9638       assign(
9639          res256,
9640          binop(op,
9641                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9642                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
9643       assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9644       assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
9645       assign(zero,  mkV128(0x0000));
9646       putQReg128(dd, mkexpr(resSH));
9647       updateQCFLAGwithDifference(resQ, zero);
9648       const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
9649                              : (isU ? "uqshl"  : "sqshl");
9650       const HChar  arr = "bhsd"[size];
9651       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9652       return True;
9653    }
9654 
9655    if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
9656       /* -------- 0,11,10000 ADD d_d_d -------- */
9657       /* -------- 1,11,10000 SUB d_d_d -------- */
9658       Bool   isSUB = bitU == 1;
9659       IRTemp res   = newTemp(Ity_I64);
9660       assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
9661                         getQRegLane(nn, 0, Ity_I64),
9662                         getQRegLane(mm, 0, Ity_I64)));
9663       putQRegLane(dd, 0, mkexpr(res));
9664       putQRegLane(dd, 1, mkU64(0));
9665       DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
9666           nameQRegLO(dd, Ity_I64),
9667           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9668       return True;
9669    }
9670 
9671    if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
9672       /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
9673       /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
9674       Bool    isEQ = bitU == 1;
9675       IRExpr* argL = getQReg128(nn);
9676       IRExpr* argR = getQReg128(mm);
9677       IRTemp  res  = newTempV128();
9678       assign(res,
9679              isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9680                   : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
9681                                             binop(Iop_AndV128, argL, argR),
9682                                             mkV128(0x0000))));
9683       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9684       DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
9685           nameQRegLO(dd, Ity_I64),
9686           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9687       return True;
9688    }
9689 
9690    if (opcode == BITS5(1,0,1,1,0)) {
9691       /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9692       /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9693       if (size == X00 || size == X11) return False;
9694       Bool isR = bitU == 1;
9695       IRTemp res, sat1q, sat1n, vN, vM;
9696       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9697       newTempsV128_2(&vN, &vM);
9698       assign(vN, getQReg128(nn));
9699       assign(vM, getQReg128(mm));
9700       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9701       putQReg128(dd,
9702                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
9703       updateQCFLAGwithDifference(
9704          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
9705          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
9706       const HChar  arr = "bhsd"[size];
9707       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
9708       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9709       return True;
9710    }
9711 
9712    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9713       /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
9714       IRType ity = size == X11 ? Ity_F64 : Ity_F32;
9715       IRTemp res = newTemp(ity);
9716       assign(res, unop(mkABSF(ity),
9717                        triop(mkSUBF(ity),
9718                              mkexpr(mk_get_IR_rounding_mode()),
9719                              getQRegLO(nn,ity), getQRegLO(mm,ity))));
9720       putQReg128(dd, mkV128(0x0000));
9721       putQRegLO(dd, mkexpr(res));
9722       DIP("fabd %s, %s, %s\n",
9723           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9724       return True;
9725    }
9726 
9727    if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9728       /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
9729       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
9730       IRType ity = size == X01 ? Ity_F64 : Ity_F32;
9731       IRTemp res = newTemp(ity);
9732       assign(res, triop(mkMULF(ity),
9733                         mkexpr(mk_get_IR_rounding_mode()),
9734                         getQRegLO(nn,ity), getQRegLO(mm,ity)));
9735       putQReg128(dd, mkV128(0x0000));
9736       putQRegLO(dd, mkexpr(res));
9737       DIP("fmulx %s, %s, %s\n",
9738           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9739       return True;
9740    }
9741 
9742    if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9743       /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
9744       /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
9745       Bool   isD   = size == X01;
9746       IRType ity   = isD ? Ity_F64 : Ity_F32;
9747       Bool   isGE  = bitU == 1;
9748       IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9749                           : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
9750       IRTemp res   = newTempV128();
9751       assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9752                        : binop(opCMP, getQReg128(nn), getQReg128(mm)));
9753       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9754                                                              mkexpr(res))));
9755       DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
9756           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9757       return True;
9758    }
9759 
9760    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9761       /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
9762       Bool   isD   = size == X11;
9763       IRType ity   = isD ? Ity_F64 : Ity_F32;
9764       IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9765       IRTemp res   = newTempV128();
9766       assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
9767       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9768                                                              mkexpr(res))));
9769       DIP("%s %s, %s, %s\n", "fcmgt",
9770           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9771       return True;
9772    }
9773 
9774    if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9775       /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
9776       /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
9777       Bool   isD   = (size & 1) == 1;
9778       IRType ity   = isD ? Ity_F64 : Ity_F32;
9779       Bool   isGT  = (size & 2) == 2;
9780       IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9781                           : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9782       IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9783       IRTemp res   = newTempV128();
9784       assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
9785                                unop(opABS, getQReg128(nn)))); // swapd
9786       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9787                                                              mkexpr(res))));
9788       DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
9789           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9790       return True;
9791    }
9792 
9793    if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
9794       /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
9795       /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
9796       Bool isSQRT = (size & 2) == 2;
9797       Bool isD    = (size & 1) == 1;
9798       IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
9799                            : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
9800       IRTemp res = newTempV128();
9801       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9802       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9803                                                              mkexpr(res))));
9804       HChar c = isD ? 'd' : 's';
9805       DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
9806           c, dd, c, nn, c, mm);
9807       return True;
9808    }
9809 
9810    return False;
9811 #  undef INSN
9812 }
9813 
9814 
9815 static
9816 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9817 {
9818    /* 31 29 28    23   21    16     11 9 4
9819       01 U  11110 size 10000 opcode 10 n d
9820       Decode fields: u,size,opcode
9821    */
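   /* A worked example of the layout above: 0x5EE0B820 has U=0, size=11,
      opcode=01011, n=1, d=0, which (per the ABS case below) is
      "abs d0, d1". */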
9822 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9823    if (INSN(31,30) != BITS2(0,1)
9824        || INSN(28,24) != BITS5(1,1,1,1,0)
9825        || INSN(21,17) != BITS5(1,0,0,0,0)
9826        || INSN(11,10) != BITS2(1,0)) {
9827       return False;
9828    }
9829    UInt bitU   = INSN(29,29);
9830    UInt size   = INSN(23,22);
9831    UInt opcode = INSN(16,12);
9832    UInt nn     = INSN(9,5);
9833    UInt dd     = INSN(4,0);
9834    vassert(size < 4);
9835 
9836    if (opcode == BITS5(0,0,0,1,1)) {
9837       /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
9838       /* -------- 1,xx,00011: USQADD std4_std4 -------- */
9839       /* These are a bit tricky (to say the least).  See comments on
9840          the vector variants (in dis_AdvSIMD_two_reg_misc) below for
9841          details. */
9842       Bool   isUSQADD = bitU == 1;
9843       IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9844                              : mkVecQADDEXTUSSATSS(size);
9845       IROp   nop  = mkVecADD(size);
9846       IRTemp argL = newTempV128();
9847       IRTemp argR = newTempV128();
9848       assign(argL, getQReg128(nn));
9849       assign(argR, getQReg128(dd));
9850       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9851                        size, binop(qop, mkexpr(argL), mkexpr(argR)));
9852       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9853                        size, binop(nop, mkexpr(argL), mkexpr(argR)));
9854       putQReg128(dd, mkexpr(qres));
9855       updateQCFLAGwithDifference(qres, nres);
9856       const HChar arr = "bhsd"[size];
9857       DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
9858       return True;
9859    }
9860 
9861    if (opcode == BITS5(0,0,1,1,1)) {
9862       /* -------- 0,xx,00111 SQABS std4_std4 -------- */
9863       /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
9864       Bool isNEG = bitU == 1;
9865       IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9866       (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9867                                          getQReg128(nn), size );
9868       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
9869       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
9870       putQReg128(dd, mkexpr(qres));
9871       updateQCFLAGwithDifference(qres, nres);
9872       const HChar arr = "bhsd"[size];
9873       DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
9874       return True;
9875    }
9876 
9877    if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
9878       /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
9879       /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
9880       Bool    isGT = bitU == 0;
9881       IRExpr* argL = getQReg128(nn);
9882       IRExpr* argR = mkV128(0x0000);
9883       IRTemp  res  = newTempV128();
9884       assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9885                        : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
9886       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9887       DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
9888       return True;
9889    }
9890 
9891    if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
9892       /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
9893       /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
9894       Bool    isEQ = bitU == 0;
9895       IRExpr* argL = getQReg128(nn);
9896       IRExpr* argR = mkV128(0x0000);
9897       IRTemp  res  = newTempV128();
9898       assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9899                        : unop(Iop_NotV128,
9900                               binop(Iop_CmpGT64Sx2, argL, argR)));
9901       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9902       DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
9903       return True;
9904    }
9905 
9906    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
9907       /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
9908       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9909                           binop(Iop_CmpGT64Sx2, mkV128(0x0000),
9910                                                 getQReg128(nn))));
9911       DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
9912       return True;
9913    }
9914 
9915    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9916       /* -------- 0,11,01011 ABS d_d -------- */
9917       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9918                           unop(Iop_Abs64x2, getQReg128(nn))));
9919       DIP("abs d%u, d%u\n", dd, nn);
9920       return True;
9921    }
9922 
9923    if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9924       /* -------- 1,11,01011 NEG d_d -------- */
9925       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9926                           binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
9927       DIP("neg d%u, d%u\n", dd, nn);
9928       return True;
9929    }
9930 
9931    UInt ix = 0; /*INVALID*/
9932    if (size >= X10) {
9933       switch (opcode) {
9934          case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
9935          case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
9936          case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
9937          default: break;
9938       }
9939    }
9940    if (ix > 0) {
9941       /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
9942       /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
9943       /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
9944       /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
9945       /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
9946       Bool   isD     = size == X11;
9947       IRType ity     = isD ? Ity_F64 : Ity_F32;
9948       IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
9949       IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
9950       IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9951       IROp   opCmp   = Iop_INVALID;
9952       Bool   swap    = False;
9953       const HChar* nm = "??";
9954       switch (ix) {
9955          case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
9956          case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
9957          case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
9958          case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
9959          case 5: nm = "fcmle"; opCmp = opCmpLE; break;
9960          default: vassert(0);
9961       }
9962       IRExpr* zero = mkV128(0x0000);
9963       IRTemp res = newTempV128();
9964       assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
9965                        : binop(opCmp, getQReg128(nn), zero));
9966       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9967                                                              mkexpr(res))));
9968 
9969       DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
9970       return True;
9971    }
9972 
9973    if (opcode == BITS5(1,0,1,0,0)
9974        || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
9975       /* -------- 0,xx,10100: SQXTN -------- */
9976       /* -------- 1,xx,10100: UQXTN -------- */
9977       /* -------- 1,xx,10010: SQXTUN -------- */
9978       if (size == X11) return False;
9979       vassert(size < 3);
9980       IROp  opN    = Iop_INVALID;
9981       Bool  zWiden = True;
9982       const HChar* nm = "??";
9983       /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
9984          opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
9985       }
9986       else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
9987          opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
9988       }
9989       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9990          opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
9991       }
9992       else vassert(0);
9993       IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9994                        size+1, getQReg128(nn));
9995       IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9996                        size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
9997       putQReg128(dd, mkexpr(resN));
9998       /* The non-participating lanes of the source were zeroed above, and
9999          zero narrows and re-widens to zero, so those lanes make no
10000          contribution to the Q flag state. */
10001       IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10002                                               size, mkexpr(resN));
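      /* resW re-widens the narrowed result, zero- or sign-extending as
         zWiden dictates; a lane saturated iff it fails to survive this
         narrow/widen round trip, which is what the comparison below
         detects. */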
10003       updateQCFLAGwithDifference(src, resW);
10004       const HChar arrNarrow = "bhsd"[size];
10005       const HChar arrWide   = "bhsd"[size+1];
10006       DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10007       return True;
10008    }
10009 
10010    if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10011       /* -------- 1,01,10110 FCVTXN s_d -------- */
10012       /* Kludge: "round to odd" forces the result's LSB to 1 whenever the
10013          result is inexact.  IR has no such rounding mode, so Irrm_NEAREST
          is used instead, which isn't right. */
10014       putQRegLO(dd,
10015                 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10016                                     getQRegLO(nn, Ity_F64)));
10017       putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10018       putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10019       DIP("fcvtxn s%u, d%u\n", dd, nn);
10020       return True;
10021    }
10022 
10023    ix = 0; /*INVALID*/
10024    switch (opcode) {
10025       case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10026       case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10027       case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10028       default: break;
10029    }
10030    if (ix > 0) {
10031       /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10032       /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10033       /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10034       /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10035       /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10036       /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10037       /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10038       /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10039       /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10040       /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10041       Bool           isD  = (size & 1) == 1;
10042       IRType         tyF  = isD ? Ity_F64 : Ity_F32;
10043       IRType         tyI  = isD ? Ity_I64 : Ity_I32;
10044       IRRoundingMode irrm = 8; /*impossible*/
10045       HChar          ch   = '?';
10046       switch (ix) {
10047          case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10048          case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
10049          case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
                 /* kludge: FCVTA* should round ties away from zero, not to even */
10050          case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
10051          case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
10052          default: vassert(0);
10053       }
10054       IROp cvt = Iop_INVALID;
10055       if (bitU == 1) {
10056          cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10057       } else {
10058          cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10059       }
10060       IRTemp src = newTemp(tyF);
10061       IRTemp res = newTemp(tyI);
10062       assign(src, getQRegLane(nn, 0, tyF));
10063       assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10064       putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10065       if (!isD) {
10066          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10067       }
10068       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10069       HChar sOrD = isD ? 'd' : 's';
10070       DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10071           sOrD, dd, sOrD, nn);
10072       return True;
10073    }
10074 
10075    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10076       /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10077       /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10078       Bool   isU = bitU == 1;
10079       Bool   isD = (size & 1) == 1;
10080       IRType tyI = isD ? Ity_I64 : Ity_I32;
10081       IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10082                        : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10083       IRTemp rm  = mk_get_IR_rounding_mode();
10084       putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10085       if (!isD) {
10086          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10087       }
10088       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10089       HChar c = isD ? 'd' : 's';
10090       DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10091       return True;
10092    }
10093 
10094    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10095       /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
10096       /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10097       Bool isSQRT = bitU == 1;
10098       Bool isD    = (size & 1) == 1;
10099       IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10100                            : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10101       IRTemp resV = newTempV128();
10102       assign(resV, unop(op, getQReg128(nn)));
10103       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10104                                                              mkexpr(resV))));
10105       HChar c = isD ? 'd' : 's';
10106       DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10107       return True;
10108    }
10109 
10110    if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10111       /* -------- 0,1x,11111: FRECPX  d_d, s_s -------- */
10112       Bool   isD = (size & 1) == 1;
10113       IRType ty  = isD ? Ity_F64 : Ity_F32;
10114       IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10115       IRTemp res = newTemp(ty);
10116       IRTemp rm  = mk_get_IR_rounding_mode();
10117       assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10118       putQReg128(dd, mkV128(0x0000));
10119       putQRegLane(dd, 0, mkexpr(res));
10120       HChar c = isD ? 'd' : 's';
10121       DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10122       return True;
10123    }
10124 
10125    return False;
10126 #  undef INSN
10127 }
10128 
10129 
10130 static
10131 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10132 {
10133    /* 31   28    23   21 20 19 15     11   9 4
10134       01 U 11111 size L  M  m  opcode H  0 n d
10135       Decode fields are: u,size,opcode
10136       M is really part of the mm register number.  Individual
10137       cases need to inspect L and H though.
10138    */
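   /* For example, with S-sized elements the index is H:L and the m
      register number is M:m, so H=1 L=1 M=0 m=0010 selects v2.s[3]
      (see the index and mm computations in the cases below). */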
10139 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10140    if (INSN(31,30) != BITS2(0,1)
10141        || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10142       return False;
10143    }
10144    UInt bitU   = INSN(29,29);
10145    UInt size   = INSN(23,22);
10146    UInt bitL   = INSN(21,21);
10147    UInt bitM   = INSN(20,20);
10148    UInt mmLO4  = INSN(19,16);
10149    UInt opcode = INSN(15,12);
10150    UInt bitH   = INSN(11,11);
10151    UInt nn     = INSN(9,5);
10152    UInt dd     = INSN(4,0);
10153    vassert(size < 4);
10154    vassert(bitH < 2 && bitM < 2 && bitL < 2);
10155 
10156    if (bitU == 0 && size >= X10
10157        && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10158       /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10159       /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10160       Bool isD   = (size & 1) == 1;
10161       Bool isSUB = opcode == BITS4(0,1,0,1);
10162       UInt index;
10163       if      (!isD)             index = (bitH << 1) | bitL;
10164       else if (isD && bitL == 0) index = bitH;
10165       else return False; // sz:L == x11 => unallocated encoding
10166       vassert(index < (isD ? 2 : 4));
10167       IRType ity   = isD ? Ity_F64 : Ity_F32;
10168       IRTemp elem  = newTemp(ity);
10169       UInt   mm    = (bitM << 4) | mmLO4;
10170       assign(elem, getQRegLane(mm, index, ity));
10171       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10172       IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10173       IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10174       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10175       IRTemp rm    = mk_get_IR_rounding_mode();
10176       IRTemp t1    = newTempV128();
10177       IRTemp t2    = newTempV128();
10178       // FIXME: double rounding; use FMA primops instead
10179       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10180       assign(t2, triop(isSUB ? opSUB : opADD,
10181                        mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10182       putQReg128(dd,
10183                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10184                                                          mkexpr(t2))));
10185       const HChar c = isD ? 'd' : 's';
10186       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10187           c, dd, c, nn, nameQReg128(mm), c, index);
10188       return True;
10189    }
10190 
10191    if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10192       /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
10193       /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10194       Bool isD    = (size & 1) == 1;
10195       Bool isMULX = bitU == 1;
10196       UInt index;
10197       if      (!isD)             index = (bitH << 1) | bitL;
10198       else if (isD && bitL == 0) index = bitH;
10199       else return False; // sz:L == x11 => unallocated encoding
10200       vassert(index < (isD ? 2 : 4));
10201       IRType ity   = isD ? Ity_F64 : Ity_F32;
10202       IRTemp elem  = newTemp(ity);
10203       UInt   mm    = (bitM << 4) | mmLO4;
10204       assign(elem, getQRegLane(mm, index, ity));
10205       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10206       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10207       IRTemp rm    = mk_get_IR_rounding_mode();
10208       IRTemp t1    = newTempV128();
10209       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
10210       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10211       putQReg128(dd,
10212                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10213                                                          mkexpr(t1))));
10214       const HChar c = isD ? 'd' : 's';
10215       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10216           c, dd, c, nn, nameQReg128(mm), c, index);
10217       return True;
10218    }
10219 
10220    if (bitU == 0
10221        && (opcode == BITS4(1,0,1,1)
10222            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10223       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10224       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10225       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10226       /* Widens, and size refers to the narrowed lanes. */
10227       UInt ks = 3;
10228       switch (opcode) {
10229          case BITS4(1,0,1,1): ks = 0; break;
10230          case BITS4(0,0,1,1): ks = 1; break;
10231          case BITS4(0,1,1,1): ks = 2; break;
10232          default: vassert(0);
10233       }
10234       vassert(ks <= 2);
10235       UInt mm  = 32; // invalid
10236       UInt ix  = 16; // invalid
10237       switch (size) {
10238          case X00:
10239             return False; // h_b_b[] case is not allowed
10240          case X01:
10241             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10242          case X10:
10243             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10244          case X11:
10245             return False; // q_d_d[] case is not allowed
10246          default:
10247             vassert(0);
10248       }
10249       vassert(mm < 32 && ix < 16);
10250       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10251       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10252       newTempsV128_2(&vecN, &vecD);
10253       assign(vecN, getQReg128(nn));
10254       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10255       assign(vecD, getQReg128(dd));
10256       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10257                        False/*!is2*/, size, "mas"[ks],
10258                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10259       IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10260       putQReg128(dd, unop(opZHI, mkexpr(res)));
10261       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10262       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10263       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10264          updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10265       }
10266       const HChar* nm        = ks == 0 ? "sqdmull"
10267                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10268       const HChar  arrNarrow = "bhsd"[size];
10269       const HChar  arrWide   = "bhsd"[size+1];
10270       DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10271           nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10272       return True;
10273    }
10274 
10275    if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10276       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10277       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10278       UInt mm  = 32; // invalid
10279       UInt ix  = 16; // invalid
10280       switch (size) {
10281          case X00:
10282             return False; // b case is not allowed
10283          case X01:
10284             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10285          case X10:
10286             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10287          case X11:
10288             return False; // q case is not allowed
10289          default:
10290             vassert(0);
10291       }
10292       vassert(mm < 32 && ix < 16);
10293       Bool isR = opcode == BITS4(1,1,0,1);
10294       IRTemp res, sat1q, sat1n, vN, vM;
10295       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10296       vN = newTempV128();
10297       assign(vN, getQReg128(nn));
10298       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10299       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10300       IROp opZHI = mkVecZEROHIxxOFV128(size);
10301       putQReg128(dd, unop(opZHI, mkexpr(res)));
10302       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10303       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
10304       HChar ch         = size == X01 ? 'h' : 's';
10305       DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10306       return True;
10307    }
10308 
10309    return False;
10310 #  undef INSN
10311 }
10312 
10313 
10314 static
10315 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10316 {
10317    /* 31    28     22   18   15     10 9 4
10318       0 q u 011110 immh immb opcode 1  n d
10319       Decode fields: u,opcode
10320    */
10321 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10322    if (INSN(31,31) != 0
10323        || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10324       return False;
10325    }
10326    UInt bitQ   = INSN(30,30);
10327    UInt bitU   = INSN(29,29);
10328    UInt immh   = INSN(22,19);
10329    UInt immb   = INSN(18,16);
10330    UInt opcode = INSN(15,11);
10331    UInt nn     = INSN(9,5);
10332    UInt dd     = INSN(4,0);
10333 
10334    if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10335       /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10336       /* -------- 1,00000 USHR std7_std7_#imm -------- */
10337       /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10338       /* -------- 1,00010 USRA std7_std7_#imm -------- */
10339       /* laneTy, shift = case immh:immb of
10340                          0001:xxx -> B, SHR:8-xxx
10341                          001x:xxx -> H, SHR:16-xxxx
10342                          01xx:xxx -> S, SHR:32-xxxxx
10343                          1xxx:xxx -> D, SHR:64-xxxxxx
10344                          other    -> invalid
10345       */
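      /* For example, immh:immb = 0110:101 selects S lanes with
         xxxxx = 10101, so shift = 32 - 21 = 11. */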
10346       UInt size  = 0;
10347       UInt shift = 0;
10348       Bool isQ   = bitQ == 1;
10349       Bool isU   = bitU == 1;
10350       Bool isAcc = opcode == BITS5(0,0,0,1,0);
10351       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10352       if (!ok || (bitQ == 0 && size == X11)) return False;
10353       vassert(size <= 3);
10354       UInt lanebits = 8 << size;
10355       vassert(shift >= 1 && shift <= lanebits);
10356       IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
10357       IRExpr* src = getQReg128(nn);
10358       IRTemp  shf = newTempV128();
10359       IRTemp  res = newTempV128();
10360       if (shift == lanebits && isU) {
10361          assign(shf, mkV128(0x0000));
10362       } else {
10363          UInt nudge = 0;
10364          if (shift == lanebits) {
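            /* A shift by the full lane width is not a valid IR immediate.
               For this signed case, shifting by one bit less gives the
               same result, since all remaining bits are copies of the
               sign bit. */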
10365             vassert(!isU);
10366             nudge = 1;
10367          }
10368          assign(shf, binop(op, src, mkU8(shift - nudge)));
10369       }
10370       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10371                         : mkexpr(shf));
10372       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10373       HChar laneCh = "bhsd"[size];
10374       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10375       const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10376                               : (isU ? "ushr" : "sshr");
10377       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10378           nameQReg128(dd), nLanes, laneCh,
10379           nameQReg128(nn), nLanes, laneCh, shift);
10380       return True;
10381    }
10382 
10383    if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10384       /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10385       /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10386       /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10387       /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10388       /* laneTy, shift = case immh:immb of
10389                          0001:xxx -> B, SHR:8-xxx
10390                          001x:xxx -> H, SHR:16-xxxx
10391                          01xx:xxx -> S, SHR:32-xxxxx
10392                          1xxx:xxx -> D, SHR:64-xxxxxx
10393                          other    -> invalid
10394       */
10395       UInt size  = 0;
10396       UInt shift = 0;
10397       Bool isQ   = bitQ == 1;
10398       Bool isU   = bitU == 1;
10399       Bool isAcc = opcode == BITS5(0,0,1,1,0);
10400       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10401       if (!ok || (bitQ == 0 && size == X11)) return False;
10402       vassert(size <= 3);
10403       UInt lanebits = 8 << size;
10404       vassert(shift >= 1 && shift <= lanebits);
10405       IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10406       IRExpr* src  = getQReg128(nn);
10407       IRTemp  imm8 = newTemp(Ity_I8);
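      /* The rounding-shift primops take a signed per-lane shift amount,
         where a negative amount is assumed to mean "shift right"; hence
         the negated immediate, replicated into every byte lane. */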
10408       assign(imm8, mkU8((UChar)(-shift)));
10409       IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10410       IRTemp  shf  = newTempV128();
10411       IRTemp  res  = newTempV128();
10412       assign(shf, binop(op, src, amt));
10413       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10414                         : mkexpr(shf));
10415       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10416       HChar laneCh = "bhsd"[size];
10417       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10418       const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10419                               : (isU ? "urshr" : "srshr");
10420       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10421           nameQReg128(dd), nLanes, laneCh,
10422           nameQReg128(nn), nLanes, laneCh, shift);
10423       return True;
10424    }
10425 
10426    if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10427       /* -------- 1,01000 SRI std7_std7_#imm -------- */
10428       /* laneTy, shift = case immh:immb of
10429                          0001:xxx -> B, SHR:8-xxx
10430                          001x:xxx -> H, SHR:16-xxxx
10431                          01xx:xxx -> S, SHR:32-xxxxx
10432                          1xxx:xxx -> D, SHR:64-xxxxxx
10433                          other    -> invalid
10434       */
10435       UInt size  = 0;
10436       UInt shift = 0;
10437       Bool isQ   = bitQ == 1;
10438       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10439       if (!ok || (bitQ == 0 && size == X11)) return False;
10440       vassert(size <= 3);
10441       UInt lanebits = 8 << size;
10442       vassert(shift >= 1 && shift <= lanebits);
10443       IRExpr* src = getQReg128(nn);
10444       IRTemp  res = newTempV128();
10445       if (shift == lanebits) {
10446          assign(res, getQReg128(dd));
10447       } else {
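         /* SRI keeps the top 'shift' bits of each lane of dd: nmask has
            exactly those bits set, since shifting all-ones left by
            (lanebits - shift) clears the low bits. */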
10448          assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10449          IRExpr* nmask = binop(mkVecSHLN(size),
10450                                mkV128(0xFFFF), mkU8(lanebits - shift));
10451          IRTemp  tmp   = newTempV128();
10452          assign(tmp, binop(Iop_OrV128,
10453                            mkexpr(res),
10454                            binop(Iop_AndV128, getQReg128(dd), nmask)));
10455          res = tmp;
10456       }
10457       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10458       HChar laneCh = "bhsd"[size];
10459       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10460       DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10461           nameQReg128(dd), nLanes, laneCh,
10462           nameQReg128(nn), nLanes, laneCh, shift);
10463       return True;
10464    }
10465 
10466    if (opcode == BITS5(0,1,0,1,0)) {
10467       /* -------- 0,01010 SHL std7_std7_#imm -------- */
10468       /* -------- 1,01010 SLI std7_std7_#imm -------- */
10469       /* laneTy, shift = case immh:immb of
10470                          0001:xxx -> B, xxx
10471                          001x:xxx -> H, xxxx
10472                          01xx:xxx -> S, xxxxx
10473                          1xxx:xxx -> D, xxxxxx
10474                          other    -> invalid
10475       */
10476       UInt size  = 0;
10477       UInt shift = 0;
10478       Bool isSLI = bitU == 1;
10479       Bool isQ   = bitQ == 1;
10480       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10481       if (!ok || (bitQ == 0 && size == X11)) return False;
10482       vassert(size <= 3);
10483       /* The shift encoding has opposite sign for the leftwards case.
10484          Adjust shift to compensate. */
10485       UInt lanebits = 8 << size;
10486       shift = lanebits - shift;
10487       vassert(shift < lanebits);
10488       IROp    op  = mkVecSHLN(size);
10489       IRExpr* src = getQReg128(nn);
10490       IRTemp  res = newTempV128();
10491       if (shift == 0) {
10492          assign(res, src);
10493       } else {
10494          assign(res, binop(op, src, mkU8(shift)));
10495          if (isSLI) {
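            /* As for SRI but mirrored: keep the low 'shift' bits of dd. */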
10496             IRExpr* nmask = binop(mkVecSHRN(size),
10497                                   mkV128(0xFFFF), mkU8(lanebits - shift));
10498             IRTemp  tmp   = newTempV128();
10499             assign(tmp, binop(Iop_OrV128,
10500                               mkexpr(res),
10501                               binop(Iop_AndV128, getQReg128(dd), nmask)));
10502             res = tmp;
10503          }
10504       }
10505       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10506       HChar laneCh = "bhsd"[size];
10507       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10508       const HChar* nm = isSLI ? "sli" : "shl";
10509       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10510           nameQReg128(dd), nLanes, laneCh,
10511           nameQReg128(nn), nLanes, laneCh, shift);
10512       return True;
10513    }
10514 
10515    if (opcode == BITS5(0,1,1,1,0)
10516        || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10517       /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
10518       /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
10519       /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
10520       UInt size  = 0;
10521       UInt shift = 0;
10522       Bool isQ   = bitQ == 1;
10523       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10524       if (!ok || (bitQ == 0 && size == X11)) return False;
10525       vassert(size <= 3);
10526       /* The shift encoding has opposite sign for the leftwards case.
10527          Adjust shift to compensate. */
10528       UInt lanebits = 8 << size;
10529       shift = lanebits - shift;
10530       vassert(shift < lanebits);
10531       const HChar* nm = NULL;
10532       /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10533       else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10534       else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10535       else vassert(0);
10536       IRTemp qDiff1 = IRTemp_INVALID;
10537       IRTemp qDiff2 = IRTemp_INVALID;
10538       IRTemp res = IRTemp_INVALID;
10539       IRTemp src = newTempV128();
10540       assign(src, getQReg128(nn));
10541       math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10542       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10543       updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10544                                     isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10545       const HChar* arr = nameArr_Q_SZ(bitQ, size);
10546       DIP("%s %s.%s, %s.%s, #%u\n", nm,
10547           nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10548       return True;
10549    }
10550 
10551    if (bitU == 0
10552        && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10553       /* -------- 0,10000  SHRN{,2} #imm -------- */
10554       /* -------- 0,10001 RSHRN{,2} #imm -------- */
10555       /* Narrows, and size is the narrow size. */
10556       UInt size  = 0;
10557       UInt shift = 0;
10558       Bool is2   = bitQ == 1;
10559       Bool isR   = opcode == BITS5(1,0,0,0,1);
10560       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10561       if (!ok || size == X11) return False;
10562       vassert(shift >= 1);
10563       IRTemp t1 = newTempV128();
10564       IRTemp t2 = newTempV128();
10565       IRTemp t3 = newTempV128();
10566       assign(t1, getQReg128(nn));
10567       assign(t2, isR ? binop(mkVecADD(size+1),
10568                              mkexpr(t1),
10569                              mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10570                      : mkexpr(t1));
10571       assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10572       IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10573       putLO64andZUorPutHI64(is2, dd, t4);
10574       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10575       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10576       DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10577           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10578       return True;
10579    }
10580 
10581    if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10582        || (bitU == 1
10583            && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10584       /* -------- 0,10010   SQSHRN{,2} #imm -------- */
10585       /* -------- 1,10010   UQSHRN{,2} #imm -------- */
10586       /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
10587       /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
10588       /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
10589       /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10590       UInt size  = 0;
10591       UInt shift = 0;
10592       Bool is2   = bitQ == 1;
10593       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10594       if (!ok || size == X11) return False;
10595       vassert(shift >= 1 && shift <= (8 << size));
10596       const HChar* nm = "??";
10597       IROp op = Iop_INVALID;
10598       /* Decide on the name and the operation. */
10599       /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10600          nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10601       }
10602       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10603          nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10604       }
10605       else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10606          nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10607       }
10608       else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10609          nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10610       }
10611       else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10612          nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10613       }
10614       else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10615          nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10616       }
10617       else vassert(0);
10618       /* Compute the result (Q, shifted value) pair. */
10619       IRTemp src128 = newTempV128();
10620       assign(src128, getQReg128(nn));
10621       IRTemp pair = newTempV128();
10622       assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10623       /* Update the result reg */
10624       IRTemp res64in128 = newTempV128();
10625       assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10626       putLO64andZUorPutHI64(is2, dd, res64in128);
10627       /* Update the Q flag. */
10628       IRTemp q64q64 = newTempV128();
10629       assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10630       IRTemp z128 = newTempV128();
10631       assign(z128, mkV128(0x0000));
10632       updateQCFLAGwithDifference(q64q64, z128);
10633       /* */
10634       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10635       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10636       DIP("%s %s.%s, %s.%s, #%u\n", nm,
10637           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10638       return True;
10639    }
10640 
10641    if (opcode == BITS5(1,0,1,0,0)) {
10642       /* -------- 0,10100 SSHLL{,2} #imm -------- */
10643       /* -------- 1,10100 USHLL{,2} #imm -------- */
10644       /* 31  28     22   18   15     9 4
10645          0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
10646          0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
10647          where Ta,Tb,sh
10648            = case immh of 1xxx -> invalid
10649                           01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
10650                           001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
10651                           0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
10652                           0000 -> AdvSIMD modified immediate (???)
10653       */
10654       Bool    isQ   = bitQ == 1;
10655       Bool    isU   = bitU == 1;
10656       UInt    immhb = (immh << 3) | immb;
10657       IRTemp  src   = newTempV128();
10658       IRTemp  zero  = newTempV128();
10659       IRExpr* res   = NULL;
10660       UInt    sh    = 0;
10661       const HChar* ta = "??";
10662       const HChar* tb = "??";
10663       assign(src, getQReg128(nn));
10664       assign(zero, mkV128(0x0000));
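      /* Widening trick: interleaving with zero puts each source lane in
         the top half of a double-width lane; the signed or unsigned right
         shift by (lanebits - sh) then produces the sign- or zero-extended
         value shifted left by sh. */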
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%u\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                           : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
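      /* Each lane is converted to FP and then multiplied by
         scaleE == 2^-fbits, giving the fixed-point interpretation of the
         source with |fbits| fraction bits.  E.g. ucvtf with fbits == 8
         maps the integer 0x180 to 1.5. */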
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyI);
         IRTemp res = newTemp(tyF);
         IRTemp rm  = mk_get_IR_rounding_mode();
         assign(src, getQRegLane(nn, i, tyI));
         assign(res, triop(opMUL, mkexpr(rm),
                                  binop(opCVT, mkexpr(rm), mkexpr(src)),
                                  scaleE));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

   if (opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                           : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
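      /* Dual of the SCVTF/UCVTF case above: per lane, first scale up by
         2^fbits, then convert to integer with rounding towards zero,
         producing the fixed-point encoding of the FP value. */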
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyF);
         IRTemp res = newTemp(tyI);
         IRTemp rm  = newTemp(Ity_I32);
         assign(src, getQRegLane(nn, i, tyF));
         assign(rm,  mkU32(Irrm_ZERO));
         assign(res, binop(opCVT, mkexpr(rm),
                                  triop(opMUL, mkexpr(rm),
                                               mkexpr(src), scaleE)));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
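      /* For the rounding forms, the addend is 2^(shift-1) in each wide
         lane -- half the weight of the narrow result's LSB -- so taking
         the top half below rounds to nearest rather than truncating.
         E.g. for 8-bit narrow lanes (shift == 8) the addend is 0x80. */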
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100  UMULL{2} -------- */ // 0
      /* -------- 0,1000  SMLAL{2} -------- */ // 1
      /* -------- 1,1000  UMLAL{2} -------- */ // 1
      /* -------- 0,1010  SMLSL{2} -------- */ // 2
      /* -------- 1,1010  UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
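      /* ks encodes the accumulation mode: 0 = plain multiply (MULL),
         1 = multiply-accumulate-add (MLAL), 2 = multiply-accumulate-sub
         (MLSL).  The character 'm'/'a'/'s' picked out of "mas" below is
         how math_MULL_ACC is told which variant to generate. */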
      vassert(ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110  PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
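      /* Only the 8x8 -> 16 polynomial multiply is handled; anything
         else, including the 64x64 -> 128 (.1q) form, is rejected
         below. */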
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
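      /* The shift right by 1 is done in the double-width domain
         (arithmetic for the signed forms, logical for the unsigned
         forms), so the carry/borrow bit that a narrow add/sub would
         lose is retained -- exactly the halving semantics. */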
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
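      /* All four cases use the xor/and selection identity
         x ^ ((x ^ y) & mask), which gives y where mask bits are 1 and
         x where they are 0.  Hence BSL selects between N and M under
         control of D, BIT inserts N bits where M is set, and BIF
         inserts N bits where M is clear.  E.g. BSL with D = 1010b,
         N = 1111b, M = 0000b produces 1010b. */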
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                          mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isGT  = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl"  : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
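      /* The QANDxQxSH ops hand back a 256-bit pair: V128_0 holds the
         shifted lanes (the architectural result) and V128_1 holds the
         per-lane saturation indication, which is compared against zero
         below to update QC. */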
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                              : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isSUB = bitU == 1;
      IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t     = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp   opMUL    = mkVecMUL(size);
      IROp   opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res      = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
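      /* Pairwise scheme: concatenate the even lanes of M:N and the odd
         lanes of M:N, then combine the two lane-wise.  That computes
         op(elem[2i], elem[2i+1]) across the whole M:N pair in a single
         vector operation. */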
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res   = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1    = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
11765 
11766    if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
11767       /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11768       /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11769       Bool isSQRT = (size & 2) == 2;
11770       Bool isD    = (size & 1) == 1;
11771       if (bitQ == 0 && isD) return False; // implied 1d case
11772       IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
11773                            : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
11774       IRTemp res = newTempV128();
11775       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11776       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11777       const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11778       DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
11779           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11780       return True;
11781    }
11782 
11783    return False;
11784 #  undef INSN
11785 }
11786 
11787 
11788 static
dis_AdvSIMD_two_reg_misc(DisResult * dres,UInt insn)11789 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
11790 {
11791    /* 31 30 29 28    23   21    16     11 9 4
11792       0  Q  U  01110 size 10000 opcode 10 n d
11793       Decode fields: U,size,opcode
11794    */
11795 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
11796    if (INSN(31,31) != 0
11797        || INSN(28,24) != BITS5(0,1,1,1,0)
11798        || INSN(21,17) != BITS5(1,0,0,0,0)
11799        || INSN(11,10) != BITS2(1,0)) {
11800       return False;
11801    }
11802    UInt bitQ   = INSN(30,30);
11803    UInt bitU   = INSN(29,29);
11804    UInt size   = INSN(23,22);
11805    UInt opcode = INSN(16,12);
11806    UInt nn     = INSN(9,5);
11807    UInt dd     = INSN(4,0);
11808    vassert(size < 4);
11809 
11810    if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
11811       /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
11812       /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
11813       /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
11814       const IROp iops[3] = { Iop_Reverse8sIn64_x2,
11815                              Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
11816       vassert(size <= 2);
11817       IRTemp res = newTempV128();
11818       assign(res, unop(iops[size], getQReg128(nn)));
11819       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11820       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11821       DIP("%s %s.%s, %s.%s\n", "rev64",
11822           nameQReg128(dd), arr, nameQReg128(nn), arr);
11823       return True;
11824    }
11825 
11826    if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
11827       /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
11828       /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
11829       Bool   isH = size == X01;
11830       IRTemp res = newTempV128();
11831       IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
11832       assign(res, unop(iop, getQReg128(nn)));
11833       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11834       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11835       DIP("%s %s.%s, %s.%s\n", "rev32",
11836           nameQReg128(dd), arr, nameQReg128(nn), arr);
11837       return True;
11838    }
11839 
11840    if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
11841       /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
11842       IRTemp res = newTempV128();
11843       assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
11844       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11845       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11846       DIP("%s %s.%s, %s.%s\n", "rev16",
11847           nameQReg128(dd), arr, nameQReg128(nn), arr);
11848       return True;
11849    }
11850 
11851    if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
11852       /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
11853       /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
11854       /* -------- 0,xx,00110: SADALP std6_std6 -------- */
11855       /* -------- 1,xx,00110: UADALP std6_std6 -------- */
11856       /* Widens, and size refers to the narrow size. */
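      /* E.g. saddlp v0.4s, v1.8h sums each pair of adjacent signed halfwords
         into one 32-bit lane; the *ADALP forms also accumulate into Vd. */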
11857       if (size == X11) return False; // no 1d or 2d cases
11858       Bool   isU   = bitU == 1;
11859       Bool   isACC = opcode == BITS5(0,0,1,1,0);
11860       IRTemp src   = newTempV128();
11861       IRTemp sum   = newTempV128();
11862       IRTemp res   = newTempV128();
11863       assign(src, getQReg128(nn));
11864       assign(sum,
11865              binop(mkVecADD(size+1),
11866                    mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11867                              isU, True/*fromOdd*/, size, mkexpr(src))),
11868                    mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11869                              isU, False/*!fromOdd*/, size, mkexpr(src)))));
11870       assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
11871                         : mkexpr(sum));
11872       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11873       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11874       const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
11875       DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
11876                                      : (isU ? "uaddlp" : "saddlp"),
11877           nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
11878       return True;
11879    }
11880 
11881    if (opcode == BITS5(0,0,0,1,1)) {
11882       /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
11883       /* -------- 1,xx,00011: USQADD std7_std7 -------- */
11884       if (bitQ == 0 && size == X11) return False; // implied 1d case
11885       Bool isUSQADD = bitU == 1;
11886       /* This is switched (in the US vs SU sense) deliberately.
11887          SUQADD corresponds to the ExtUSsatSS variants and
11888          USQADD corresponds to the ExtSUsatUU variants.
11889          See libvex_ir for more details. */
11890       IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
11891                              : mkVecQADDEXTUSSATSS(size);
11892       IROp   nop  = mkVecADD(size);
11893       IRTemp argL = newTempV128();
11894       IRTemp argR = newTempV128();
11895       IRTemp qres = newTempV128();
11896       IRTemp nres = newTempV128();
11897       /* Because the two arguments to the addition are implicitly
11898          extended differently (one signedly, the other unsignedly) it is
11899          important to present them to the primop in the correct order. */
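      /* Concretely, for SUQADD the Vn lanes are taken unsigned and the Vd
         lanes signed, with signed saturation of the sum; USQADD is the
         mirror image (signed Vn, unsigned Vd, unsigned saturation). */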
11900       assign(argL, getQReg128(nn));
11901       assign(argR, getQReg128(dd));
11902       assign(qres, math_MAYBE_ZERO_HI64_fromE(
11903                       bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11904       assign(nres, math_MAYBE_ZERO_HI64_fromE(
11905                       bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11906       putQReg128(dd, mkexpr(qres));
11907       updateQCFLAGwithDifference(qres, nres);
11908       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11909       DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
11910           nameQReg128(dd), arr, nameQReg128(nn), arr);
11911       return True;
11912    }
11913 
11914    if (opcode == BITS5(0,0,1,0,0)) {
11915       /* -------- 0,xx,00100: CLS std6_std6 -------- */
11916       /* -------- 1,xx,00100: CLZ std6_std6 -------- */
11917       if (size == X11) return False; // no 1d or 2d cases
11918       const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
11919       const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
11920       Bool   isCLZ = bitU == 1;
11921       IRTemp res   = newTempV128();
11922       vassert(size <= 2);
11923       assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
11924       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11925       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11926       DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
11927           nameQReg128(dd), arr, nameQReg128(nn), arr);
11928       return True;
11929    }
11930 
11931    if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
11932       /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
11933       /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
11934       IRTemp res = newTempV128();
11935       assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
11936       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11937       const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11938       DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
11939           nameQReg128(dd), arr, nameQReg128(nn), arr);
11940       return True;
11941    }
11942 
11943    if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
11944       /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
11945       IRTemp res = newTempV128();
11946       assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
11947       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11948       const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11949       DIP("%s %s.%s, %s.%s\n", "rbit",
11950           nameQReg128(dd), arr, nameQReg128(nn), arr);
11951       return True;
11952    }
11953 
11954    if (opcode == BITS5(0,0,1,1,1)) {
11955       /* -------- 0,xx,00111 SQABS std7_std7 -------- */
11956       /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
11957       if (bitQ == 0 && size == X11) return False; // implied 1d case
11958       Bool   isNEG  = bitU == 1;
11959       IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
11960       (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
11961                                          getQReg128(nn), size );
11962       IRTemp qres = newTempV128(), nres = newTempV128();
11963       assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
11964       assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
11965       putQReg128(dd, mkexpr(qres));
11966       updateQCFLAGwithDifference(qres, nres);
11967       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11968       DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
11969           nameQReg128(dd), arr, nameQReg128(nn), arr);
11970       return True;
11971    }
11972 
11973    if (opcode == BITS5(0,1,0,0,0)) {
11974       /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
11975       /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
11976       if (bitQ == 0 && size == X11) return False; // implied 1d case
11977       Bool    isGT  = bitU == 0;
11978       IRExpr* argL  = getQReg128(nn);
11979       IRExpr* argR  = mkV128(0x0000);
11980       IRTemp  res   = newTempV128();
11981       IROp    opGTS = mkVecCMPGTS(size);
11982       assign(res, isGT ? binop(opGTS, argL, argR)
11983                        : unop(Iop_NotV128, binop(opGTS, argR, argL)));
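      /* x >=s 0 is computed as NOT(0 >s x), reusing the signed-GT primop. */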
11984       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11985       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11986       DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
11987           nameQReg128(dd), arr, nameQReg128(nn), arr);
11988       return True;
11989    }
11990 
11991    if (opcode == BITS5(0,1,0,0,1)) {
11992       /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
11993       /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
11994       if (bitQ == 0 && size == X11) return False; // implied 1d case
11995       Bool    isEQ = bitU == 0;
11996       IRExpr* argL = getQReg128(nn);
11997       IRExpr* argR = mkV128(0x0000);
11998       IRTemp  res  = newTempV128();
11999       assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12000                        : unop(Iop_NotV128,
12001                               binop(mkVecCMPGTS(size), argL, argR)));
12002       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12003       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12004       DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12005           nameQReg128(dd), arr, nameQReg128(nn), arr);
12006       return True;
12007    }
12008 
12009    if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12010       /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12011       if (bitQ == 0 && size == X11) return False; // implied 1d case
12012       IRExpr* argL = getQReg128(nn);
12013       IRExpr* argR = mkV128(0x0000);
12014       IRTemp  res  = newTempV128();
12015       assign(res, binop(mkVecCMPGTS(size), argR, argL));
12016       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12017       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12018       DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12019           nameQReg128(dd), arr, nameQReg128(nn), arr);
12020       return True;
12021    }
12022 
12023    if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12024       /* -------- 0,xx,01011: ABS std7_std7 -------- */
12025       if (bitQ == 0 && size == X11) return False; // implied 1d case
12026       IRTemp res = newTempV128();
12027       assign(res, unop(mkVecABS(size), getQReg128(nn)));
12028       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12029       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12030       DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12031       return True;
12032    }
12033 
12034    if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12035       /* -------- 1,xx,01011: NEG std7_std7 -------- */
12036       if (bitQ == 0 && size == X11) return False; // implied 1d case
12037       IRTemp res = newTempV128();
12038       assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12039       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12040       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12041       DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12042       return True;
12043    }
12044 
12045    UInt ix = 0; /*INVALID*/
12046    if (size >= X10) {
12047       switch (opcode) {
12048          case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12049          case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12050          case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12051          default: break;
12052       }
12053    }
12054    if (ix > 0) {
12055       /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12056       /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12057       /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12058       /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12059       /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12060       if (bitQ == 0 && size == X11) return False; // implied 1d case
12061       Bool   isD     = size == X11;
12062       IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12063       IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12064       IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12065       IROp   opCmp   = Iop_INVALID;
12066       Bool   swap    = False;
12067       const HChar* nm = "??";
12068       switch (ix) {
12069          case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12070          case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12071          case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12072          case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12073          case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12074          default: vassert(0);
12075       }
12076       IRExpr* zero = mkV128(0x0000);
12077       IRTemp res = newTempV128();
12078       assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12079                        : binop(opCmp, getQReg128(nn), zero));
12080       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12081       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12082       DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12083           nameQReg128(dd), arr, nameQReg128(nn), arr);
12084       return True;
12085    }
12086 
12087    if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12088       /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12089       /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12090       if (bitQ == 0 && size == X11) return False; // implied 1d case
12091       Bool   isFNEG = bitU == 1;
12092       IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12093                              : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12094       IRTemp res = newTempV128();
12095       assign(res, unop(op, getQReg128(nn)));
12096       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12097       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12098       DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12099           nameQReg128(dd), arr, nameQReg128(nn), arr);
12100       return True;
12101    }
12102 
12103    if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12104       /* -------- 0,xx,10010: XTN{,2} -------- */
12105       if (size == X11) return False;
12106       vassert(size < 3);
12107       Bool   is2  = bitQ == 1;
12108       IROp   opN  = mkVecNARROWUN(size);
12109       IRTemp resN = newTempV128();
12110       assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12111       putLO64andZUorPutHI64(is2, dd, resN);
12112       const HChar* nm        = "xtn";
12113       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12114       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12115       DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12116           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12117       return True;
12118    }
12119 
12120    if (opcode == BITS5(1,0,1,0,0)
12121        || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12122       /* -------- 0,xx,10100: SQXTN{,2} -------- */
12123       /* -------- 1,xx,10100: UQXTN{,2} -------- */
12124       /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12125       if (size == X11) return False;
12126       vassert(size < 3);
12127       Bool  is2    = bitQ == 1;
12128       IROp  opN    = Iop_INVALID;
12129       Bool  zWiden = True;
12130       const HChar* nm = "??";
12131       /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12132          opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12133       }
12134       else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12135          opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12136       }
12137       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12138          opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12139       }
12140       else vassert(0);
12141       IRTemp src  = newTempV128();
12142       assign(src, getQReg128(nn));
12143       IRTemp resN = newTempV128();
12144       assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12145       putLO64andZUorPutHI64(is2, dd, resN);
12146       IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12147                                               size, mkexpr(resN));
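      /* Saturation check: re-widen the narrowed result and compare it with
         the original; QC gets set iff some lane failed to round-trip. */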
12148       updateQCFLAGwithDifference(src, resW);
12149       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12150       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12151       DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12152           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12153       return True;
12154    }
12155 
12156    if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12157       /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12158       /* Widens, and size is the narrow size. */
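      /* E.g. shll v0.8h, v1.8b, #8 puts each byte into the top half of a
         16-bit lane (elem << 8).  The self-interleave below duplicates each
         narrow element within its wide lane, and the wide-lane shift then
         pushes the upper copy out, leaving exactly elem << width. */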
12159       if (size == X11) return False;
12160       Bool is2   = bitQ == 1;
12161       IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12162       IROp opSHL = mkVecSHLN(size+1);
12163       IRTemp src = newTempV128();
12164       IRTemp res = newTempV128();
12165       assign(src, getQReg128(nn));
12166       assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12167                                mkU8(8 << size)));
12168       putQReg128(dd, mkexpr(res));
12169       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12170       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12171       DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
12172           nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12173       return True;
12174    }
12175 
12176    if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12177       /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
12178       UInt   nLanes = size == X00 ? 4 : 2;
12179       IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
12180       IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12181       IRTemp rm     = mk_get_IR_rounding_mode();
12182       IRTemp src[nLanes];
12183       for (UInt i = 0; i < nLanes; i++) {
12184          src[i] = newTemp(srcTy);
12185          assign(src[i], getQRegLane(nn, i, srcTy));
12186       }
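      /* FCVTN writes the low half of Vd; FCVTN2 (bitQ == 1) writes the high
         half and leaves the low half untouched, hence the lane offset. */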
12187       for (UInt i = 0; i < nLanes; i++) {
12188          putQRegLane(dd, nLanes * bitQ + i,
12189                          binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12190       }
12191       if (bitQ == 0) {
12192          putQRegLane(dd, 1, mkU64(0));
12193       }
12194       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12195       const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
12196       DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12197           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12198       return True;
12199    }
12200 
12201    if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12202       /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12203       /* Using Irrm_NEAREST here isn't right: "round to odd" truncates,
12204          then sets the result's lsb if inexact, to prevent double rounding. */
12205       IRType srcTy = Ity_F64;
12206       IROp   opCvt = Iop_F64toF32;
12207       IRTemp src[2];
12208       for (UInt i = 0; i < 2; i++) {
12209          src[i] = newTemp(srcTy);
12210          assign(src[i], getQRegLane(nn, i, srcTy));
12211       }
12212       for (UInt i = 0; i < 2; i++) {
12213          putQRegLane(dd, 2 * bitQ + i,
12214                          binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12215       }
12216       if (bitQ == 0) {
12217          putQRegLane(dd, 1, mkU64(0));
12218       }
12219       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12220       const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
12221       DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12222           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12223       return True;
12224    }
12225 
12226    if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12227       /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12228       UInt   nLanes = size == X00 ? 4 : 2;
12229       IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
12230       IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
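      /* Widening FP conversions are exact, so no rounding mode is needed;
         hence unop rather than binop-with-rm. */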
12231       IRTemp src[nLanes];
12232       for (UInt i = 0; i < nLanes; i++) {
12233          src[i] = newTemp(srcTy);
12234          assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12235       }
12236       for (UInt i = 0; i < nLanes; i++) {
12237          putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12238       }
12239       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12240       const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
12241       DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12242           nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12243       return True;
12244    }
12245 
12246    ix = 0;
12247    if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12248       ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12249       // = 1 + bitU[0]:size[1]:opcode[0]
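      // E.g. FRINTM: bitU[0]=0, size[1]=0, opcode[0]=1, so ix = 1 + 0b001 = 2.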
12250       vassert(ix >= 1 && ix <= 8);
12251       if (ix == 7) ix = 0;
12252    }
12253    if (ix > 0) {
12254       /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12255       /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12256       /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12257       /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12258       /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12259       /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12260       /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
12261       /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12262       /* rm plan:
12263          FRINTN: tieeven -- !! FIXME KLUDGED !!
12264          FRINTM: -inf
12265          FRINTP: +inf
12266          FRINTZ: zero
12267          FRINTA: tieaway -- !! FIXME KLUDGED !!
12268          FRINTX: per FPCR + "exact = TRUE"
12269          FRINTI: per FPCR
12270       */
12271       Bool isD = (size & 1) == 1;
12272       if (bitQ == 0 && isD) return False; // implied 1d case
12273 
12274       IRTemp irrmRM = mk_get_IR_rounding_mode();
12275 
12276       UChar ch = '?';
12277       IRTemp irrm = newTemp(Ity_I32);
12278       switch (ix) {
12279          case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12280          case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12281          case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12282          case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12283          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
12284          case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12285       // FRINTX ("round to integral, exact") rounds per the FPCR but also
12286       // signals Inexact when the value changes; that part isn't modelled.
12287          case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12288          case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12289          default: vassert(0);
12290       }
12291 
12292       IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12293       if (isD) {
12294          for (UInt i = 0; i < 2; i++) {
12295             putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12296                                             getQRegLane(nn, i, Ity_F64)));
12297          }
12298       } else {
12299          UInt n = bitQ==1 ? 4 : 2;
12300          for (UInt i = 0; i < n; i++) {
12301             putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12302                                             getQRegLane(nn, i, Ity_F32)));
12303          }
12304          if (bitQ == 0)
12305             putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12306       }
12307       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12308       DIP("frint%c %s.%s, %s.%s\n", ch,
12309           nameQReg128(dd), arr, nameQReg128(nn), arr);
12310       return True;
12311    }
12312 
12313    ix = 0; /*INVALID*/
12314    switch (opcode) {
12315       case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12316       case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12317       case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12318       default: break;
12319    }
12320    if (ix > 0) {
12321       /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12322       /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12323       /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12324       /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12325       /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12326       /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12327       /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12328       /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12329       /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12330       /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12331       Bool isD = (size & 1) == 1;
12332       if (bitQ == 0 && isD) return False; // implied 1d case
12333 
12334       IRRoundingMode irrm = 8; /*impossible*/
12335       HChar          ch   = '?';
12336       switch (ix) {
12337          case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12338          case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
12339          case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12340          case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
12341          case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
12342          default: vassert(0);
12343       }
12344       IROp cvt = Iop_INVALID;
12345       if (bitU == 1) {
12346          cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12347       } else {
12348          cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12349       }
12350       if (isD) {
12351          for (UInt i = 0; i < 2; i++) {
12352             putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12353                                             getQRegLane(nn, i, Ity_F64)));
12354          }
12355       } else {
12356          UInt n = bitQ==1 ? 4 : 2;
12357          for (UInt i = 0; i < n; i++) {
12358             putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12359                                             getQRegLane(nn, i, Ity_F32)));
12360          }
12361          if (bitQ == 0)
12362             putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12363       }
12364       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12365       DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12366           nameQReg128(dd), arr, nameQReg128(nn), arr);
12367       return True;
12368    }
12369 
12370    if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12371       /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
12372       /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12373       Bool isREC = bitU == 0;
12374       IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12375       IRTemp res = newTempV128();
12376       assign(res, unop(op, getQReg128(nn)));
12377       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12378       const HChar* nm  = isREC ? "urecpe" : "ursqrte";
12379       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12380       DIP("%s %s.%s, %s.%s\n", nm,
12381           nameQReg128(dd), arr, nameQReg128(nn), arr);
12382       return True;
12383    }
12384 
12385    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12386       /* -------- 0,0x,11101: SCVTF -------- */
12387       /* -------- 1,0x,11101: UCVTF -------- */
12388       /* 31  28      22 21       15     9 4
12389          0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
12390          0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
12391          with laneage:
12392          case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12393       */
12394       Bool isQ   = bitQ == 1;
12395       Bool isU   = bitU == 1;
12396       Bool isF64 = (size & 1) == 1;
12397       if (isQ || !isF64) {
12398          IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12399          UInt   nLanes = 0;
12400          Bool   zeroHI = False;
12401          const HChar* arrSpec = NULL;
12402          Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12403                                        isQ, isF64 );
12404          IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12405                           : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12406          IRTemp rm  = mk_get_IR_rounding_mode();
12407          UInt   i;
12408          vassert(ok); /* the 'if' above should ensure this */
12409          for (i = 0; i < nLanes; i++) {
12410             putQRegLane(dd, i,
12411                         binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12412          }
12413          if (zeroHI) {
12414             putQRegLane(dd, 1, mkU64(0));
12415          }
12416          DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12417              nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12418          return True;
12419       }
12420       /* else fall through */
12421    }
12422 
12423    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12424       /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
12425       /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12426       Bool isSQRT = bitU == 1;
12427       Bool isD    = (size & 1) == 1;
12428       IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12429                            : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12430       if (bitQ == 0 && isD) return False; // implied 1d case
12431       IRTemp resV = newTempV128();
12432       assign(resV, unop(op, getQReg128(nn)));
12433       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12434       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12435       DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12436           nameQReg128(dd), arr, nameQReg128(nn), arr);
12437       return True;
12438    }
12439 
12440    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12441       /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12442       Bool isD = (size & 1) == 1;
12443       IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12444       if (bitQ == 0 && isD) return False; // implied 1d case
12445       IRTemp resV = newTempV128();
12446       assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12447                              getQReg128(nn)));
12448       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12449       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12450       DIP("%s %s.%s, %s.%s\n", "fsqrt",
12451           nameQReg128(dd), arr, nameQReg128(nn), arr);
12452       return True;
12453    }
12454 
12455    return False;
12456 #  undef INSN
12457 }
12458 
12459 
12460 static
12461 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12462 {
12463    /* 31    28    23   21 20 19 15     11   9 4
12464       0 Q U 01111 size L  M  m  opcode H  0 n d
12465       Decode fields are: u,size,opcode
12466       M is really part of the mm register number.  Individual
12467       cases need to inspect L and H though.
12468    */
12469 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12470    if (INSN(31,31) != 0
12471        || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
12472       return False;
12473    }
12474    UInt bitQ   = INSN(30,30);
12475    UInt bitU   = INSN(29,29);
12476    UInt size   = INSN(23,22);
12477    UInt bitL   = INSN(21,21);
12478    UInt bitM   = INSN(20,20);
12479    UInt mmLO4  = INSN(19,16);
12480    UInt opcode = INSN(15,12);
12481    UInt bitH   = INSN(11,11);
12482    UInt nn     = INSN(9,5);
12483    UInt dd     = INSN(4,0);
12484    vassert(size < 4);
12485    vassert(bitH < 2 && bitM < 2 && bitL < 2);
12486 
12487    if (bitU == 0 && size >= X10
12488        && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12489       /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12490       /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12491       if (bitQ == 0 && size == X11) return False; // implied 1d case
12492       Bool isD   = (size & 1) == 1;
12493       Bool isSUB = opcode == BITS4(0,1,0,1);
12494       UInt index;
12495       if      (!isD)             index = (bitH << 1) | bitL;
12496       else if (isD && bitL == 0) index = bitH;
12497       else return False; // sz:L == x11 => unallocated encoding
12498       vassert(index < (isD ? 2 : 4));
12499       IRType ity   = isD ? Ity_F64 : Ity_F32;
12500       IRTemp elem  = newTemp(ity);
12501       UInt   mm    = (bitM << 4) | mmLO4;
12502       assign(elem, getQRegLane(mm, index, ity));
12503       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
12504       IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12505       IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12506       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12507       IRTemp rm    = mk_get_IR_rounding_mode();
12508       IRTemp t1    = newTempV128();
12509       IRTemp t2    = newTempV128();
12510       // FIXME: double rounding; use FMA primops instead
12511       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12512       assign(t2, triop(isSUB ? opSUB : opADD,
12513                        mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12514       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12515       const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12516       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12517           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12518           isD ? 'd' : 's', index);
12519       return True;
12520    }
12521 
12522    if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12523       /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12524       /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12525       if (bitQ == 0 && size == X11) return False; // implied 1d case
12526       Bool isD    = (size & 1) == 1;
12527       Bool isMULX = bitU == 1;
12528       UInt index;
12529       if      (!isD)             index = (bitH << 1) | bitL;
12530       else if (isD && bitL == 0) index = bitH;
12531       else return False; // sz:L == x11 => unallocated encoding
12532       vassert(index < (isD ? 2 : 4));
12533       IRType ity  = isD ? Ity_F64 : Ity_F32;
12534       IRTemp elem = newTemp(ity);
12535       UInt   mm   = (bitM << 4) | mmLO4;
12536       assign(elem, getQRegLane(mm, index, ity));
12537       IRTemp dupd = math_DUP_TO_V128(elem, ity);
12538       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
12539       IRTemp res  = newTempV128();
12540       assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12541                         mkexpr(mk_get_IR_rounding_mode()),
12542                         getQReg128(nn), mkexpr(dupd)));
12543       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12544       const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12545       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12546           isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12547           nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12548       return True;
12549    }
12550 
12551    if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12552        || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12553       /* -------- 1,xx,0000 MLA s/h variants only -------- */
12554       /* -------- 1,xx,0100 MLS s/h variants only -------- */
12555       /* -------- 0,xx,1000 MUL s/h variants only -------- */
12556       Bool isMLA = opcode == BITS4(0,0,0,0);
12557       Bool isMLS = opcode == BITS4(0,1,0,0);
12558       UInt mm    = 32; // invalid
12559       UInt ix    = 16; // invalid
12560       switch (size) {
12561          case X00:
12562             return False; // b case is not allowed
12563          case X01:
12564             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12565          case X10:
12566             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12567          case X11:
12568             return False; // d case is not allowed
12569          default:
12570             vassert(0);
12571       }
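      /* For h-sized elements only v0..v15 are encodable (mm = mmLO4) and the
         lane index is H:L:M, 0..7; for s-sized elements M instead extends the
         register number and the index is H:L, 0..3. */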
12572       vassert(mm < 32 && ix < 16);
12573       IROp   opMUL = mkVecMUL(size);
12574       IROp   opADD = mkVecADD(size);
12575       IROp   opSUB = mkVecSUB(size);
12576       HChar  ch    = size == X01 ? 'h' : 's';
12577       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12578       IRTemp vecD  = newTempV128();
12579       IRTemp vecN  = newTempV128();
12580       IRTemp res   = newTempV128();
12581       assign(vecD, getQReg128(dd));
12582       assign(vecN, getQReg128(nn));
12583       IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12584       if (isMLA || isMLS) {
12585          assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12586       } else {
12587          assign(res, prod);
12588       }
12589       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12590       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12591       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12592                                                 : (isMLS ? "mls" : "mul"),
12593           nameQReg128(dd), arr,
12594           nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12595       return True;
12596    }
12597 
12598    if (opcode == BITS4(1,0,1,0)
12599        || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12600       /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12601       /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12602       /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12603       /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12604       /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12605       /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12606       /* Widens, and size refers to the narrowed lanes. */
12607       UInt ks = 3;
12608       switch (opcode) {
12609          case BITS4(1,0,1,0): ks = 0; break;
12610          case BITS4(0,0,1,0): ks = 1; break;
12611          case BITS4(0,1,1,0): ks = 2; break;
12612          default: vassert(0);
12613       }
12614       vassert(ks >= 0 && ks <= 2);
12615       Bool isU = bitU == 1;
12616       Bool is2 = bitQ == 1;
12617       UInt mm  = 32; // invalid
12618       UInt ix  = 16; // invalid
12619       switch (size) {
12620          case X00:
12621             return False; // h_b_b[] case is not allowed
12622          case X01:
12623             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12624          case X10:
12625             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12626          case X11:
12627             return False; // q_d_d[] case is not allowed
12628          default:
12629             vassert(0);
12630       }
12631       vassert(mm < 32 && ix < 16);
12632       IRTemp vecN  = newTempV128();
12633       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12634       IRTemp vecD  = newTempV128();
12635       assign(vecN, getQReg128(nn));
12636       assign(vecD, getQReg128(dd));
12637       IRTemp res = IRTemp_INVALID;
12638       math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12639                     vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12640       putQReg128(dd, mkexpr(res));
12641       const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12642       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12643       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12644       HChar ch               = size == X01 ? 'h' : 's';
12645       DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12646           isU ? 'u' : 's', nm, is2 ? "2" : "",
12647           nameQReg128(dd), arrWide,
12648           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12649       return True;
12650    }
12651 
12652    if (bitU == 0
12653        && (opcode == BITS4(1,0,1,1)
12654            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12655       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12656       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12657       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12658       /* Widens, and size refers to the narrowed lanes. */
12659       UInt ks = 3;
12660       switch (opcode) {
12661          case BITS4(1,0,1,1): ks = 0; break;
12662          case BITS4(0,0,1,1): ks = 1; break;
12663          case BITS4(0,1,1,1): ks = 2; break;
12664          default: vassert(0);
12665       }
12666       vassert(ks >= 0 && ks <= 2);
12667       Bool is2 = bitQ == 1;
12668       UInt mm  = 32; // invalid
12669       UInt ix  = 16; // invalid
12670       switch (size) {
12671          case X00:
12672             return False; // h_b_b[] case is not allowed
12673          case X01:
12674             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12675          case X10:
12676             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12677          case X11:
12678             return False; // q_d_d[] case is not allowed
12679          default:
12680             vassert(0);
12681       }
12682       vassert(mm < 32 && ix < 16);
12683       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12684       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12685       newTempsV128_2(&vecN, &vecD);
12686       assign(vecN, getQReg128(nn));
12687       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12688       assign(vecD, getQReg128(dd));
12689       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12690                        is2, size, "mas"[ks],
12691                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12692       putQReg128(dd, mkexpr(res));
12693       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12694       updateQCFLAGwithDifference(sat1q, sat1n);
12695       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12696          updateQCFLAGwithDifference(sat2q, sat2n);
12697       }
12698       const HChar* nm        = ks == 0 ? "sqdmull"
12699                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12700       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12701       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12702       HChar ch               = size == X01 ? 'h' : 's';
12703       DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12704           nm, is2 ? "2" : "",
12705           nameQReg128(dd), arrWide,
12706           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12707       return True;
12708    }
12709 
12710    if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12711       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12712       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12713       UInt mm  = 32; // invalid
12714       UInt ix  = 16; // invalid
12715       switch (size) {
12716          case X00:
12717             return False; // b case is not allowed
12718          case X01:
12719             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12720          case X10:
12721             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12722          case X11:
12723             return False; // q case is not allowed
12724          default:
12725             vassert(0);
12726       }
12727       vassert(mm < 32 && ix < 16);
12728       Bool isR = opcode == BITS4(1,1,0,1);
12729       IRTemp res, sat1q, sat1n, vN, vM;
12730       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12731       vN = newTempV128();
12732       assign(vN, getQReg128(nn));
12733       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12734       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12735       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12736       IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12737       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12738       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
12739       const HChar* arr = nameArr_Q_SZ(bitQ, size);
12740       HChar ch         = size == X01 ? 'h' : 's';
12741       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
12742           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12743       return True;
12744    }
12745 
12746    return False;
12747 #  undef INSN
12748 }
12749 
12750 
12751 static
12752 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12753 {
12754 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12755    return False;
12756 #  undef INSN
12757 }
12758 
12759 
12760 static
12761 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12762 {
12763 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12764    return False;
12765 #  undef INSN
12766 }
12767 
12768 
12769 static
12770 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12771 {
12772 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12773    return False;
12774 #  undef INSN
12775 }
12776 
12777 
12778 static
12779 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12780 {
12781    /* 31  28    23 21 20 15 13   9 4
12782       000 11110 ty 1  m  op 1000 n opcode2
12783       The first 3 bits are really "M 0 S", but M and S are always zero.
12784       Decode fields are: ty,op,opcode2
12785    */
12786 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12787    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12788        || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12789       return False;
12790    }
12791    UInt ty      = INSN(23,22);
12792    UInt mm      = INSN(20,16);
12793    UInt op      = INSN(15,14);
12794    UInt nn      = INSN(9,5);
12795    UInt opcode2 = INSN(4,0);
12796    vassert(ty < 4);
12797 
12798    if (ty <= X01 && op == X00
12799        && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12800       /* -------- 0x,00,00000 FCMP  d_d,   s_s -------- */
12801       /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
12802       /* -------- 0x,00,10000 FCMPE d_d,   s_s -------- */
12803       /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12804       /* 31        23   20    15      9 4
12805          000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
12806          000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
12807          000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
12808          000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0
12809 
12810          000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
12811          000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
12812          000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
12813          000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0
12814 
12815          FCMPE generates Invalid Operation exn if either arg is any kind
12816          of NaN.  FCMP generates Invalid Operation exn if either arg is a
12817          signalling NaN.  We ignore this detail here and produce the same
12818          IR for both.
12819       */
12820       Bool   isD     = (ty & 1) == 1;
12821       Bool   isCMPE  = (opcode2 & 16) == 16;
12822       Bool   cmpZero = (opcode2 & 8) == 8;
12823       IRType ity     = isD ? Ity_F64 : Ity_F32;
12824       Bool   valid   = True;
12825       if (cmpZero && mm != 0) valid = False;
12826       if (valid) {
12827          IRTemp argL  = newTemp(ity);
12828          IRTemp argR  = newTemp(ity);
12829          IRTemp irRes = newTemp(Ity_I32);
12830          assign(argL, getQRegLO(nn, ity));
12831          assign(argR,
12832                 cmpZero
12833                    ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12834                    : getQRegLO(mm, ity));
12835          assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12836                              mkexpr(argL), mkexpr(argR)));
12837          IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
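         /* setFlags_COPY expects N,Z,C,V in bits 31:28 of a 64-bit word,
            hence the left shift by 28. */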
12838          IRTemp nzcv_28x0 = newTemp(Ity_I64);
12839          assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12840          setFlags_COPY(nzcv_28x0);
12841          DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12842              cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12843          return True;
12844       }
12845       return False;
12846    }
12847 
12848    return False;
12849 #  undef INSN
12850 }
12851 
12852 
12853 static
12854 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12855 {
12856    /* 31  28    23 21 20 15   11 9 4  3
12857       000 11110 ty 1  m  cond 01 n op nzcv
12858       The first 3 bits are really "M 0 S", but M and S are always zero.
12859       Decode fields are: ty,op
12860    */
12861 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12862    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12863        || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12864       return False;
12865    }
12866    UInt ty   = INSN(23,22);
12867    UInt mm   = INSN(20,16);
12868    UInt cond = INSN(15,12);
12869    UInt nn   = INSN(9,5);
12870    UInt op   = INSN(4,4);
12871    UInt nzcv = INSN(3,0);
12872    vassert(ty < 4 && op <= 1);
12873 
12874    if (ty <= BITS2(0,1)) {
12875       /* -------- 00,0 FCCMP  s_s -------- */
12876       /* -------- 00,1 FCCMPE s_s -------- */
12877       /* -------- 01,0 FCCMP  d_d -------- */
12878       /* -------- 01,1 FCCMPE d_d -------- */
12879 
12880       /* FCCMPE generates Invalid Operation exn if either arg is any kind
12881          of NaN.  FCCMP generates Invalid Operation exn if either arg is a
12882          signalling NaN.  We ignore this detail here and produce the same
12883          IR for both.
12884       */
12885       Bool   isD    = (ty & 1) == 1;
12886       Bool   isCMPE = op == 1;
12887       IRType ity    = isD ? Ity_F64 : Ity_F32;
12888       IRTemp argL   = newTemp(ity);
12889       IRTemp argR   = newTemp(ity);
12890       IRTemp irRes  = newTemp(Ity_I32);
12891       assign(argL,  getQRegLO(nn, ity));
12892       assign(argR,  getQRegLO(mm, ity));
12893       assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12894                           mkexpr(argL), mkexpr(argR)));
12895       IRTemp condT = newTemp(Ity_I1);
12896       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12897       IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12898 
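      /* FCCMP semantics: if cond passes, the flags come from the comparison;
         if it fails, they are set directly from the immediate nzcv field. */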
12899       IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12900       assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12901 
12902       IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12903 
12904       IRTemp nzcv_28x0 = newTemp(Ity_I64);
12905       assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12906                                    mkexpr(nzcvT_28x0), nzcvF_28x0));
12907       setFlags_COPY(nzcv_28x0);
12908       DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12909           nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12910       return True;
12911    }
12912 
12913    return False;
12914 #  undef INSN
12915 }
12916 
12917 
12918 static
12919 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12920 {
12921    /* 31        23 21 20 15   11 9 5
12922       000 11110 ty 1  m  cond 11 n d
12923       The first 3 bits are really "M 0 S", but M and S are always zero.
12924       Decode fields: ty
12925    */
12926 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12927    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12928        || INSN(11,10) != BITS2(1,1)) {
12929       return False;
12930    }
12931    UInt ty   = INSN(23,22);
12932    UInt mm   = INSN(20,16);
12933    UInt cond = INSN(15,12);
12934    UInt nn   = INSN(9,5);
12935    UInt dd   = INSN(4,0);
12936    if (ty <= X01) {
12937       /* -------- 00: FCSEL s_s -------- */
12938       /* -------- 01: FCSEL d_d -------- */
12939       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12940       IRTemp srcT = newTemp(ity);
12941       IRTemp srcF = newTemp(ity);
12942       IRTemp res  = newTemp(ity);
12943       assign(srcT, getQRegLO(nn, ity));
12944       assign(srcF, getQRegLO(mm, ity));
12945       assign(res, IRExpr_ITE(
12946                      unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12947                      mkexpr(srcT), mkexpr(srcF)));
12948       putQReg128(dd, mkV128(0x0000));
12949       putQRegLO(dd, mkexpr(res));
12950       DIP("fcsel %s, %s, %s, %s\n",
12951           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12952           nameCC(cond));
12953       return True;
12954    }
12955    return False;
12956 #  undef INSN
12957 }
12958 
12959 
12960 static
12961 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12962 {
12963    /* 31  28    23 21 20     14    9 4
12964       000 11110 ty 1  opcode 10000 n d
12965       The first 3 bits are really "M 0 S", but M and S are always zero.
12966       Decode fields: ty,opcode
12967    */
12968 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12969    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12970        || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12971       return False;
12972    }
12973    UInt ty     = INSN(23,22);
12974    UInt opcode = INSN(20,15);
12975    UInt nn     = INSN(9,5);
12976    UInt dd     = INSN(4,0);
12977 
12978    if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12979       /* -------- 0x,000000: FMOV  d_d, s_s -------- */
12980       /* -------- 0x,000001: FABS  d_d, s_s -------- */
12981       /* -------- 0x,000010: FNEG  d_d, s_s -------- */
12982       /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12983       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12984       IRTemp src = newTemp(ity);
12985       IRTemp res = newTemp(ity);
12986       const HChar* nm = "??";
12987       assign(src, getQRegLO(nn, ity));
12988       switch (opcode) {
12989          case BITS6(0,0,0,0,0,0):
12990             nm = "fmov"; assign(res, mkexpr(src)); break;
12991          case BITS6(0,0,0,0,0,1):
12992             nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12993          case BITS6(0,0,0,0,1,0):
12994             nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12995          case BITS6(0,0,0,0,1,1):
12996             nm = "fsqrt";
12997             assign(res, binop(mkSQRTF(ity),
12998                               mkexpr(mk_get_IR_rounding_mode()),
12999                               mkexpr(src))); break;
13000          default:
13001             vassert(0);
13002       }
13003       putQReg128(dd, mkV128(0x0000));
13004       putQRegLO(dd, mkexpr(res));
13005       DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13006       return True;
13007    }
13008 
13009    if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13010                          || opcode == BITS6(0,0,0,1,0,1)))
13011        || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13012                          || opcode == BITS6(0,0,0,1,0,1)))
13013        || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13014                          || opcode == BITS6(0,0,0,1,0,0)))) {
13015       /* -------- 11,000100: FCVT s_h -------- */
13016       /* -------- 11,000101: FCVT d_h -------- */
13017       /* -------- 00,000111: FCVT h_s -------- */
13018       /* -------- 00,000101: FCVT d_s -------- */
13019       /* -------- 01,000111: FCVT h_d -------- */
13020       /* -------- 01,000100: FCVT s_d -------- */
13021       /* 31        23 21    16 14    9 4
13022          000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
13023          --------- 11 ----- 01 ---------   FCVT Dd, Hn
13024          --------- 00 ----- 11 ---------   FCVT Hd, Sn
13025          --------- 00 ----- 01 ---------   FCVT Dd, Sn
13026          --------- 01 ----- 11 ---------   FCVT Hd, Dn
13027          --------- 01 ----- 00 ---------   FCVT Sd, Dn
13028          Rounding, when dst is smaller than src, is per the FPCR.
13029       */
13030       UInt b2322 = ty;
13031       UInt b1615 = opcode & BITS2(1,1);
13032       switch ((b2322 << 2) | b1615) {
13033          case BITS4(0,0,0,1):   // S -> D
13034          case BITS4(1,1,0,1): { // H -> D
13035             Bool   srcIsH = b2322 == BITS2(1,1);
13036             IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
13037             IRTemp res    = newTemp(Ity_F64);
13038             assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13039                              getQRegLO(nn, srcTy)));
13040             putQReg128(dd, mkV128(0x0000));
13041             putQRegLO(dd, mkexpr(res));
13042             DIP("fcvt %s, %s\n",
13043                 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13044             return True;
13045          }
13046          case BITS4(0,1,0,0):   // D -> S
13047          case BITS4(0,1,1,1): { // D -> H
13048             Bool   dstIsH = b1615 == BITS2(1,1);
13049             IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
13050             IRTemp res    = newTemp(dstTy);
13051             assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13052                               mkexpr(mk_get_IR_rounding_mode()),
13053                               getQRegLO(nn, Ity_F64)));
13054             putQReg128(dd, mkV128(0x0000));
13055             putQRegLO(dd, mkexpr(res));
13056             DIP("fcvt %s, %s\n",
13057                 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13058             return True;
13059          }
13060          case BITS4(0,0,1,1):   // S -> H
13061          case BITS4(1,1,0,0): { // H -> S
13062             Bool   toH   = b1615 == BITS2(1,1);
13063             IRType srcTy = toH ? Ity_F32 : Ity_F16;
13064             IRType dstTy = toH ? Ity_F16 : Ity_F32;
13065             IRTemp res = newTemp(dstTy);
13066             if (toH) {
13067                assign(res, binop(Iop_F32toF16,
13068                                  mkexpr(mk_get_IR_rounding_mode()),
13069                                  getQRegLO(nn, srcTy)));
13070 
13071             } else {
13072                assign(res, unop(Iop_F16toF32,
13073                                 getQRegLO(nn, srcTy)));
13074             }
13075             putQReg128(dd, mkV128(0x0000));
13076             putQRegLO(dd, mkexpr(res));
13077             DIP("fcvt %s, %s\n",
13078                 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13079             return True;
13080          }
13081          default:
13082             break;
13083       }
13084       /* else unhandled */
13085       return False;
13086    }
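
   /* Note the asymmetry above: the widening conversions (H->S, H->D,
      S->D) are exact, so their IROps are unops taking no rounding
      mode, whereas the narrowing ones must round per the FPCR and so
      take one.  E.g. any F32 value converts to F64 exactly, but an
      F64 such as 0.1 must be re-rounded to land on an F32. */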
13087 
13088    if (ty <= X01
13089        && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13090        && opcode != BITS6(0,0,1,1,0,1)) {
13091       /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13092       /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13093       /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13094       /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13095       /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13096       /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13097       /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13098       /* 31        23 21   17  14    9 4
13099          000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
13100                            rm
13101          x==0 => S-registers, x==1 => D-registers
13102          rm (17:15) encodings:
13103             111 per FPCR  (FRINTI)
13104             001 +inf      (FRINTP)
13105             010 -inf      (FRINTM)
13106             011 zero      (FRINTZ)
13107             000 tieeven   (FRINTN) -- !! FIXME KLUDGED !!
13108             100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
13109             110 per FPCR + "exact = TRUE" (FRINTX)
13110             101 unallocated
13111       */
13112       Bool    isD   = (ty & 1) == 1;
13113       UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
13114       IRType  ity   = isD ? Ity_F64 : Ity_F32;
13115       IRExpr* irrmE = NULL;
13116       UChar   ch    = '?';
13117       switch (rm) {
13118          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13119          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13120          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13121          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
13122          case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13123       // FRINTX ("round to integral exact") rounds per the FPCR and raises
13124       // Inexact if result != input; only the rounding is modelled here.
13125          case BITS3(1,1,0):
13126             ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13127          case BITS3(1,1,1):
13128             ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13129          // The following is a kludge.  There's no Irrm_ value to represent
13130          // this ("to nearest, with ties to even")
13131          case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13132          default: break;
13133       }
13134       if (irrmE) {
13135          IRTemp src = newTemp(ity);
13136          IRTemp dst = newTemp(ity);
13137          assign(src, getQRegLO(nn, ity));
13138          assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13139                            irrmE, mkexpr(src)));
13140          putQReg128(dd, mkV128(0x0000));
13141          putQRegLO(dd, mkexpr(dst));
13142          DIP("frint%c %s, %s\n",
13143              ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13144          return True;
13145       }
13146       return False;
13147    }
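
   /* Where the FRINTN/FRINTA kludges above can diverge from hardware:
      the two modes differ exactly on ties.  In C99 terms (default
      FE_TONEAREST environment):
         rint(0.5)  == 0.0,  rint(2.5)  == 2.0   (ties-to-even, FRINTN)
         round(0.5) == 1.0,  round(2.5) == 3.0   (ties-away,    FRINTA)
      Since both are lowered to Irrm_NEAREST here, at least FRINTA is
      wrong on exact halfway cases. */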
13148 
13149    return False;
13150 #  undef INSN
13151 }
13152 
13153 
13154 static
13155 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13156 {
13157    /* 31  28    23 21 20 15     11 9 4
13158       000 11110 ty 1  m  opcode 10 n d
13159       The first 3 bits are really "M 0 S", but M and S are always zero.
13160       Decode fields: ty, opcode
13161    */
13162 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13163    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13164        || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13165       return False;
13166    }
13167    UInt ty     = INSN(23,22);
13168    UInt mm     = INSN(20,16);
13169    UInt opcode = INSN(15,12);
13170    UInt nn     = INSN(9,5);
13171    UInt dd     = INSN(4,0);
13172 
13173    if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13174       /* ------- 0x,0000: FMUL d_d, s_s ------- */
13175       /* ------- 0x,0001: FDIV d_d, s_s ------- */
13176       /* ------- 0x,0010: FADD d_d, s_s ------- */
13177       /* ------- 0x,0011: FSUB d_d, s_s ------- */
13178       /* ------- 0x,0100: FMAX d_d, s_s ------- */
13179       /* ------- 0x,0101: FMIN d_d, s_s ------- */
13180       /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13181       /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13182       IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13183       IROp   iop = Iop_INVALID;
13184       const HChar* nm = "???";
13185       switch (opcode) {
13186          case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13187          case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13188          case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13189          case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13190          case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13191          case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13192          case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13193          case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13194          default: vassert(0);
13195       }
13196       if (opcode <= BITS4(0,0,1,1)) {
13197          // This is really not good code.  TODO: avoid width-changing
13198          IRTemp res = newTemp(ity);
13199          assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13200                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13201          putQReg128(dd, mkV128(0));
13202          putQRegLO(dd, mkexpr(res));
13203       } else {
13204          putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13205                              binop(iop, getQReg128(nn), getQReg128(mm))));
13206       }
13207       DIP("%s %s, %s, %s\n",
13208           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13209       return True;
13210    }
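
   /* Where the FMAXNM/FMINNM kludge above goes wrong: the "NM" forms
      use maxNum/minNum semantics, in which a quiet NaN loses to a
      number (C99 fmax(NAN, 1.0) == 1.0), whereas the plain FMAX/FMIN
      lowering used here propagates the NaN. */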
13211 
13212    if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13213       /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13214       IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
13215       IROp   iop  = mkMULF(ity);
13216       IROp   iopn = mkNEGF(ity);
13217       const HChar* nm = "fnmul";
13218       IRExpr* resE = unop(iopn,
13219                           triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13220                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13221       IRTemp  res  = newTemp(ity);
13222       assign(res, resE);
13223       putQReg128(dd, mkV128(0));
13224       putQRegLO(dd, mkexpr(res));
13225       DIP("%s %s, %s, %s\n",
13226           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13227       return True;
13228    }
13229 
13230    return False;
13231 #  undef INSN
13232 }
13233 
13234 
13235 static
13236 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13237 {
13238    /* 31  28    23 21 20 15 14 9 4
13239       000 11111 ty o1 m  o0 a  n d
13240       The first 3 bits are really "M 0 S", but M and S are always zero.
13241       Decode fields: ty,o1,o0
13242    */
13243 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13244    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13245       return False;
13246    }
13247    UInt ty    = INSN(23,22);
13248    UInt bitO1 = INSN(21,21);
13249    UInt mm    = INSN(20,16);
13250    UInt bitO0 = INSN(15,15);
13251    UInt aa    = INSN(14,10);
13252    UInt nn    = INSN(9,5);
13253    UInt dd    = INSN(4,0);
13254    vassert(ty < 4);
13255 
13256    if (ty <= X01) {
13257       /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
13258       /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
13259       /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13260       /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13261       /* -------------------- F{N}M{ADD,SUB} -------------------- */
13262       /* 31          22   20 15 14 9 4   ix
13263          000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
13264          000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
13265          000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
13266          000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
13267          where Fx=Dx when sz=1, Fx=Sx when sz=0
13268 
13269                   -----SPEC------    ----IMPL----
13270          fmadd       a +    n * m    a + n * m
13271          fmsub       a + (-n) * m    a - n * m
13272          fnmadd   (-a) + (-n) * m    -(a + n * m)
13273          fnmsub   (-a) +    n * m    -(a - n * m)
13274       */
13275       Bool    isD   = (ty & 1) == 1;
13276       UInt    ix    = (bitO1 << 1) | bitO0;
13277       IRType  ity   = isD ? Ity_F64 : Ity_F32;
13278       IROp    opADD = mkADDF(ity);
13279       IROp    opSUB = mkSUBF(ity);
13280       IROp    opMUL = mkMULF(ity);
13281       IROp    opNEG = mkNEGF(ity);
13282       IRTemp  res   = newTemp(ity);
13283       IRExpr* eA    = getQRegLO(aa, ity);
13284       IRExpr* eN    = getQRegLO(nn, ity);
13285       IRExpr* eM    = getQRegLO(mm, ity);
13286       IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
13287       IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
13288       switch (ix) {
13289          case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
13290          case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
13291          case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13292          case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13293          default: vassert(0);
13294       }
13295       putQReg128(dd, mkV128(0x0000));
13296       putQRegLO(dd, mkexpr(res));
13297       const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13298       DIP("%s %s, %s, %s, %s\n",
13299           names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13300                      nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13301       return True;
13302    }
13303 
13304    return False;
13305 #  undef INSN
13306 }
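
/* Illustration only, of the double-rounding hazard called out in
   KNOWN LIMITATIONS at the top of this file: the code above lowers
   F{N}M{ADD,SUB} as round(round(n*m) +/- a), i.e. two roundings,
   whereas the hardware fuses the operation with a single final
   rounding, as C99 fma() does.  A hypothetical test harness, not
   part of the decoder: */
#if 0
#include <math.h>
#include <stdio.h>
static void compare_fmadd ( double n, double m, double a )
{
   double split = n * m + a;     /* what the IR generated above computes */
   double fused = fma(n, m, a);  /* what FMADD itself computes */
   if (split != fused)
      printf("last-bit difference: %a vs %a\n", split, fused);
}
#endif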
13307 
13308 
13309 static
13310 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13311 {
13312    /* 31  28    23 21 20   12  9    4
13313       000 11110 ty 1  imm8 100 imm5 d
13314       The first 3 bits are really "M 0 S", but M and S are always zero.
13315    */
13316 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13317    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13318        || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13319       return False;
13320    }
13321    UInt ty     = INSN(23,22);
13322    UInt imm8   = INSN(20,13);
13323    UInt imm5   = INSN(9,5);
13324    UInt dd     = INSN(4,0);
13325 
13326    /* ------- 00,00000: FMOV s_imm ------- */
13327    /* ------- 01,00000: FMOV d_imm ------- */
13328    if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13329       Bool  isD  = (ty & 1) == 1;
13330       ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
13331       if (!isD) {
13332          vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13333       }
13334       putQReg128(dd, mkV128(0));
13335       putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13336       DIP("fmov %s, #0x%llx\n",
13337           nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13338       return True;
13339    }
13340 
13341    return False;
13342 #  undef INSN
13343 }
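
/* A sketch of the imm8 expansion used above, following the ARM ARM
   VFPExpandImm pseudocode for the 64-bit case.  The helper name is
   hypothetical; it only illustrates the rule
      imm8 = a:b:c:d:e:f:g:h
          => a : NOT(b) : b repeated 8 times : cd : efgh : 48 zeros  */
#if 0
static ULong expand_imm8_to_F64_bits ( UInt imm8 )
{
   ULong a    = (imm8 >> 7) & 1;
   ULong b    = (imm8 >> 6) & 1;
   ULong cd   = (imm8 >> 4) & 3;
   ULong efgh = imm8 & 0xF;
   ULong exp  = ((b ^ 1) << 10) | ((b ? 0xFFULL : 0ULL) << 2) | cd;
   return (a << 63) | (exp << 52) | (efgh << 48);
}
/* e.g. expand_imm8_to_F64_bits(0x70) == 0x3FF0000000000000 == 1.0 */
#endif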
13344 
13345 
13346 static
13347 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13348 {
13350    /* 31 30 29 28    23   21 20    18     15    9 4
13351       sf  0  0 11110 type 0  rmode opcode scale n d
13352       The first 3 bits are really "sf 0 S", but S is always zero.
13353       Decode fields: sf,type,rmode,opcode
13354    */
13355 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13356    if (INSN(30,29) != BITS2(0,0)
13357        || INSN(28,24) != BITS5(1,1,1,1,0)
13358        || INSN(21,21) != 0) {
13359       return False;
13360    }
13361    UInt bitSF = INSN(31,31);
13362    UInt ty    = INSN(23,22); // type
13363    UInt rm    = INSN(20,19); // rmode
13364    UInt op    = INSN(18,16); // opcode
13365    UInt sc    = INSN(15,10); // scale
13366    UInt nn    = INSN(9,5);
13367    UInt dd    = INSN(4,0);
13368 
13369    if (ty <= X01 && rm == X11 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))
13370        && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13371       /* -------- (ix) sf ty rm opc -------- */
13372       /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
13373       /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
13374       /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
13375       /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */
13376 
13377       /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
13378       /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
13379       /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
13380       /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
13381       Bool isI64 = bitSF == 1;
13382       Bool isF64 = (ty & 1) == 1;
13383       Bool isU   = (op & 1) == 1;
13384       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13385 
13386       Int fbits = 64 - sc;
13387       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13388 
13389       Double  scale  = two_to_the_plus(fbits);
13390       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13391                              : IRExpr_Const(IRConst_F32( (Float)scale ));
13392       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
13393 
13394       const IROp ops[8]
13395         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
13396             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
13397       IRTemp irrm = newTemp(Ity_I32);
13398       assign(irrm, mkU32(Irrm_ZERO));
13399 
13400       IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
13401       IRExpr* res = binop(ops[ix], mkexpr(irrm),
13402                                    triop(opMUL, mkexpr(irrm), src, scaleE));
13403       putIRegOrZR(isI64, dd, res);
13404 
13405       DIP("fcvtz%c %s, %s, #%d\n",
13406           isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
13407           nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
13408       return True;
13409    }
13410 
13411    /* ------ sf,ty,rm,opc ------ */
13412    /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
13413    /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
13414    /* (ix) sf  S 28    ty   rm opc 15    9 4
13415       0    0 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
13416       1    0 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
13417       2    1 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
13418       3    1 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits
13419 
13420       4    0 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
13421       5    0 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
13422       6    1 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
13423       7    1 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits
13424 
13425       These are signed/unsigned conversion from integer registers to
13426       FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13427       scaled per |scale|.
13428    */
13429    if (ty <= X01 && rm == X00
13430        && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
13431        && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13432       Bool isI64 = bitSF == 1;
13433       Bool isF64 = (ty & 1) == 1;
13434       Bool isU   = (op & 1) == 1;
13435       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13436 
13437       Int fbits = 64 - sc;
13438       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13439 
13440       Double  scale  = two_to_the_minus(fbits);
13441       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13442                              : IRExpr_Const(IRConst_F32( (Float)scale ));
13443       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
13444 
13445       const IROp ops[8]
13446         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13447             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13448       IRExpr* src = getIRegOrZR(isI64, nn);
13449       IRExpr* res = (isF64 && !isI64)
13450                        ? unop(ops[ix], src)
13451                        : binop(ops[ix],
13452                                mkexpr(mk_get_IR_rounding_mode()), src);
13453       putQReg128(dd, mkV128(0));
13454       putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
13455 
13456       DIP("%ccvtf %s, %s, #%d\n",
13457           isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13458           nameIRegOrZR(isI64, nn), fbits);
13459       return True;
13460    }
13461 
13462    return False;
13463 #  undef INSN
13464 }
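
/* Worked example of the #fbits scaling implemented above:
      FCVTZS Wd, Sn, #8 : fbits == 8, so Wd = trunc(Sn * 2^8);
                          Sn == 1.5 gives Wd == 384.
      SCVTF  Sd, Wn, #8 : Sd = (F32)Wn * 2^-8;
                          Wn == 384 gives Sd == 1.5.
   The scaling multiply is by a power of two and hence exact (barring
   overflow/underflow), which is why performing it with a fixed
   rounding mode (Irrm_ZERO resp. Irrm_NEAREST above) rather than per
   the FPCR is harmless. */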
13465 
13466 
13467 static
13468 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13469 {
13470    /* 31 30 29 28    23   21 20    18     15     9 4
13471       sf  0  0 11110 type 1  rmode opcode 000000 n d
13472       The first 3 bits are really "sf 0 S", but S is always zero.
13473       Decode fields: sf,type,rmode,opcode
13474    */
13475 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13476    if (INSN(30,29) != BITS2(0,0)
13477        || INSN(28,24) != BITS5(1,1,1,1,0)
13478        || INSN(21,21) != 1
13479        || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
13480       return False;
13481    }
13482    UInt bitSF = INSN(31,31);
13483    UInt ty    = INSN(23,22); // type
13484    UInt rm    = INSN(20,19); // rmode
13485    UInt op    = INSN(18,16); // opcode
13486    UInt nn    = INSN(9,5);
13487    UInt dd    = INSN(4,0);
13488 
13489    // op = 000, 001
13490    /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
13491    /*    30       23   20 18  15     9 4
13492       sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
13493       sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
13494       ---------------- 01 --------------  FCVTP-------- (round to +inf)
13495       ---------------- 10 --------------  FCVTM-------- (round to -inf)
13496       ---------------- 11 --------------  FCVTZ-------- (round to zero)
13497       ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
13498       ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)
13499 
13500       Rd is Xd when sf==1, Wd when sf==0
13501       Fn is Dn when x==1, Sn when x==0
13502       20:19 carry the rounding mode, using the same encoding as FPCR
13503    */
13504    if (ty <= X01
13505        && (   (op == BITS3(0,0,0) || op == BITS3(0,0,1))
13506            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
13507           )
13508       ) {
13509       Bool isI64 = bitSF == 1;
13510       Bool isF64 = (ty & 1) == 1;
13511       Bool isU   = (op & 1) == 1;
13512       /* Decide on the IR rounding mode to use. */
13513       IRRoundingMode irrm = 8; /*impossible*/
13514       HChar ch = '?';
13515       if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
13516          switch (rm) {
13517             case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
13518             case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
13519             case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
13520             case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
13521             default: vassert(0);
13522          }
13523       } else {
13524          vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
13525          switch (rm) {
13526             case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
13527             default: vassert(0);
13528          }
13529       }
13530       vassert(irrm != 8);
13531       /* Decide on the conversion primop, based on the source size,
13532          dest size and signedness (8 possibilities).  Case coding:
13533             F32 ->s I32   0
13534             F32 ->u I32   1
13535             F32 ->s I64   2
13536             F32 ->u I64   3
13537             F64 ->s I32   4
13538             F64 ->u I32   5
13539             F64 ->s I64   6
13540             F64 ->u I64   7
13541       */
13542       UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
13543       vassert(ix < 8);
13544       const IROp iops[8]
13545          = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
13546              Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
13547       IROp iop = iops[ix];
13548       // A bit of ATCery: bounce all cases we haven't seen an example of.
13549       if (/* F32toI32S */
13550              (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
13551           || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
13552           || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
13553           || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
13554           /* F32toI32U */
13555           || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
13556           || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
13557           || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
13558           || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
13559           /* F32toI64S */
13560           || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
13561           || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
13562           || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
13563           || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
13564           /* F32toI64U */
13565           || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
13566           || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
13567           || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
13568           || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
13569           /* F64toI32S */
13570           || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
13571           || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
13572           || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
13573           || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
13574           /* F64toI32U */
13575           || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
13576           || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
13577           || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
13578           || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
13579           /* F64toI64S */
13580           || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
13581           || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
13582           || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
13583           || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
13584           /* F64toI64U */
13585           || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
13586           || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
13587           || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
13588           || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
13589          ) {
13590         /* validated */
13591       } else {
13592         return False;
13593       }
13594       IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
13595       IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
13596       IRTemp src    = newTemp(srcTy);
13597       IRTemp dst    = newTemp(dstTy);
13598       assign(src, getQRegLO(nn, srcTy));
13599       assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
13600       putIRegOrZR(isI64, dd, mkexpr(dst));
13601       DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
13602           nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
13603       return True;
13604    }
13605 
13606    // op = 010, 011
13607    /* -------------- {S,U}CVTF (scalar, integer) -------------- */
13608    /* (ix) sf  S 28    ty   rm op  15     9 4
13609       0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
13610       1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
13611       2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
13612       3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn
13613 
13614       4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
13615       5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
13616       6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
13617       7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn
13618 
13619       These are signed/unsigned conversion from integer registers to
13620       FP registers, all 4 32/64-bit combinations, rounded per FPCR.
13621    */
13622    if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
13623       Bool isI64 = bitSF == 1;
13624       Bool isF64 = (ty & 1) == 1;
13625       Bool isU   = (op & 1) == 1;
13626       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13627       const IROp ops[8]
13628         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13629             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13630       IRExpr* src = getIRegOrZR(isI64, nn);
13631       IRExpr* res = (isF64 && !isI64)
13632                        ? unop(ops[ix], src)
13633                        : binop(ops[ix],
13634                                mkexpr(mk_get_IR_rounding_mode()), src);
13635       putQReg128(dd, mkV128(0));
13636       putQRegLO(dd, res);
13637       DIP("%ccvtf %s, %s\n",
13638           isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13639           nameIRegOrZR(isI64, nn));
13640       return True;
13641    }
13642 
13643    // op = 110, 111
13644    /* -------- FMOV (general) -------- */
13645    /* case sf  S       ty   rm op  15     9 4
13646        (1) 0 0 0 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
13647        (2) 1 0 0 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
13648        (3) 1 0 0 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn
13649 
13650        (4) 0 0 0 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
13651        (5) 1 0 0 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
13652        (6) 1 0 0 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
13653    */
13654    if (1) {
13655       UInt ix = 0; // case
13656       if (bitSF == 0) {
13657          if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13658             ix = 1;
13659          else
13660          if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13661             ix = 4;
13662       } else {
13663          vassert(bitSF == 1);
13664          if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13665             ix = 2;
13666          else
13667          if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13668             ix = 5;
13669          else
13670          if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
13671             ix = 3;
13672          else
13673          if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
13674             ix = 6;
13675       }
13676       if (ix > 0) {
13677          switch (ix) {
13678             case 1:
13679                putQReg128(dd, mkV128(0));
13680                putQRegLO(dd, getIReg32orZR(nn));
13681                DIP("fmov s%u, w%u\n", dd, nn);
13682                break;
13683             case 2:
13684                putQReg128(dd, mkV128(0));
13685                putQRegLO(dd, getIReg64orZR(nn));
13686                DIP("fmov d%u, x%u\n", dd, nn);
13687                break;
13688             case 3:
13689                putQRegHI64(dd, getIReg64orZR(nn));
13690                DIP("fmov v%u.d[1], x%u\n", dd, nn);
13691                break;
13692             case 4:
13693                putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
13694                DIP("fmov w%u, s%u\n", dd, nn);
13695                break;
13696             case 5:
13697                putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
13698                DIP("fmov x%u, d%u\n", dd, nn);
13699                break;
13700             case 6:
13701                putIReg64orZR(dd, getQRegHI64(nn));
13702                DIP("fmov x%u, v%u.d[1]\n", dd, nn);
13703                break;
13704             default:
13705                vassert(0);
13706          }
13707          return True;
13708       }
13709       /* undecodable; fall through */
13710    }
13711 
13712    return False;
13713 #  undef INSN
13714 }
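
/* Note that FMOV (general), handled above, moves raw bit patterns
   with no numeric conversion: e.g. FMOV Wd, Sn with Sn == 1.0 yields
   Wd == 0x3F800000, and FMOV Xd, Dn with Dn == 1.0 yields
   Xd == 0x3FF0000000000000. */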
13715 
13716 
13717 static
13718 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
13719 {
13720    Bool ok;
13721    ok = dis_AdvSIMD_EXT(dres, insn);
13722    if (UNLIKELY(ok)) return True;
13723    ok = dis_AdvSIMD_TBL_TBX(dres, insn);
13724    if (UNLIKELY(ok)) return True;
13725    ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
13726    if (UNLIKELY(ok)) return True;
13727    ok = dis_AdvSIMD_across_lanes(dres, insn);
13728    if (UNLIKELY(ok)) return True;
13729    ok = dis_AdvSIMD_copy(dres, insn);
13730    if (UNLIKELY(ok)) return True;
13731    ok = dis_AdvSIMD_modified_immediate(dres, insn);
13732    if (UNLIKELY(ok)) return True;
13733    ok = dis_AdvSIMD_scalar_copy(dres, insn);
13734    if (UNLIKELY(ok)) return True;
13735    ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
13736    if (UNLIKELY(ok)) return True;
13737    ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
13738    if (UNLIKELY(ok)) return True;
13739    ok = dis_AdvSIMD_scalar_three_different(dres, insn);
13740    if (UNLIKELY(ok)) return True;
13741    ok = dis_AdvSIMD_scalar_three_same(dres, insn);
13742    if (UNLIKELY(ok)) return True;
13743    ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
13744    if (UNLIKELY(ok)) return True;
13745    ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
13746    if (UNLIKELY(ok)) return True;
13747    ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
13748    if (UNLIKELY(ok)) return True;
13749    ok = dis_AdvSIMD_three_different(dres, insn);
13750    if (UNLIKELY(ok)) return True;
13751    ok = dis_AdvSIMD_three_same(dres, insn);
13752    if (UNLIKELY(ok)) return True;
13753    ok = dis_AdvSIMD_two_reg_misc(dres, insn);
13754    if (UNLIKELY(ok)) return True;
13755    ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
13756    if (UNLIKELY(ok)) return True;
13757    ok = dis_AdvSIMD_crypto_aes(dres, insn);
13758    if (UNLIKELY(ok)) return True;
13759    ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
13760    if (UNLIKELY(ok)) return True;
13761    ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
13762    if (UNLIKELY(ok)) return True;
13763    ok = dis_AdvSIMD_fp_compare(dres, insn);
13764    if (UNLIKELY(ok)) return True;
13765    ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
13766    if (UNLIKELY(ok)) return True;
13767    ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
13768    if (UNLIKELY(ok)) return True;
13769    ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
13770    if (UNLIKELY(ok)) return True;
13771    ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
13772    if (UNLIKELY(ok)) return True;
13773    ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
13774    if (UNLIKELY(ok)) return True;
13775    ok = dis_AdvSIMD_fp_immediate(dres, insn);
13776    if (UNLIKELY(ok)) return True;
13777    ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
13778    if (UNLIKELY(ok)) return True;
13779    ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
13780    if (UNLIKELY(ok)) return True;
13781    return False;
13782 }
13783 
13784 
13785 /*------------------------------------------------------------*/
13786 /*--- Disassemble a single ARM64 instruction               ---*/
13787 /*------------------------------------------------------------*/
13788 
13789 /* Disassemble a single ARM64 instruction into IR.  The instruction
13790    is located at |guest_instr| and has guest IP of
13791    |guest_PC_curr_instr|, which will have been set before the call
13792    here.  Returns True iff the instruction was decoded, in which case
13793    *dres will be set accordingly, or False, in which case *dres should
13794    be ignored by the caller. */
13795 
13796 static
13797 Bool disInstr_ARM64_WRK (
13798         /*MB_OUT*/DisResult* dres,
13799         Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
13800         Bool         resteerCisOk,
13801         void*        callback_opaque,
13802         const UChar* guest_instr,
13803         const VexArchInfo* archinfo,
13804         const VexAbiInfo*  abiinfo
13805      )
13806 {
13807    // A macro to fish bits out of 'insn'.
13808 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13809 
13810 //ZZ    DisResult dres;
13811 //ZZ    UInt      insn;
13812 //ZZ    //Bool      allow_VFP = False;
13813 //ZZ    //UInt      hwcaps = archinfo->hwcaps;
13814 //ZZ    IRTemp    condT; /* :: Ity_I32 */
13815 //ZZ    UInt      summary;
13816 //ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
13817 //ZZ
13818 //ZZ    /* What insn variants are we supporting today? */
13819 //ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
13820 //ZZ    // etc etc
13821 
13822    /* Set result defaults. */
13823    dres->whatNext    = Dis_Continue;
13824    dres->len         = 4;
13825    dres->continueAt  = 0;
13826    dres->jk_StopHere = Ijk_INVALID;
13827 
13828    /* At least this is simple on ARM64: insns are all 4 bytes long, and
13829       4-aligned.  So just fish the whole thing out of memory right now
13830       and have done. */
13831    UInt insn = getUIntLittleEndianly( guest_instr );
13832 
13833    if (0) vex_printf("insn: 0x%x\n", insn);
13834 
13835    DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);
13836 
13837    vassert(0 == (guest_PC_curr_instr & 3ULL));
13838 
13839    /* ----------------------------------------------------------- */
13840 
13841    /* Spot "Special" instructions (see comment at top of file). */
13842    {
13843       const UChar* code = guest_instr;
13844       /* Spot the 16-byte preamble:
13845             93CC0D8C   ror x12, x12, #3
13846             93CC358C   ror x12, x12, #13
13847             93CCCD8C   ror x12, x12, #51
13848             93CCF58C   ror x12, x12, #61
13849       */
13850       UInt word1 = 0x93CC0D8C;
13851       UInt word2 = 0x93CC358C;
13852       UInt word3 = 0x93CCCD8C;
13853       UInt word4 = 0x93CCF58C;
13854       if (getUIntLittleEndianly(code+ 0) == word1 &&
13855           getUIntLittleEndianly(code+ 4) == word2 &&
13856           getUIntLittleEndianly(code+ 8) == word3 &&
13857           getUIntLittleEndianly(code+12) == word4) {
13858          /* Got a "Special" instruction preamble.  Which one is it? */
13859          if (getUIntLittleEndianly(code+16) == 0xAA0A014A
13860                                                /* orr x10,x10,x10 */) {
13861             /* X3 = client_request ( X4 ) */
13862             DIP("x3 = client_request ( x4 )\n");
13863             putPC(mkU64( guest_PC_curr_instr + 20 ));
13864             dres->jk_StopHere = Ijk_ClientReq;
13865             dres->whatNext    = Dis_StopHere;
13866             return True;
13867          }
13868          else
13869          if (getUIntLittleEndianly(code+16) == 0xAA0B016B
13870                                                /* orr x11,x11,x11 */) {
13871             /* X3 = guest_NRADDR */
13872             DIP("x3 = guest_NRADDR\n");
13873             dres->len = 20;
13874             putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
13875             return True;
13876          }
13877          else
13878          if (getUIntLittleEndianly(code+16) == 0xAA0C018C
13879                                                /* orr x12,x12,x12 */) {
13880             /*  branch-and-link-to-noredir X8 */
13881             DIP("branch-and-link-to-noredir x8\n");
13882             putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
13883             putPC(getIReg64orZR(8));
13884             dres->jk_StopHere = Ijk_NoRedir;
13885             dres->whatNext    = Dis_StopHere;
13886             return True;
13887          }
13888          else
13889          if (getUIntLittleEndianly(code+16) == 0xAA090129
13890                                                /* orr x9,x9,x9 */) {
13891             /* IR injection */
13892             DIP("IR injection\n");
13893             vex_inject_ir(irsb, Iend_LE);
13894             // Invalidate the current insn.  The reason is that the IRop
13895             // we're injecting here can change, in which case the translation
13896             // would have to be redone.  For ease of handling, we simply
13897             // invalidate all the time.
13898             stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
13899             stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
13900             putPC(mkU64( guest_PC_curr_instr + 20 ));
13901             dres->whatNext    = Dis_StopHere;
13902             dres->jk_StopHere = Ijk_InvalICache;
13903             return True;
13904          }
13905          /* We don't know what it is. */
13906          return False;
13907          /*NOTREACHED*/
13908       }
13909    }
13910 
13911    /* ----------------------------------------------------------- */
13912 
13913    /* Main ARM64 instruction decoder starts here. */
13914 
13915    Bool ok = False;
13916 
13917    /* insn[28:25] determines the top-level grouping, so let's start
13918       off with that.
13919 
13920       For all of these dis_ARM64_ functions, we pass *dres with the
13921       normal default results "insn OK, 4 bytes long, keep decoding" so
13922       they don't need to change it.  However, decodes of control-flow
13923       insns may cause *dres to change.
13924    */
13925    switch (INSN(28,25)) {
13926       case BITS4(1,0,0,0): case BITS4(1,0,0,1):
13927          // Data processing - immediate
13928          ok = dis_ARM64_data_processing_immediate(dres, insn);
13929          break;
13930       case BITS4(1,0,1,0): case BITS4(1,0,1,1):
13931          // Branch, exception generation and system instructions
13932          ok = dis_ARM64_branch_etc(dres, insn, archinfo);
13933          break;
13934       case BITS4(0,1,0,0): case BITS4(0,1,1,0):
13935       case BITS4(1,1,0,0): case BITS4(1,1,1,0):
13936          // Loads and stores
13937          ok = dis_ARM64_load_store(dres, insn);
13938          break;
13939       case BITS4(0,1,0,1): case BITS4(1,1,0,1):
13940          // Data processing - register
13941          ok = dis_ARM64_data_processing_register(dres, insn);
13942          break;
13943       case BITS4(0,1,1,1): case BITS4(1,1,1,1):
13944          // Data processing - SIMD and floating point
13945          ok = dis_ARM64_simd_and_fp(dres, insn);
13946          break;
13947       case BITS4(0,0,0,0): case BITS4(0,0,0,1):
13948       case BITS4(0,0,1,0): case BITS4(0,0,1,1):
13949          // UNALLOCATED
13950          break;
13951       default:
13952          vassert(0); /* Can't happen */
13953    }
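
   /* Worked example of the routing above: 0x91000000 is
      "add x0, x0, #0"; its insn[28:25] is 1000, so it is handed to
      dis_ARM64_data_processing_immediate. */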
13954 
13955    /* If the next-level down decoders failed, make sure |dres| didn't
13956       get changed. */
13957    if (!ok) {
13958       vassert(dres->whatNext    == Dis_Continue);
13959       vassert(dres->len         == 4);
13960       vassert(dres->continueAt  == 0);
13961       vassert(dres->jk_StopHere == Ijk_INVALID);
13962    }
13963 
13964    return ok;
13965 
13966 #  undef INSN
13967 }
13968 
13969 
13970 /*------------------------------------------------------------*/
13971 /*--- Top-level fn                                         ---*/
13972 /*------------------------------------------------------------*/
13973 
13974 /* Disassemble a single instruction into IR.  The instruction
13975    is located in host memory at &guest_code[delta]. */
13976 
13977 DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
13978                            Bool         (*resteerOkFn) ( void*, Addr ),
13979                            Bool         resteerCisOk,
13980                            void*        callback_opaque,
13981                            const UChar* guest_code_IN,
13982                            Long         delta_IN,
13983                            Addr         guest_IP,
13984                            VexArch      guest_arch,
13985                            const VexArchInfo* archinfo,
13986                            const VexAbiInfo*  abiinfo,
13987                            VexEndness   host_endness_IN,
13988                            Bool         sigill_diag_IN )
13989 {
13990    DisResult dres;
13991    vex_bzero(&dres, sizeof(dres));
13992 
13993    /* Set globals (see top of this file) */
13994    vassert(guest_arch == VexArchARM64);
13995 
13996    irsb                = irsb_IN;
13997    host_endness        = host_endness_IN;
13998    guest_PC_curr_instr = (Addr64)guest_IP;
13999 
14000    /* Sanity checks */
14001    /* (x::UInt - 2) <= 15  ===  x >= 2 && x <= 17 (unsigned wrap covers x < 2) */
14002    vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14003    vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
14004 
14005    /* Try to decode */
14006    Bool ok = disInstr_ARM64_WRK( &dres,
14007                                  resteerOkFn, resteerCisOk, callback_opaque,
14008                                  &guest_code_IN[delta_IN],
14009                                  archinfo, abiinfo );
14010    if (ok) {
14011       /* All decode successes end up here. */
14012       vassert(dres.len == 4 || dres.len == 20);
14013       switch (dres.whatNext) {
14014          case Dis_Continue:
14015             putPC( mkU64(dres.len + guest_PC_curr_instr) );
14016             break;
14017          case Dis_ResteerU:
14018          case Dis_ResteerC:
14019             putPC(mkU64(dres.continueAt));
14020             break;
14021          case Dis_StopHere:
14022             break;
14023          default:
14024             vassert(0);
14025       }
14026       DIP("\n");
14027    } else {
14028       /* All decode failures end up here. */
14029       if (sigill_diag_IN) {
14030          Int   i, j;
14031          UChar buf[64];
14032          UInt  insn
14033                   = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
14034          vex_bzero(buf, sizeof(buf));
14035          for (i = j = 0; i < 32; i++) {
14036             if (i > 0) {
14037               if ((i & 7) == 0) buf[j++] = ' ';
14038               else if ((i & 3) == 0) buf[j++] = '\'';
14039             }
14040             buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14041          }
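         /* e.g. insn == 0x91000000 renders as
            "1001'0001 0000'0000 0000'0000 0000'0000". */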
14042          vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14043          vex_printf("disInstr(arm64): %s\n", buf);
14044       }
14045 
14046       /* Tell the dispatcher that this insn cannot be decoded, and so
14047          has not been executed, and (is currently) the next to be
14048          executed.  PC should be up-to-date since it is made so at the
14049          start of each insn, but nevertheless be paranoid and update
14050          it again right now. */
14051       putPC( mkU64(guest_PC_curr_instr) );
14052       dres.len         = 0;
14053       dres.whatNext    = Dis_StopHere;
14054       dres.jk_StopHere = Ijk_NoDecode;
14055       dres.continueAt  = 0;
14056    }
14057    return dres;
14058 }
14059 
14060 
14061 /*--------------------------------------------------------------------*/
14062 /*--- end                                       guest_arm64_toIR.c ---*/
14063 /*--------------------------------------------------------------------*/
14064