1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2015 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 /* KNOWN LIMITATIONS 2014-Nov-16
33
34 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
35
36 Also FP comparison "unordered" .. is implemented as normal FP
37 comparison.
38
39 Both should be fixed. They behave incorrectly in the presence of
40 NaNs.
41
42 FMULX is treated the same as FMUL. That's also not correct.
43
44    * Floating multiply-add (etc) insns are split into a multiply and
45 an add, and so suffer double rounding and hence sometimes the
46 least significant mantissa bit is incorrect. Fix: use the IR
47 multiply-add IROps instead.
48
49 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
50 handling for the "ties" case. FRINTX might be dubious too.
51
52 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
53 just rounds to nearest.
54 */
55
56 /* "Special" instructions.
57
58 This instruction decoder can decode four special instructions
59 which mean nothing natively (are no-ops as far as regs/mem are
60 concerned) but have meaning for supporting Valgrind. A special
61 instruction is flagged by a 16-byte preamble:
62
63 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
64 (ror x12, x12, #3; ror x12, x12, #13
65 ror x12, x12, #51; ror x12, x12, #61)
66
67    Following that, exactly one of the following 4 is allowed
68 (standard interpretation in parentheses):
69
70 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
71 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
72 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
73 AA090129 (orr x9,x9,x9) IR injection
74
75 Any other bytes following the 16-byte preamble are illegal and
76 constitute a failure in instruction decoding. This all assumes
77 that the preamble will never occur except in specific code
78 fragments designed for Valgrind to catch.
79 */
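/* As an illustration only (an editor's sketch, not code the decoder
   uses): with the little-endian 32-bit loader defined further down
   this file, a check for the 16-byte preamble could look like the
   following, where 'looks_like_special_preamble' is a made-up name:

      static Bool looks_like_special_preamble ( const UChar* code ) {
         return getUIntLittleEndianly(code +  0) == 0x93CC0D8C
                && getUIntLittleEndianly(code +  4) == 0x93CC358C
                && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
                && getUIntLittleEndianly(code + 12) == 0x93CCF58C;
      }

   The decoder itself performs this check inline when it examines each
   instruction. */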
80
81 /* Translates ARM64 code to IR. */
82
83 #include "libvex_basictypes.h"
84 #include "libvex_ir.h"
85 #include "libvex.h"
86 #include "libvex_guest_arm64.h"
87
88 #include "main_util.h"
89 #include "main_globals.h"
90 #include "guest_generic_bb_to_IR.h"
91 #include "guest_arm64_defs.h"
92
93
94 /*------------------------------------------------------------*/
95 /*--- Globals ---*/
96 /*------------------------------------------------------------*/
97
98 /* These are set at the start of the translation of an instruction, so
99 that we don't have to pass them around endlessly. CONST means does
100 not change during translation of the instruction.
101 */
102
103 /* CONST: what is the host's endianness? We need to know this in
104 order to do sub-register accesses to the SIMD/FP registers
105 correctly. */
106 static VexEndness host_endness;
107
108 /* CONST: The guest address for the instruction currently being
109 translated. */
110 static Addr64 guest_PC_curr_instr;
111
112 /* MOD: The IRSB* into which we're generating code. */
113 static IRSB* irsb;
114
115
116 /*------------------------------------------------------------*/
117 /*--- Debugging output ---*/
118 /*------------------------------------------------------------*/
119
120 #define DIP(format, args...) \
121 if (vex_traceflags & VEX_TRACE_FE) \
122 vex_printf(format, ## args)
123
124 #define DIS(buf, format, args...) \
125 if (vex_traceflags & VEX_TRACE_FE) \
126 vex_sprintf(buf, format, ## args)
127
128
129 /*------------------------------------------------------------*/
130 /*--- Helper bits and pieces for deconstructing the ---*/
131 /*--- arm insn stream. ---*/
132 /*------------------------------------------------------------*/
133
134 /* Do a little-endian load of a 32-bit word, regardless of the
135 endianness of the underlying host. */
136 static inline UInt getUIntLittleEndianly ( const UChar* p )
137 {
138 UInt w = 0;
139 w = (w << 8) | p[3];
140 w = (w << 8) | p[2];
141 w = (w << 8) | p[1];
142 w = (w << 8) | p[0];
143 return w;
144 }
145
146 /* Sign extend a N-bit value up to 64 bits, by copying
147 bit N-1 into all higher positions. */
148 static ULong sx_to_64 ( ULong x, UInt n )
149 {
150 vassert(n > 1 && n < 64);
151 Long r = (Long)x;
152 r = (r << (64-n)) >> (64-n);
153 return (ULong)r;
154 }
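/* For example, sx_to_64(0x1FF, 9) == 0xFFFFFFFFFFFFFFFF (bit 8 is set,
   so it is copied into all higher positions), whereas
   sx_to_64(0x0FF, 9) == 0xFF. */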
155
156 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
157 //ZZ endianness of the underlying host. */
158 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
159 //ZZ {
160 //ZZ UShort w = 0;
161 //ZZ w = (w << 8) | p[1];
162 //ZZ w = (w << 8) | p[0];
163 //ZZ return w;
164 //ZZ }
165 //ZZ
166 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
167 //ZZ vassert(sh >= 0 && sh < 32);
168 //ZZ if (sh == 0)
169 //ZZ return x;
170 //ZZ else
171 //ZZ return (x << (32-sh)) | (x >> sh);
172 //ZZ }
173 //ZZ
174 //ZZ static Int popcount32 ( UInt x )
175 //ZZ {
176 //ZZ Int res = 0, i;
177 //ZZ for (i = 0; i < 32; i++) {
178 //ZZ res += (x & 1);
179 //ZZ x >>= 1;
180 //ZZ }
181 //ZZ return res;
182 //ZZ }
183 //ZZ
184 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
185 //ZZ {
186 //ZZ UInt mask = 1 << ix;
187 //ZZ x &= ~mask;
188 //ZZ x |= ((b << ix) & mask);
189 //ZZ return x;
190 //ZZ }
191
192 #define BITS2(_b1,_b0) \
193 (((_b1) << 1) | (_b0))
194
195 #define BITS3(_b2,_b1,_b0) \
196 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197
198 #define BITS4(_b3,_b2,_b1,_b0) \
199 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200
201 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
202 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
203 | BITS4((_b3),(_b2),(_b1),(_b0)))
204
205 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
206 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
207 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
208 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
209 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
210 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211
212 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
213 (((_b8) << 8) \
214 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215
216 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
217 (((_b9) << 9) | ((_b8) << 8) \
218 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219
220 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
221 (((_b10) << 10) \
222 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223
224 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
225 (((_b11) << 11) \
226 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227
228 #define X00 BITS2(0,0)
229 #define X01 BITS2(0,1)
230 #define X10 BITS2(1,0)
231 #define X11 BITS2(1,1)
232
233 // produces _uint[_bMax:_bMin]
234 #define SLICE_UInt(_uint,_bMax,_bMin) \
235 (( ((UInt)(_uint)) >> (_bMin)) \
236 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
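// For example, SLICE_UInt(insn, 31, 24) gives the top byte of 'insn',
// and SLICE_UInt(insn, 4, 0) gives its lowest five bits (a common
// register-number field position in ARM64 encodings).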
237
238
239 /*------------------------------------------------------------*/
240 /*--- Helper bits and pieces for creating IR fragments. ---*/
241 /*------------------------------------------------------------*/
242
243 static IRExpr* mkV128 ( UShort w )
244 {
245 return IRExpr_Const(IRConst_V128(w));
246 }
247
248 static IRExpr* mkU64 ( ULong i )
249 {
250 return IRExpr_Const(IRConst_U64(i));
251 }
252
253 static IRExpr* mkU32 ( UInt i )
254 {
255 return IRExpr_Const(IRConst_U32(i));
256 }
257
258 static IRExpr* mkU16 ( UInt i )
259 {
260 vassert(i < 65536);
261 return IRExpr_Const(IRConst_U16(i));
262 }
263
264 static IRExpr* mkU8 ( UInt i )
265 {
266 vassert(i < 256);
267 return IRExpr_Const(IRConst_U8( (UChar)i ));
268 }
269
270 static IRExpr* mkexpr ( IRTemp tmp )
271 {
272 return IRExpr_RdTmp(tmp);
273 }
274
275 static IRExpr* unop ( IROp op, IRExpr* a )
276 {
277 return IRExpr_Unop(op, a);
278 }
279
280 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
281 {
282 return IRExpr_Binop(op, a1, a2);
283 }
284
285 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
286 {
287 return IRExpr_Triop(op, a1, a2, a3);
288 }
289
290 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
291 {
292 return IRExpr_Load(Iend_LE, ty, addr);
293 }
294
295 /* Add a statement to the list held by "irbb". */
296 static void stmt ( IRStmt* st )
297 {
298 addStmtToIRSB( irsb, st );
299 }
300
301 static void assign ( IRTemp dst, IRExpr* e )
302 {
303 stmt( IRStmt_WrTmp(dst, e) );
304 }
305
306 static void storeLE ( IRExpr* addr, IRExpr* data )
307 {
308 stmt( IRStmt_Store(Iend_LE, addr, data) );
309 }
310
311 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
312 //ZZ {
313 //ZZ if (guardT == IRTemp_INVALID) {
314 //ZZ /* unconditional */
315 //ZZ storeLE(addr, data);
316 //ZZ } else {
317 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
318 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
319 //ZZ }
320 //ZZ }
321 //ZZ
322 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
323 //ZZ IRExpr* addr, IRExpr* alt,
324 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
325 //ZZ {
326 //ZZ if (guardT == IRTemp_INVALID) {
327 //ZZ /* unconditional */
328 //ZZ IRExpr* loaded = NULL;
329 //ZZ switch (cvt) {
330 //ZZ case ILGop_Ident32:
331 //ZZ loaded = loadLE(Ity_I32, addr); break;
332 //ZZ case ILGop_8Uto32:
333 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
334 //ZZ case ILGop_8Sto32:
335 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
336 //ZZ case ILGop_16Uto32:
337 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
338 //ZZ case ILGop_16Sto32:
339 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
340 //ZZ default:
341 //ZZ vassert(0);
342 //ZZ }
343 //ZZ vassert(loaded != NULL);
344 //ZZ assign(dst, loaded);
345 //ZZ } else {
346 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
347 //ZZ loaded data before putting the data in 'dst'. If the load
348 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
349 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
350 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
351 //ZZ }
352 //ZZ }
353
354 /* Generate a new temporary of the given type. */
355 static IRTemp newTemp ( IRType ty )
356 {
357 vassert(isPlausibleIRType(ty));
358 return newIRTemp( irsb->tyenv, ty );
359 }
360
361 /* This is used in many places, so the brevity is an advantage. */
362 static IRTemp newTempV128(void)
363 {
364 return newTemp(Ity_V128);
365 }
366
367 /* Initialise V128 temporaries en masse. */
368 static
369 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
370 {
371 vassert(t1 && *t1 == IRTemp_INVALID);
372 vassert(t2 && *t2 == IRTemp_INVALID);
373 *t1 = newTempV128();
374 *t2 = newTempV128();
375 }
376
377 static
378 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
379 {
380 vassert(t1 && *t1 == IRTemp_INVALID);
381 vassert(t2 && *t2 == IRTemp_INVALID);
382 vassert(t3 && *t3 == IRTemp_INVALID);
383 *t1 = newTempV128();
384 *t2 = newTempV128();
385 *t3 = newTempV128();
386 }
387
388 static
389 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
390 {
391 vassert(t1 && *t1 == IRTemp_INVALID);
392 vassert(t2 && *t2 == IRTemp_INVALID);
393 vassert(t3 && *t3 == IRTemp_INVALID);
394 vassert(t4 && *t4 == IRTemp_INVALID);
395 *t1 = newTempV128();
396 *t2 = newTempV128();
397 *t3 = newTempV128();
398 *t4 = newTempV128();
399 }
400
401 static
402 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
403 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
404 {
405 vassert(t1 && *t1 == IRTemp_INVALID);
406 vassert(t2 && *t2 == IRTemp_INVALID);
407 vassert(t3 && *t3 == IRTemp_INVALID);
408 vassert(t4 && *t4 == IRTemp_INVALID);
409 vassert(t5 && *t5 == IRTemp_INVALID);
410 vassert(t6 && *t6 == IRTemp_INVALID);
411 vassert(t7 && *t7 == IRTemp_INVALID);
412 *t1 = newTempV128();
413 *t2 = newTempV128();
414 *t3 = newTempV128();
415 *t4 = newTempV128();
416 *t5 = newTempV128();
417 *t6 = newTempV128();
418 *t7 = newTempV128();
419 }
420
421 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
422 //ZZ IRRoundingMode. */
423 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
424 //ZZ {
425 //ZZ return mkU32(Irrm_NEAREST);
426 //ZZ }
427 //ZZ
428 //ZZ /* Generate an expression for SRC rotated right by ROT. */
429 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
430 //ZZ {
431 //ZZ vassert(rot >= 0 && rot < 32);
432 //ZZ if (rot == 0)
433 //ZZ return mkexpr(src);
434 //ZZ return
435 //ZZ binop(Iop_Or32,
436 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
437 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
438 //ZZ }
439 //ZZ
440 //ZZ static IRExpr* mkU128 ( ULong i )
441 //ZZ {
442 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
443 //ZZ }
444 //ZZ
445 //ZZ /* Generate a 4-aligned version of the given expression if
446 //ZZ the given condition is true. Else return it unchanged. */
447 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
448 //ZZ {
449 //ZZ if (b)
450 //ZZ return binop(Iop_And32, e, mkU32(~3));
451 //ZZ else
452 //ZZ return e;
453 //ZZ }
454
455 /* Other IR construction helpers. */
456 static IROp mkAND ( IRType ty ) {
457 switch (ty) {
458 case Ity_I32: return Iop_And32;
459 case Ity_I64: return Iop_And64;
460 default: vpanic("mkAND");
461 }
462 }
463
464 static IROp mkOR ( IRType ty ) {
465 switch (ty) {
466 case Ity_I32: return Iop_Or32;
467 case Ity_I64: return Iop_Or64;
468 default: vpanic("mkOR");
469 }
470 }
471
472 static IROp mkXOR ( IRType ty ) {
473 switch (ty) {
474 case Ity_I32: return Iop_Xor32;
475 case Ity_I64: return Iop_Xor64;
476 default: vpanic("mkXOR");
477 }
478 }
479
480 static IROp mkSHL ( IRType ty ) {
481 switch (ty) {
482 case Ity_I32: return Iop_Shl32;
483 case Ity_I64: return Iop_Shl64;
484 default: vpanic("mkSHL");
485 }
486 }
487
488 static IROp mkSHR ( IRType ty ) {
489 switch (ty) {
490 case Ity_I32: return Iop_Shr32;
491 case Ity_I64: return Iop_Shr64;
492 default: vpanic("mkSHR");
493 }
494 }
495
496 static IROp mkSAR ( IRType ty ) {
497 switch (ty) {
498 case Ity_I32: return Iop_Sar32;
499 case Ity_I64: return Iop_Sar64;
500 default: vpanic("mkSAR");
501 }
502 }
503
504 static IROp mkNOT ( IRType ty ) {
505 switch (ty) {
506 case Ity_I32: return Iop_Not32;
507 case Ity_I64: return Iop_Not64;
508 default: vpanic("mkNOT");
509 }
510 }
511
512 static IROp mkADD ( IRType ty ) {
513 switch (ty) {
514 case Ity_I32: return Iop_Add32;
515 case Ity_I64: return Iop_Add64;
516 default: vpanic("mkADD");
517 }
518 }
519
520 static IROp mkSUB ( IRType ty ) {
521 switch (ty) {
522 case Ity_I32: return Iop_Sub32;
523 case Ity_I64: return Iop_Sub64;
524 default: vpanic("mkSUB");
525 }
526 }
527
528 static IROp mkADDF ( IRType ty ) {
529 switch (ty) {
530 case Ity_F32: return Iop_AddF32;
531 case Ity_F64: return Iop_AddF64;
532 default: vpanic("mkADDF");
533 }
534 }
535
536 static IROp mkSUBF ( IRType ty ) {
537 switch (ty) {
538 case Ity_F32: return Iop_SubF32;
539 case Ity_F64: return Iop_SubF64;
540 default: vpanic("mkSUBF");
541 }
542 }
543
544 static IROp mkMULF ( IRType ty ) {
545 switch (ty) {
546 case Ity_F32: return Iop_MulF32;
547 case Ity_F64: return Iop_MulF64;
548 default: vpanic("mkMULF");
549 }
550 }
551
552 static IROp mkDIVF ( IRType ty ) {
553 switch (ty) {
554 case Ity_F32: return Iop_DivF32;
555 case Ity_F64: return Iop_DivF64;
556 default: vpanic("mkMULF");
557 }
558 }
559
560 static IROp mkNEGF ( IRType ty ) {
561 switch (ty) {
562 case Ity_F32: return Iop_NegF32;
563 case Ity_F64: return Iop_NegF64;
564 default: vpanic("mkNEGF");
565 }
566 }
567
568 static IROp mkABSF ( IRType ty ) {
569 switch (ty) {
570 case Ity_F32: return Iop_AbsF32;
571 case Ity_F64: return Iop_AbsF64;
572 default: vpanic("mkNEGF");
573 }
574 }
575
576 static IROp mkSQRTF ( IRType ty ) {
577 switch (ty) {
578 case Ity_F32: return Iop_SqrtF32;
579 case Ity_F64: return Iop_SqrtF64;
580 default: vpanic("mkNEGF");
581 }
582 }
583
584 static IROp mkVecADD ( UInt size ) {
585 const IROp ops[4]
586 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
587 vassert(size < 4);
588 return ops[size];
589 }
590
591 static IROp mkVecQADDU ( UInt size ) {
592 const IROp ops[4]
593 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
594 vassert(size < 4);
595 return ops[size];
596 }
597
598 static IROp mkVecQADDS ( UInt size ) {
599 const IROp ops[4]
600 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
601 vassert(size < 4);
602 return ops[size];
603 }
604
605 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
606 const IROp ops[4]
607 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
608 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
609 vassert(size < 4);
610 return ops[size];
611 }
612
613 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
614 const IROp ops[4]
615 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
616 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
617 vassert(size < 4);
618 return ops[size];
619 }
620
621 static IROp mkVecSUB ( UInt size ) {
622 const IROp ops[4]
623 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
624 vassert(size < 4);
625 return ops[size];
626 }
627
628 static IROp mkVecQSUBU ( UInt size ) {
629 const IROp ops[4]
630 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
631 vassert(size < 4);
632 return ops[size];
633 }
634
635 static IROp mkVecQSUBS ( UInt size ) {
636 const IROp ops[4]
637 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
638 vassert(size < 4);
639 return ops[size];
640 }
641
642 static IROp mkVecSARN ( UInt size ) {
643 const IROp ops[4]
644 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
645 vassert(size < 4);
646 return ops[size];
647 }
648
649 static IROp mkVecSHRN ( UInt size ) {
650 const IROp ops[4]
651 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
652 vassert(size < 4);
653 return ops[size];
654 }
655
656 static IROp mkVecSHLN ( UInt size ) {
657 const IROp ops[4]
658 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
659 vassert(size < 4);
660 return ops[size];
661 }
662
663 static IROp mkVecCATEVENLANES ( UInt size ) {
664 const IROp ops[4]
665 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
666 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
667 vassert(size < 4);
668 return ops[size];
669 }
670
671 static IROp mkVecCATODDLANES ( UInt size ) {
672 const IROp ops[4]
673 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
674 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
675 vassert(size < 4);
676 return ops[size];
677 }
678
679 static IROp mkVecINTERLEAVELO ( UInt size ) {
680 const IROp ops[4]
681 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
682 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
683 vassert(size < 4);
684 return ops[size];
685 }
686
687 static IROp mkVecINTERLEAVEHI ( UInt size ) {
688 const IROp ops[4]
689 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
690 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
691 vassert(size < 4);
692 return ops[size];
693 }
694
695 static IROp mkVecMAXU ( UInt size ) {
696 const IROp ops[4]
697 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
698 vassert(size < 4);
699 return ops[size];
700 }
701
702 static IROp mkVecMAXS ( UInt size ) {
703 const IROp ops[4]
704 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
705 vassert(size < 4);
706 return ops[size];
707 }
708
709 static IROp mkVecMINU ( UInt size ) {
710 const IROp ops[4]
711 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
712 vassert(size < 4);
713 return ops[size];
714 }
715
716 static IROp mkVecMINS ( UInt size ) {
717 const IROp ops[4]
718 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
719 vassert(size < 4);
720 return ops[size];
721 }
722
723 static IROp mkVecMUL ( UInt size ) {
724 const IROp ops[4]
725 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
726 vassert(size < 3);
727 return ops[size];
728 }
729
730 static IROp mkVecMULLU ( UInt sizeNarrow ) {
731 const IROp ops[4]
732 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
733 vassert(sizeNarrow < 3);
734 return ops[sizeNarrow];
735 }
736
737 static IROp mkVecMULLS ( UInt sizeNarrow ) {
738 const IROp ops[4]
739 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
740 vassert(sizeNarrow < 3);
741 return ops[sizeNarrow];
742 }
743
744 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
745 const IROp ops[4]
746 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
747 vassert(sizeNarrow < 3);
748 return ops[sizeNarrow];
749 }
750
751 static IROp mkVecCMPEQ ( UInt size ) {
752 const IROp ops[4]
753 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
754 vassert(size < 4);
755 return ops[size];
756 }
757
758 static IROp mkVecCMPGTU ( UInt size ) {
759 const IROp ops[4]
760 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
761 vassert(size < 4);
762 return ops[size];
763 }
764
765 static IROp mkVecCMPGTS ( UInt size ) {
766 const IROp ops[4]
767 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
768 vassert(size < 4);
769 return ops[size];
770 }
771
772 static IROp mkVecABS ( UInt size ) {
773 const IROp ops[4]
774 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
775 vassert(size < 4);
776 return ops[size];
777 }
778
779 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
780 const IROp ops[4]
781 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
782 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
783 vassert(size < 4);
784 return ops[size];
785 }
786
787 static IRExpr* mkU ( IRType ty, ULong imm ) {
788 switch (ty) {
789 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
790 case Ity_I64: return mkU64(imm);
791 default: vpanic("mkU");
792 }
793 }
794
795 static IROp mkVecQDMULHIS ( UInt size ) {
796 const IROp ops[4]
797 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
798 vassert(size < 4);
799 return ops[size];
800 }
801
802 static IROp mkVecQRDMULHIS ( UInt size ) {
803 const IROp ops[4]
804 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
805 vassert(size < 4);
806 return ops[size];
807 }
808
809 static IROp mkVecQANDUQSH ( UInt size ) {
810 const IROp ops[4]
811 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
812 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
813 vassert(size < 4);
814 return ops[size];
815 }
816
817 static IROp mkVecQANDSQSH ( UInt size ) {
818 const IROp ops[4]
819 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
820 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
821 vassert(size < 4);
822 return ops[size];
823 }
824
825 static IROp mkVecQANDUQRSH ( UInt size ) {
826 const IROp ops[4]
827 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
828 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
829 vassert(size < 4);
830 return ops[size];
831 }
832
833 static IROp mkVecQANDSQRSH ( UInt size ) {
834 const IROp ops[4]
835 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
836 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
837 vassert(size < 4);
838 return ops[size];
839 }
840
841 static IROp mkVecSHU ( UInt size ) {
842 const IROp ops[4]
843 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
844 vassert(size < 4);
845 return ops[size];
846 }
847
848 static IROp mkVecSHS ( UInt size ) {
849 const IROp ops[4]
850 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
851 vassert(size < 4);
852 return ops[size];
853 }
854
855 static IROp mkVecRSHU ( UInt size ) {
856 const IROp ops[4]
857 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
858 vassert(size < 4);
859 return ops[size];
860 }
861
862 static IROp mkVecRSHS ( UInt size ) {
863 const IROp ops[4]
864 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
865 vassert(size < 4);
866 return ops[size];
867 }
868
869 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
870 const IROp ops[4]
871 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
872 Iop_NarrowUn64to32x2, Iop_INVALID };
873 vassert(sizeNarrow < 4);
874 return ops[sizeNarrow];
875 }
876
877 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
878 const IROp ops[4]
879 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
880 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
881 vassert(sizeNarrow < 4);
882 return ops[sizeNarrow];
883 }
884
885 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
886 const IROp ops[4]
887 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
888 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
889 vassert(sizeNarrow < 4);
890 return ops[sizeNarrow];
891 }
892
893 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
894 const IROp ops[4]
895 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
896 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
897 vassert(sizeNarrow < 4);
898 return ops[sizeNarrow];
899 }
900
901 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
902 const IROp ops[4]
903 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
904 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
905 vassert(sizeNarrow < 4);
906 return ops[sizeNarrow];
907 }
908
909 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
910 const IROp ops[4]
911 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
912 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
913 vassert(sizeNarrow < 4);
914 return ops[sizeNarrow];
915 }
916
917 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
918 const IROp ops[4]
919 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
920 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
921 vassert(sizeNarrow < 4);
922 return ops[sizeNarrow];
923 }
924
925 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
926 const IROp ops[4]
927 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
928 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
929 vassert(sizeNarrow < 4);
930 return ops[sizeNarrow];
931 }
932
933 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
934 const IROp ops[4]
935 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
936 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
937 vassert(sizeNarrow < 4);
938 return ops[sizeNarrow];
939 }
940
941 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
942 const IROp ops[4]
943 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
944 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
945 vassert(sizeNarrow < 4);
946 return ops[sizeNarrow];
947 }
948
949 static IROp mkVecQSHLNSATUU ( UInt size ) {
950 const IROp ops[4]
951 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
952 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
953 vassert(size < 4);
954 return ops[size];
955 }
956
957 static IROp mkVecQSHLNSATSS ( UInt size ) {
958 const IROp ops[4]
959 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
960 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
961 vassert(size < 4);
962 return ops[size];
963 }
964
965 static IROp mkVecQSHLNSATSU ( UInt size ) {
966 const IROp ops[4]
967 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
968 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
969 vassert(size < 4);
970 return ops[size];
971 }
972
973 static IROp mkVecADDF ( UInt size ) {
974 const IROp ops[4]
975 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
976 vassert(size < 4);
977 return ops[size];
978 }
979
980 static IROp mkVecMAXF ( UInt size ) {
981 const IROp ops[4]
982 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
983 vassert(size < 4);
984 return ops[size];
985 }
986
987 static IROp mkVecMINF ( UInt size ) {
988 const IROp ops[4]
989 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
990 vassert(size < 4);
991 return ops[size];
992 }
993
994 /* Generate IR to create 'arg rotated right by imm', for sane values
995 of 'ty' and 'imm'. */
996 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
997 {
998 UInt w = 0;
999 if (ty == Ity_I64) {
1000 w = 64;
1001 } else {
1002 vassert(ty == Ity_I32);
1003 w = 32;
1004 }
1005 vassert(w != 0);
1006 vassert(imm < w);
1007 if (imm == 0) {
1008 return arg;
1009 }
1010 IRTemp res = newTemp(ty);
1011 assign(res, binop(mkOR(ty),
1012 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1013 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1014 return res;
1015 }
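/* For example, with ty == Ity_I32 and imm == 8, the returned temp holds
   (arg << 24) | (arg >> 8), i.e. 'arg' rotated right by 8 bits. */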
1016
1017 /* Generate IR to set the returned temp to either all-zeroes or
1018 all ones, as a copy of arg<imm>. */
1019 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1020 {
1021 UInt w = 0;
1022 if (ty == Ity_I64) {
1023 w = 64;
1024 } else {
1025 vassert(ty == Ity_I32);
1026 w = 32;
1027 }
1028 vassert(w != 0);
1029 vassert(imm < w);
1030 IRTemp res = newTemp(ty);
1031 assign(res, binop(mkSAR(ty),
1032 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1033 mkU8(w - 1)));
1034 return res;
1035 }
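/* For example, with ty == Ity_I32 and imm == 5, this computes
   (arg << 26) >>signed 31: bit 5 is moved up to bit 31 and the
   arithmetic right shift then copies it into every bit position. */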
1036
1037 /* U-widen 8/16/32/64 bit int expr to 64. */
1038 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1039 {
1040 switch (srcTy) {
1041 case Ity_I64: return e;
1042 case Ity_I32: return unop(Iop_32Uto64, e);
1043 case Ity_I16: return unop(Iop_16Uto64, e);
1044 case Ity_I8: return unop(Iop_8Uto64, e);
1045 default: vpanic("widenUto64(arm64)");
1046 }
1047 }
1048
1049 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1050 of these combinations make sense. */
1051 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1052 {
1053 switch (dstTy) {
1054 case Ity_I64: return e;
1055 case Ity_I32: return unop(Iop_64to32, e);
1056 case Ity_I16: return unop(Iop_64to16, e);
1057 case Ity_I8: return unop(Iop_64to8, e);
1058 default: vpanic("narrowFrom64(arm64)");
1059 }
1060 }
1061
1062
1063 /*------------------------------------------------------------*/
1064 /*--- Helpers for accessing guest registers. ---*/
1065 /*------------------------------------------------------------*/
1066
1067 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1068 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1069 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1070 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1071 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1072 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1073 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1074 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1075 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1076 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1077 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1078 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1079 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1080 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1081 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1082 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1083 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1084 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1085 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1086 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1087 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1088 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1089 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1090 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1091 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1092 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1093 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1094 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1095 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1096 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1097 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1098
1099 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1100 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1101
1102 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1103 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1104 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1105 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1106
1107 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1108 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1109
1110 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1111 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1112 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1113 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1114 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1115 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1116 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1117 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1118 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1119 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1120 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1121 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1122 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1123 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1124 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1125 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1126 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1127 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1128 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1129 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1130 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1131 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1132 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1133 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1134 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1135 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1136 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1137 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1138 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1139 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1140 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1141 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1142
1143 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1144 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1145
1146 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1147 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1148
1149
1150 /* ---------------- Integer registers ---------------- */
1151
1152 static Int offsetIReg64 ( UInt iregNo )
1153 {
1154 /* Do we care about endianness here? We do if sub-parts of integer
1155 registers are accessed. */
1156 switch (iregNo) {
1157 case 0: return OFFB_X0;
1158 case 1: return OFFB_X1;
1159 case 2: return OFFB_X2;
1160 case 3: return OFFB_X3;
1161 case 4: return OFFB_X4;
1162 case 5: return OFFB_X5;
1163 case 6: return OFFB_X6;
1164 case 7: return OFFB_X7;
1165 case 8: return OFFB_X8;
1166 case 9: return OFFB_X9;
1167 case 10: return OFFB_X10;
1168 case 11: return OFFB_X11;
1169 case 12: return OFFB_X12;
1170 case 13: return OFFB_X13;
1171 case 14: return OFFB_X14;
1172 case 15: return OFFB_X15;
1173 case 16: return OFFB_X16;
1174 case 17: return OFFB_X17;
1175 case 18: return OFFB_X18;
1176 case 19: return OFFB_X19;
1177 case 20: return OFFB_X20;
1178 case 21: return OFFB_X21;
1179 case 22: return OFFB_X22;
1180 case 23: return OFFB_X23;
1181 case 24: return OFFB_X24;
1182 case 25: return OFFB_X25;
1183 case 26: return OFFB_X26;
1184 case 27: return OFFB_X27;
1185 case 28: return OFFB_X28;
1186 case 29: return OFFB_X29;
1187 case 30: return OFFB_X30;
1188 /* but not 31 */
1189 default: vassert(0);
1190 }
1191 }
1192
1193 static Int offsetIReg64orSP ( UInt iregNo )
1194 {
1195 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1196 }
1197
1198 static const HChar* nameIReg64orZR ( UInt iregNo )
1199 {
1200 vassert(iregNo < 32);
1201 static const HChar* names[32]
1202 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1203 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1204 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1205 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1206 return names[iregNo];
1207 }
1208
1209 static const HChar* nameIReg64orSP ( UInt iregNo )
1210 {
1211 if (iregNo == 31) {
1212 return "sp";
1213 }
1214 vassert(iregNo < 31);
1215 return nameIReg64orZR(iregNo);
1216 }
1217
1218 static IRExpr* getIReg64orSP ( UInt iregNo )
1219 {
1220 vassert(iregNo < 32);
1221 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1222 }
1223
1224 static IRExpr* getIReg64orZR ( UInt iregNo )
1225 {
1226 if (iregNo == 31) {
1227 return mkU64(0);
1228 }
1229 vassert(iregNo < 31);
1230 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1231 }
1232
1233 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1234 {
1235 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1236 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1237 }
1238
1239 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1240 {
1241 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1242 if (iregNo == 31) {
1243 return;
1244 }
1245 vassert(iregNo < 31);
1246 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1247 }
1248
1249 static const HChar* nameIReg32orZR ( UInt iregNo )
1250 {
1251 vassert(iregNo < 32);
1252 static const HChar* names[32]
1253 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1254 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1255 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1256 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1257 return names[iregNo];
1258 }
1259
1260 static const HChar* nameIReg32orSP ( UInt iregNo )
1261 {
1262 if (iregNo == 31) {
1263 return "wsp";
1264 }
1265 vassert(iregNo < 31);
1266 return nameIReg32orZR(iregNo);
1267 }
1268
1269 static IRExpr* getIReg32orSP ( UInt iregNo )
1270 {
1271 vassert(iregNo < 32);
1272 return unop(Iop_64to32,
1273 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1274 }
1275
1276 static IRExpr* getIReg32orZR ( UInt iregNo )
1277 {
1278 if (iregNo == 31) {
1279 return mkU32(0);
1280 }
1281 vassert(iregNo < 31);
1282 return unop(Iop_64to32,
1283 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1284 }
1285
1286 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1287 {
1288 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1290 }
1291
1292 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1293 {
1294 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1295 if (iregNo == 31) {
1296 return;
1297 }
1298 vassert(iregNo < 31);
1299 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1300 }
1301
1302 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1303 {
1304 vassert(is64 == True || is64 == False);
1305 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1306 }
1307
1308 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1309 {
1310 vassert(is64 == True || is64 == False);
1311 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1312 }
1313
1314 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1315 {
1316 vassert(is64 == True || is64 == False);
1317 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1318 }
1319
1320 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1321 {
1322 vassert(is64 == True || is64 == False);
1323 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1324 }
1325
1326 static void putPC ( IRExpr* e )
1327 {
1328 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1329 stmt( IRStmt_Put(OFFB_PC, e) );
1330 }
1331
1332
1333 /* ---------------- Vector (Q) registers ---------------- */
1334
1335 static Int offsetQReg128 ( UInt qregNo )
1336 {
1337 /* We don't care about endianness at this point. It only becomes
1338 relevant when dealing with sections of these registers.*/
1339 switch (qregNo) {
1340 case 0: return OFFB_Q0;
1341 case 1: return OFFB_Q1;
1342 case 2: return OFFB_Q2;
1343 case 3: return OFFB_Q3;
1344 case 4: return OFFB_Q4;
1345 case 5: return OFFB_Q5;
1346 case 6: return OFFB_Q6;
1347 case 7: return OFFB_Q7;
1348 case 8: return OFFB_Q8;
1349 case 9: return OFFB_Q9;
1350 case 10: return OFFB_Q10;
1351 case 11: return OFFB_Q11;
1352 case 12: return OFFB_Q12;
1353 case 13: return OFFB_Q13;
1354 case 14: return OFFB_Q14;
1355 case 15: return OFFB_Q15;
1356 case 16: return OFFB_Q16;
1357 case 17: return OFFB_Q17;
1358 case 18: return OFFB_Q18;
1359 case 19: return OFFB_Q19;
1360 case 20: return OFFB_Q20;
1361 case 21: return OFFB_Q21;
1362 case 22: return OFFB_Q22;
1363 case 23: return OFFB_Q23;
1364 case 24: return OFFB_Q24;
1365 case 25: return OFFB_Q25;
1366 case 26: return OFFB_Q26;
1367 case 27: return OFFB_Q27;
1368 case 28: return OFFB_Q28;
1369 case 29: return OFFB_Q29;
1370 case 30: return OFFB_Q30;
1371 case 31: return OFFB_Q31;
1372 default: vassert(0);
1373 }
1374 }
1375
1376 /* Write to a complete Qreg. */
1377 static void putQReg128 ( UInt qregNo, IRExpr* e )
1378 {
1379 vassert(qregNo < 32);
1380 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1381 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1382 }
1383
1384 /* Read a complete Qreg. */
1385 static IRExpr* getQReg128 ( UInt qregNo )
1386 {
1387 vassert(qregNo < 32);
1388 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1389 }
1390
1391 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1392 bit sub-parts we can choose either integer or float types, and
1393 choose float on the basis that that is the common use case and so
1394 will give least interference with Put-to-Get forwarding later
1395 on. */
1396 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1397 {
1398 switch (szB) {
1399 case 1: return Ity_I8;
1400 case 2: return Ity_I16;
1401 case 4: return Ity_I32; //Ity_F32;
1402 case 8: return Ity_F64;
1403 case 16: return Ity_V128;
1404 default: vassert(0);
1405 }
1406 }
1407
1408 /* Find the offset of the laneNo'th lane of type laneTy in the given
1409 Qreg. Since the host is little-endian, the least significant lane
1410 has the lowest offset. */
1411 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1412 {
1413 vassert(host_endness == VexEndnessLE);
1414 Int base = offsetQReg128(qregNo);
1415 /* Since the host is little-endian, the least significant lane
1416 will be at the lowest address. */
1417 /* Restrict this to known types, so as to avoid silently accepting
1418 stupid types. */
1419 UInt laneSzB = 0;
1420 switch (laneTy) {
1421 case Ity_I8: laneSzB = 1; break;
1422 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1423 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1424 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1425 case Ity_V128: laneSzB = 16; break;
1426 default: break;
1427 }
1428 vassert(laneSzB > 0);
1429 UInt minOff = laneNo * laneSzB;
1430 UInt maxOff = minOff + laneSzB - 1;
1431 vassert(maxOff < 16);
1432 return base + minOff;
1433 }
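/* For example, offsetQRegLane(5, Ity_I64, 1) is offsetQReg128(5) + 8,
   i.e. the upper 8 bytes of q5 on this little-endian layout, which is
   exactly what offsetQRegHI64 below computes. */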
1434
1435 /* Put to the least significant lane of a Qreg. */
1436 static void putQRegLO ( UInt qregNo, IRExpr* e )
1437 {
1438 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1439 Int off = offsetQRegLane(qregNo, ty, 0);
1440 switch (ty) {
1441 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1442 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1443 break;
1444 default:
1445 vassert(0); // Other cases are probably invalid
1446 }
1447 stmt(IRStmt_Put(off, e));
1448 }
1449
1450 /* Get from the least significant lane of a Qreg. */
1451 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1452 {
1453 Int off = offsetQRegLane(qregNo, ty, 0);
1454 switch (ty) {
1455 case Ity_I8:
1456 case Ity_F16: case Ity_I16:
1457 case Ity_I32: case Ity_I64:
1458 case Ity_F32: case Ity_F64: case Ity_V128:
1459 break;
1460 default:
1461 vassert(0); // Other cases are ATC
1462 }
1463 return IRExpr_Get(off, ty);
1464 }
1465
1466 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1467 {
1468 static const HChar* namesQ[32]
1469 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1470 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1471 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1472 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1473 static const HChar* namesD[32]
1474 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1475 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1476 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1477 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1478 static const HChar* namesS[32]
1479 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1480 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1481 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1482 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1483 static const HChar* namesH[32]
1484 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1485 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1486 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1487 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1488 static const HChar* namesB[32]
1489 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1490 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1491 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1492 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1493 vassert(qregNo < 32);
1494 switch (sizeofIRType(laneTy)) {
1495 case 1: return namesB[qregNo];
1496 case 2: return namesH[qregNo];
1497 case 4: return namesS[qregNo];
1498 case 8: return namesD[qregNo];
1499 case 16: return namesQ[qregNo];
1500 default: vassert(0);
1501 }
1502 /*NOTREACHED*/
1503 }
1504
1505 static const HChar* nameQReg128 ( UInt qregNo )
1506 {
1507 return nameQRegLO(qregNo, Ity_V128);
1508 }
1509
1510 /* Find the offset of the most significant half (8 bytes) of the given
1511 Qreg. This requires knowing the endianness of the host. */
1512 static Int offsetQRegHI64 ( UInt qregNo )
1513 {
1514 return offsetQRegLane(qregNo, Ity_I64, 1);
1515 }
1516
1517 static IRExpr* getQRegHI64 ( UInt qregNo )
1518 {
1519 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1520 }
1521
1522 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1523 {
1524 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1525 Int off = offsetQRegHI64(qregNo);
1526 switch (ty) {
1527 case Ity_I64: case Ity_F64:
1528 break;
1529 default:
1530 vassert(0); // Other cases are plain wrong
1531 }
1532 stmt(IRStmt_Put(off, e));
1533 }
1534
1535 /* Put to a specified lane of a Qreg. */
1536 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1537 {
1538 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1539 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1540 switch (laneTy) {
1541 case Ity_F64: case Ity_I64:
1542 case Ity_I32: case Ity_F32:
1543 case Ity_I16: case Ity_F16:
1544 case Ity_I8:
1545 break;
1546 default:
1547 vassert(0); // Other cases are ATC
1548 }
1549 stmt(IRStmt_Put(off, e));
1550 }
1551
1552 /* Get from a specified lane of a Qreg. */
1553 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1554 {
1555 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1556 switch (laneTy) {
1557 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1558 case Ity_F64: case Ity_F32: case Ity_F16:
1559 break;
1560 default:
1561 vassert(0); // Other cases are ATC
1562 }
1563 return IRExpr_Get(off, laneTy);
1564 }
1565
1566
1567 //ZZ /* ---------------- Misc registers ---------------- */
1568 //ZZ
1569 //ZZ static void putMiscReg32 ( UInt gsoffset,
1570 //ZZ IRExpr* e, /* :: Ity_I32 */
1571 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1572 //ZZ {
1573 //ZZ switch (gsoffset) {
1574 //ZZ case OFFB_FPSCR: break;
1575 //ZZ case OFFB_QFLAG32: break;
1576 //ZZ case OFFB_GEFLAG0: break;
1577 //ZZ case OFFB_GEFLAG1: break;
1578 //ZZ case OFFB_GEFLAG2: break;
1579 //ZZ case OFFB_GEFLAG3: break;
1580 //ZZ default: vassert(0); /* awaiting more cases */
1581 //ZZ }
1582 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1583 //ZZ
1584 //ZZ if (guardT == IRTemp_INVALID) {
1585 //ZZ /* unconditional write */
1586 //ZZ stmt(IRStmt_Put(gsoffset, e));
1587 //ZZ } else {
1588 //ZZ stmt(IRStmt_Put(
1589 //ZZ gsoffset,
1590 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1591 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1592 //ZZ ));
1593 //ZZ }
1594 //ZZ }
1595 //ZZ
1596 //ZZ static IRTemp get_ITSTATE ( void )
1597 //ZZ {
1598 //ZZ ASSERT_IS_THUMB;
1599 //ZZ IRTemp t = newTemp(Ity_I32);
1600 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1601 //ZZ return t;
1602 //ZZ }
1603 //ZZ
1604 //ZZ static void put_ITSTATE ( IRTemp t )
1605 //ZZ {
1606 //ZZ ASSERT_IS_THUMB;
1607 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1608 //ZZ }
1609 //ZZ
1610 //ZZ static IRTemp get_QFLAG32 ( void )
1611 //ZZ {
1612 //ZZ IRTemp t = newTemp(Ity_I32);
1613 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1614 //ZZ return t;
1615 //ZZ }
1616 //ZZ
1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1618 //ZZ {
1619 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1620 //ZZ }
1621 //ZZ
1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1623 //ZZ Status Register) to indicate that overflow or saturation occurred.
1624 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1625 //ZZ value to indicate saturation. */
1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1627 //ZZ {
1628 //ZZ IRTemp old = get_QFLAG32();
1629 //ZZ IRTemp nyu = newTemp(Ity_I32);
1630 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1631 //ZZ put_QFLAG32(nyu, condT);
1632 //ZZ }
1633
1634
1635 /* ---------------- FPCR stuff ---------------- */
1636
1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1638 convert them to IR format. Bind the final result to the
1639 returned temp. */
1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1641 {
1642 /* The ARMvfp encoding for rounding mode bits is:
1643 00 to nearest
1644 01 to +infinity
1645 10 to -infinity
1646 11 to zero
1647 We need to convert that to the IR encoding:
1648 00 to nearest (the default)
1649 10 to +infinity
1650 01 to -infinity
1651 11 to zero
1652 Which can be done by swapping bits 0 and 1.
1653 The rmode bits are at 23:22 in FPSCR.
1654 */
1655 IRTemp armEncd = newTemp(Ity_I32);
1656 IRTemp swapped = newTemp(Ity_I32);
1657 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1658 we don't zero out bits 24 and above, since the assignment to
1659 'swapped' will mask them out anyway. */
1660 assign(armEncd,
1661 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1662 /* Now swap them. */
1663 assign(swapped,
1664 binop(Iop_Or32,
1665 binop(Iop_And32,
1666 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1667 mkU32(2)),
1668 binop(Iop_And32,
1669 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1670 mkU32(1))
1671 ));
1672 return swapped;
1673 }
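/* For example, FPCR[23:22] == 01 ("to +infinity") comes out as the IR
   value 10 (binary), and 10 ("to -infinity") comes out as 01, while 00
   and 11 are unchanged -- exactly the bit swap described above. */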
1674
1675
1676 /*------------------------------------------------------------*/
1677 /*--- Helpers for flag handling and conditional insns ---*/
1678 /*------------------------------------------------------------*/
1679
1680 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1681 {
1682 switch (cond) {
1683 case ARM64CondEQ: return "eq";
1684 case ARM64CondNE: return "ne";
1685 case ARM64CondCS: return "cs"; // or 'hs'
1686 case ARM64CondCC: return "cc"; // or 'lo'
1687 case ARM64CondMI: return "mi";
1688 case ARM64CondPL: return "pl";
1689 case ARM64CondVS: return "vs";
1690 case ARM64CondVC: return "vc";
1691 case ARM64CondHI: return "hi";
1692 case ARM64CondLS: return "ls";
1693 case ARM64CondGE: return "ge";
1694 case ARM64CondLT: return "lt";
1695 case ARM64CondGT: return "gt";
1696 case ARM64CondLE: return "le";
1697 case ARM64CondAL: return "al";
1698 case ARM64CondNV: return "nv";
1699 default: vpanic("name_ARM64Condcode");
1700 }
1701 }
1702
1703 /* and a handy shorthand for it */
1704 static const HChar* nameCC ( ARM64Condcode cond ) {
1705 return nameARM64Condcode(cond);
1706 }
1707
1708
1709 /* Build IR to calculate some particular condition from stored
1710 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1711 Ity_I64, suitable for narrowing. Although the return type is
1712 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1713 :: Ity_I64 and must denote the condition to compute in
1714 bits 7:4, and be zero everywhere else.
1715 */
1716 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1717 {
1718 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1719 /* And 'cond' had better produce a value in which only bits 7:4 are
1720 nonzero. However, obviously we can't assert for that. */
1721
1722 /* So what we're constructing for the first argument is
1723 "(cond << 4) | stored-operation".
1724 However, as per comments above, 'cond' must be supplied
1725 pre-shifted to this function.
1726
1727 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1728 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1729 8 bits of the first argument. */
1730 IRExpr** args
1731 = mkIRExprVec_4(
1732 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1733 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1734 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1735 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1736 );
1737 IRExpr* call
1738 = mkIRExprCCall(
1739 Ity_I64,
1740 0/*regparm*/,
1741 "arm64g_calculate_condition", &arm64g_calculate_condition,
1742 args
1743 );
1744
1745 /* Exclude the requested condition, OP and NDEP from definedness
1746 checking. We're only interested in DEP1 and DEP2. */
1747 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1748 return call;
1749 }
1750
1751
1752 /* Build IR to calculate some particular condition from stored
1753 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1754 Ity_I64, suitable for narrowing. Although the return type is
1755 Ity_I64, the returned value is either 0 or 1.
1756 */
1757 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1758 {
1759 /* First arg is "(cond << 4) | stored-operation". This requires that the
1760 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1761 (COND, OP) pair in the lowest 8 bits of the first argument. */
1762 vassert(cond >= 0 && cond <= 15);
1763 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1764 }
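/* Illustrative sketch of the (COND,OP) pairing scheme described above: both
   values must fit in 4 bits, so they travel together in the low 8 bits of
   the first helper argument.  The unpacking shown is an assumption about
   how arm64g_calculate_condition recovers the pair; the real helper lives
   elsewhere. */
#if 0
/* Hypothetical helpers, for illustration only. */
static ULong example_pack_cond_op ( UInt cond, UInt cc_op )
{
   return ((ULong)(cond & 0xF) << 4) | (ULong)(cc_op & 0xF);
}
static void example_unpack_cond_op ( ULong arg, UInt* cond, UInt* cc_op )
{
   *cond  = (UInt)((arg >> 4) & 0xF);
   *cc_op = (UInt)(arg & 0xF);
}
#endif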
1765
1766
1767 /* Build IR to calculate just the carry flag from stored
1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1769 Ity_I64. */
1770 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1771 {
1772 IRExpr** args
1773 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1774 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1775 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1776 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1777 IRExpr* call
1778 = mkIRExprCCall(
1779 Ity_I64,
1780 0/*regparm*/,
1781 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1782 args
1783 );
1784 /* Exclude OP and NDEP from definedness checking. We're only
1785 interested in DEP1 and DEP2. */
1786 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1787 return call;
1788 }
1789
1790
1791 //ZZ /* Build IR to calculate just the overflow flag from stored
1792 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1793 //ZZ Ity_I32. */
1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1795 //ZZ {
1796 //ZZ IRExpr** args
1797 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1798 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1799 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1800 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1801 //ZZ IRExpr* call
1802 //ZZ = mkIRExprCCall(
1803 //ZZ Ity_I32,
1804 //ZZ 0/*regparm*/,
1805 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1806 //ZZ args
1807 //ZZ );
1808 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1809 //ZZ interested in DEP1 and DEP2. */
1810 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1811 //ZZ return call;
1812 //ZZ }
1813
1814
1815 /* Build IR to calculate N Z C V in bits 31:28 of the
1816 returned word. */
1817 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1818 {
1819 IRExpr** args
1820 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1821 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1822 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1823 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1824 IRExpr* call
1825 = mkIRExprCCall(
1826 Ity_I64,
1827 0/*regparm*/,
1828 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1829 args
1830 );
1831 /* Exclude OP and NDEP from definedness checking. We're only
1832 interested in DEP1 and DEP2. */
1833 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1834 return call;
1835 }
1836
1837
1838 /* Build IR to set the flags thunk, in the most general case. */
1839 static
1840 void setFlags_D1_D2_ND ( UInt cc_op,
1841 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1842 {
1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1844 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1845 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1846 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1847 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1848 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1849 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1850 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1851 }
1852
1853 /* Build IR to set the flags thunk after ADD or SUB. */
1854 static
1855 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1856 {
1857 IRTemp argL64 = IRTemp_INVALID;
1858 IRTemp argR64 = IRTemp_INVALID;
1859 IRTemp z64 = newTemp(Ity_I64);
1860 if (is64) {
1861 argL64 = argL;
1862 argR64 = argR;
1863 } else {
1864 argL64 = newTemp(Ity_I64);
1865 argR64 = newTemp(Ity_I64);
1866 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1867 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1868 }
1869 assign(z64, mkU64(0));
1870 UInt cc_op = ARM64G_CC_OP_NUMBER;
1871 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1872 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1873 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1874 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1875 else { vassert(0); }
1876 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1877 }
1878
1879 /* Build IR to set the flags thunk after ADC or SBC. */
1880 static
1881 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1882 IRTemp argL, IRTemp argR, IRTemp oldC )
1883 {
1884 IRTemp argL64 = IRTemp_INVALID;
1885 IRTemp argR64 = IRTemp_INVALID;
1886 IRTemp oldC64 = IRTemp_INVALID;
1887 if (is64) {
1888 argL64 = argL;
1889 argR64 = argR;
1890 oldC64 = oldC;
1891 } else {
1892 argL64 = newTemp(Ity_I64);
1893 argR64 = newTemp(Ity_I64);
1894 oldC64 = newTemp(Ity_I64);
1895 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1896 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1897 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1898 }
1899 UInt cc_op = ARM64G_CC_OP_NUMBER;
1900 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1901 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1902 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1903 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1904 else { vassert(0); }
1905 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1906 }
1907
1908 /* Build IR to set the flags thunk after ADD or SUB, if the given
1909 condition evaluates to True at run time. If not, the flags are set
1910 to the specified NZCV value. */
1911 static
1912 void setFlags_ADD_SUB_conditionally (
1913 Bool is64, Bool isSUB,
1914 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1915 )
1916 {
1917 /* Generate IR as follows:
1918 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1919 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1920 CC_DEP2 = ITE(cond, argR64, 0)
1921 CC_NDEP = 0
1922 */
1923
1924 IRTemp z64 = newTemp(Ity_I64);
1925 assign(z64, mkU64(0));
1926
1927 /* Establish the operation and operands for the True case. */
1928 IRTemp t_dep1 = IRTemp_INVALID;
1929 IRTemp t_dep2 = IRTemp_INVALID;
1930 UInt t_op = ARM64G_CC_OP_NUMBER;
1931 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1932 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1933 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1934 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1935 else { vassert(0); }
1936 /* */
1937 if (is64) {
1938 t_dep1 = argL;
1939 t_dep2 = argR;
1940 } else {
1941 t_dep1 = newTemp(Ity_I64);
1942 t_dep2 = newTemp(Ity_I64);
1943 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1944 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1945 }
1946
1947 /* Establish the operation and operands for the False case. */
1948 IRTemp f_dep1 = newTemp(Ity_I64);
1949 IRTemp f_dep2 = z64;
1950 UInt f_op = ARM64G_CC_OP_COPY;
1951 assign(f_dep1, mkU64(nzcv << 28));
1952
1953 /* Final thunk values */
1954 IRTemp dep1 = newTemp(Ity_I64);
1955 IRTemp dep2 = newTemp(Ity_I64);
1956 IRTemp op = newTemp(Ity_I64);
1957
1958 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1959 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1960 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1961
1962 /* finally .. */
1963 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1964 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1965 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1966 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1967 }
1968
1969 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1970 static
1971 void setFlags_LOGIC ( Bool is64, IRTemp res )
1972 {
1973 IRTemp res64 = IRTemp_INVALID;
1974 IRTemp z64 = newTemp(Ity_I64);
1975 UInt cc_op = ARM64G_CC_OP_NUMBER;
1976 if (is64) {
1977 res64 = res;
1978 cc_op = ARM64G_CC_OP_LOGIC64;
1979 } else {
1980 res64 = newTemp(Ity_I64);
1981 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1982 cc_op = ARM64G_CC_OP_LOGIC32;
1983 }
1984 assign(z64, mkU64(0));
1985 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1986 }
1987
1988 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
1989 located in bits 31:28 of the supplied value. */
1990 static
1991 void setFlags_COPY ( IRTemp nzcv_28x0 )
1992 {
1993 IRTemp z64 = newTemp(Ity_I64);
1994 assign(z64, mkU64(0));
1995 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1996 }
1997
1998
1999 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2000 //ZZ sets it at all) */
2001 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2002 //ZZ IRTemp t_dep2,
2003 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2004 //ZZ {
2005 //ZZ IRTemp z32 = newTemp(Ity_I32);
2006 //ZZ assign( z32, mkU32(0) );
2007 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2008 //ZZ }
2009 //ZZ
2010 //ZZ
2011 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2012 //ZZ sets it at all) */
2013 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2014 //ZZ IRTemp t_ndep,
2015 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2016 //ZZ {
2017 //ZZ IRTemp z32 = newTemp(Ity_I32);
2018 //ZZ assign( z32, mkU32(0) );
2019 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2020 //ZZ }
2021 //ZZ
2022 //ZZ
2023 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2024 //ZZ sets them at all) */
2025 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2026 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2027 //ZZ {
2028 //ZZ IRTemp z32 = newTemp(Ity_I32);
2029 //ZZ assign( z32, mkU32(0) );
2030 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2031 //ZZ }
2032
2033
2034 /*------------------------------------------------------------*/
2035 /*--- Misc math helpers ---*/
2036 /*------------------------------------------------------------*/
2037
2038 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2039 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2040 {
2041 IRTemp maskT = newTemp(Ity_I64);
2042 IRTemp res = newTemp(Ity_I64);
2043 vassert(sh >= 1 && sh <= 63);
2044 assign(maskT, mkU64(mask));
2045 assign( res,
2046 binop(Iop_Or64,
2047 binop(Iop_Shr64,
2048 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2049 mkU8(sh)),
2050 binop(Iop_And64,
2051 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2052 mkexpr(maskT))
2053 )
2054 );
2055 return res;
2056 }
2057
2058 /* Generates byte swaps within 32-bit lanes. */
2059 static IRTemp math_UINTSWAP64 ( IRTemp src )
2060 {
2061 IRTemp res;
2062 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2063 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2064 return res;
2065 }
2066
2067 /* Generates byte swaps within 16-bit lanes. */
2068 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2069 {
2070 IRTemp res;
2071 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2072 return res;
2073 }
2074
2075 /* Generates a 64-bit byte swap. */
2076 static IRTemp math_BYTESWAP64 ( IRTemp src )
2077 {
2078 IRTemp res;
2079 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2080 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2081 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2082 return res;
2083 }
2084
2085 /* Generates a 64-bit bit swap. */
2086 static IRTemp math_BITSWAP64 ( IRTemp src )
2087 {
2088 IRTemp res;
2089 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2090 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2091 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2092 return math_BYTESWAP64(res);
2093 }
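/* Illustrative sketch: the swap-helper chain above, written out on a plain
   64-bit integer.  Swapping adjacent bits, then 2-bit pairs, then nibbles
   reverses the bits within each byte; the final byte reversal then yields a
   full 64-bit bit reversal. */
#if 0
/* Plain-C equivalent of math_BITSWAP64, for illustration only. */
static ULong example_bitswap64 ( ULong x )
{
   /* Each step is ((x & mask) >> sh) | ((x << sh) & mask). */
   x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1)  | ((x << 1)  & 0xAAAAAAAAAAAAAAAAULL);
   x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2)  | ((x << 2)  & 0xCCCCCCCCCCCCCCCCULL);
   x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4)  | ((x << 4)  & 0xF0F0F0F0F0F0F0F0ULL);
   /* .. and now reverse the bytes, as math_BYTESWAP64 does. */
   x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)  | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
   x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
   x = ((x & 0xFFFFFFFF00000000ULL) >> 32) | ((x << 32) & 0xFFFFFFFF00000000ULL);
   return x;
}
#endif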
2094
2095 /* Duplicates the bits at the bottom of the given word to fill the
2096 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2097 except for the bottom bits. */
2098 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2099 {
2100 if (srcTy == Ity_I8) {
2101 IRTemp t16 = newTemp(Ity_I64);
2102 assign(t16, binop(Iop_Or64, mkexpr(src),
2103 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2104 IRTemp t32 = newTemp(Ity_I64);
2105 assign(t32, binop(Iop_Or64, mkexpr(t16),
2106 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2107 IRTemp t64 = newTemp(Ity_I64);
2108 assign(t64, binop(Iop_Or64, mkexpr(t32),
2109 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2110 return t64;
2111 }
2112 if (srcTy == Ity_I16) {
2113 IRTemp t32 = newTemp(Ity_I64);
2114 assign(t32, binop(Iop_Or64, mkexpr(src),
2115 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2116 IRTemp t64 = newTemp(Ity_I64);
2117 assign(t64, binop(Iop_Or64, mkexpr(t32),
2118 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2119 return t64;
2120 }
2121 if (srcTy == Ity_I32) {
2122 IRTemp t64 = newTemp(Ity_I64);
2123 assign(t64, binop(Iop_Or64, mkexpr(src),
2124 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2125 return t64;
2126 }
2127 if (srcTy == Ity_I64) {
2128 return src;
2129 }
2130 vassert(0);
2131 }
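/* Illustrative sketch: the shift-and-OR doubling trick used above, shown for
   the byte case on a plain integer.  Each step doubles the width of the
   replicated pattern. */
#if 0
/* For illustration only. */
static ULong example_dup8_to_64 ( ULong b /* zero except in bits 7:0 */ )
{
   ULong t16 = b   | (b   << 8);    /* bb       */
   ULong t32 = t16 | (t16 << 16);   /* bbbb     */
   ULong t64 = t32 | (t32 << 32);   /* bbbbbbbb */
   return t64;
}
#endif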
2132
2133
2134 /* Duplicates the src element exactly so as to fill a V128 value. */
2135 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2136 {
2137 IRTemp res = newTempV128();
2138 if (srcTy == Ity_F64) {
2139 IRTemp i64 = newTemp(Ity_I64);
2140 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2141 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2142 return res;
2143 }
2144 if (srcTy == Ity_F32) {
2145 IRTemp i64a = newTemp(Ity_I64);
2146 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2147 IRTemp i64b = newTemp(Ity_I64);
2148 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2149 mkexpr(i64a)));
2150 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2151 return res;
2152 }
2153 if (srcTy == Ity_I64) {
2154 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2155 return res;
2156 }
2157 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2158 IRTemp t1 = newTemp(Ity_I64);
2159 assign(t1, widenUto64(srcTy, mkexpr(src)));
2160 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2161 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2162 return res;
2163 }
2164 vassert(0);
2165 }
2166
2167
2168 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2169 zero out the upper half. */
2170 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2171 {
2172 if (bitQ == 1) return mkexpr(fullWidth);
2173 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2174 vassert(0);
2175 }
2176
2177 /* The same, but from an expression instead. */
2178 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2179 {
2180 IRTemp fullWidthT = newTempV128();
2181 assign(fullWidthT, fullWidth);
2182 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2183 }
2184
2185
2186 /*------------------------------------------------------------*/
2187 /*--- FP comparison helpers ---*/
2188 /*------------------------------------------------------------*/
2189
2190 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2191 as an IRCmpF64Result. Generate code to convert it to an
2192 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2193 Assign a new temp to hold that value, and return the temp. */
2194 static
2195 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2196 {
2197 IRTemp ix = newTemp(Ity_I64);
2198 IRTemp termL = newTemp(Ity_I64);
2199 IRTemp termR = newTemp(Ity_I64);
2200 IRTemp nzcv = newTemp(Ity_I64);
2201 IRTemp irRes = newTemp(Ity_I64);
2202
2203 /* This is where the fun starts. We have to convert 'irRes' from
2204 an IR-convention return result (IRCmpF64Result) to an
2205 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2206 4 bits of 'nzcv'. */
2207 /* Map compare result from IR to ARM(nzcv) */
2208 /*
2209 FP cmp result | IR | ARM(nzcv)
2210 --------------------------------
2211 UN 0x45 0011
2212 LT 0x01 1000
2213 GT 0x00 0010
2214 EQ 0x40 0110
2215 */
2216 /* Now since you're probably wondering WTF ..
2217
2218 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2219 places them side by side, giving a number which is 0, 1, 2 or 3.
2220
2221 termL is a sequence cooked up by GNU superopt. It converts ix
2222 into an almost correct NZCV value (incredibly), except
2223 for the case of UN, where it produces 0100 instead of the
2224 required 0011.
2225
2226 termR is therefore a correction term, also computed from ix. It
2227 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2228 the final correct value, we subtract termR from termL.
2229
2230 Don't take my word for it. There's a test program at the bottom
2231 of guest_arm_toIR.c, to try this out with.
2232 */
2233 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2234
2235 assign(
2236 ix,
2237 binop(Iop_Or64,
2238 binop(Iop_And64,
2239 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2240 mkU64(3)),
2241 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2242
2243 assign(
2244 termL,
2245 binop(Iop_Add64,
2246 binop(Iop_Shr64,
2247 binop(Iop_Sub64,
2248 binop(Iop_Shl64,
2249 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2250 mkU8(62)),
2251 mkU64(1)),
2252 mkU8(61)),
2253 mkU64(1)));
2254
2255 assign(
2256 termR,
2257 binop(Iop_And64,
2258 binop(Iop_And64,
2259 mkexpr(ix),
2260 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2261 mkU64(1)));
2262
2263 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2264 return nzcv;
2265 }
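/* Illustrative sketch: a plain-C transcription of the ix/termL/termR
   computation above, so the four rows of the table can be checked by hand:
   irRes = 0x00 (GT) -> 0x2, 0x01 (LT) -> 0x8, 0x40 (EQ) -> 0x6,
   0x45 (UN) -> 0x3. */
#if 0
/* For illustration only; not used by the decoder. */
static ULong example_IRCmpF64Result_to_NZCV ( ULong irRes )
{
   ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
   ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
   ULong termR = (ix & (ix >> 1)) & 1;
   return termL - termR;   /* NZCV in bits 3:0 */
}
#endif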
2266
2267
2268 /*------------------------------------------------------------*/
2269 /*--- Data processing (immediate) ---*/
2270 /*------------------------------------------------------------*/
2271
2272 /* Helper functions for supporting "DecodeBitMasks" */
2273
2274 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2275 {
2276 vassert(width > 0 && width <= 64);
2277 vassert(rot >= 0 && rot < width);
2278 if (rot == 0) return x;
2279 ULong res = x >> rot;
2280 res |= (x << (width - rot));
2281 if (width < 64)
2282 res &= ((1ULL << width) - 1);
2283 return res;
2284 }
2285
2286 static ULong dbm_RepTo64( Int esize, ULong x )
2287 {
2288 switch (esize) {
2289 case 64:
2290 return x;
2291 case 32:
2292 x &= 0xFFFFFFFF; x |= (x << 32);
2293 return x;
2294 case 16:
2295 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2296 return x;
2297 case 8:
2298 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2299 return x;
2300 case 4:
2301 x &= 0xF; x |= (x << 4); x |= (x << 8);
2302 x |= (x << 16); x |= (x << 32);
2303 return x;
2304 case 2:
2305 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2306 x |= (x << 16); x |= (x << 32);
2307 return x;
2308 default:
2309 break;
2310 }
2311 vpanic("dbm_RepTo64");
2312 /*NOTREACHED*/
2313 return 0;
2314 }
2315
2316 static Int dbm_highestSetBit ( ULong x )
2317 {
2318 Int i;
2319 for (i = 63; i >= 0; i--) {
2320 if (x & (1ULL << i))
2321 return i;
2322 }
2323 vassert(x == 0);
2324 return -1;
2325 }
2326
2327 static
2328 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2329 ULong immN, ULong imms, ULong immr, Bool immediate,
2330 UInt M /*32 or 64*/)
2331 {
2332 vassert(immN < (1ULL << 1));
2333 vassert(imms < (1ULL << 6));
2334 vassert(immr < (1ULL << 6));
2335 vassert(immediate == False || immediate == True);
2336 vassert(M == 32 || M == 64);
2337
2338 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2339 if (len < 1) { /* printf("fail1\n"); */ return False; }
2340 vassert(len <= 6);
2341 vassert(M >= (1 << len));
2342
2343 vassert(len >= 1 && len <= 6);
2344 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2345 (1 << len) - 1;
2346 vassert(levels >= 1 && levels <= 63);
2347
2348 if (immediate && ((imms & levels) == levels)) {
2349 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2350 return False;
2351 }
2352
2353 ULong S = imms & levels;
2354 ULong R = immr & levels;
2355 Int diff = S - R;
2356 diff &= 63;
2357 Int esize = 1 << len;
2358 vassert(2 <= esize && esize <= 64);
2359
2360 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2361 same below with d. S can be 63 in which case we have an out of
2362 range and hence undefined shift. */
2363 vassert(S >= 0 && S <= 63);
2364 vassert(esize >= (S+1));
2365 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2366 //(1ULL << (S+1)) - 1;
2367 ((1ULL << S) - 1) + (1ULL << S);
2368
2369 Int d = // diff<len-1:0>
2370 diff & ((1 << len)-1);
2371 vassert(esize >= (d+1));
2372 vassert(d >= 0 && d <= 63);
2373
2374 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2375 //(1ULL << (d+1)) - 1;
2376 ((1ULL << d) - 1) + (1ULL << d);
2377
2378 if (esize != 64) vassert(elem_s < (1ULL << esize));
2379 if (esize != 64) vassert(elem_d < (1ULL << esize));
2380
2381 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2382 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2383
2384 return True;
2385 }
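/* Worked example (illustrative): following the code above with N=1, imms=3,
   immr=0, M=64 gives len = 6, esize = 64, S = 3, R = 0, hence elem_s = 0xF
   and wmask = 0xF.  Bumping immr to 1 rotates that element right by one,
   giving wmask = 0x8000000000000007. */
#if 0
/* A small self-check built on the function above, for illustration only. */
static void example_check_DecodeBitMasks ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool  ok    = dbm_DecodeBitMasks(&wmask, &tmask, 1, 3, 0, True, 64);
   vassert(ok && wmask == 0xFULL);
   ok = dbm_DecodeBitMasks(&wmask, &tmask, 1, 3, 1, True, 64);
   vassert(ok && wmask == 0x8000000000000007ULL);
}
#endif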
2386
2387
2388 static
2389 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2390 UInt insn)
2391 {
2392 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2393
2394 /* insn[28:23]
2395 10000x PC-rel addressing
2396 10001x Add/subtract (immediate)
2397 100100 Logical (immediate)
2398 100101 Move Wide (immediate)
2399 100110 Bitfield
2400 100111 Extract
2401 */
2402
2403 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2404 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2405 Bool is64 = INSN(31,31) == 1;
2406 Bool isSub = INSN(30,30) == 1;
2407 Bool setCC = INSN(29,29) == 1;
2408 UInt sh = INSN(23,22);
2409 UInt uimm12 = INSN(21,10);
2410 UInt nn = INSN(9,5);
2411 UInt dd = INSN(4,0);
2412 const HChar* nm = isSub ? "sub" : "add";
2413 if (sh >= 2) {
2414 /* Invalid; fall through */
2415 } else {
2416 vassert(sh <= 1);
2417 uimm12 <<= (12 * sh);
2418 if (is64) {
2419 IRTemp argL = newTemp(Ity_I64);
2420 IRTemp argR = newTemp(Ity_I64);
2421 IRTemp res = newTemp(Ity_I64);
2422 assign(argL, getIReg64orSP(nn));
2423 assign(argR, mkU64(uimm12));
2424 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2425 mkexpr(argL), mkexpr(argR)));
2426 if (setCC) {
2427 putIReg64orZR(dd, mkexpr(res));
2428 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2429 DIP("%ss %s, %s, 0x%x\n",
2430 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2431 } else {
2432 putIReg64orSP(dd, mkexpr(res));
2433 DIP("%s %s, %s, 0x%x\n",
2434 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2435 }
2436 } else {
2437 IRTemp argL = newTemp(Ity_I32);
2438 IRTemp argR = newTemp(Ity_I32);
2439 IRTemp res = newTemp(Ity_I32);
2440 assign(argL, getIReg32orSP(nn));
2441 assign(argR, mkU32(uimm12));
2442 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2443 mkexpr(argL), mkexpr(argR)));
2444 if (setCC) {
2445 putIReg32orZR(dd, mkexpr(res));
2446 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2447 DIP("%ss %s, %s, 0x%x\n",
2448 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2449 } else {
2450 putIReg32orSP(dd, mkexpr(res));
2451 DIP("%s %s, %s, 0x%x\n",
2452 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2453 }
2454 }
2455 return True;
2456 }
2457 }
2458
2459 /* -------------------- ADR/ADRP -------------------- */
2460 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2461 UInt bP = INSN(31,31);
2462 UInt immLo = INSN(30,29);
2463 UInt immHi = INSN(23,5);
2464 UInt rD = INSN(4,0);
2465 ULong uimm = (immHi << 2) | immLo;
2466 ULong simm = sx_to_64(uimm, 21);
2467 ULong val;
2468 if (bP) {
2469 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2470 } else {
2471 val = guest_PC_curr_instr + simm;
2472 }
2473 putIReg64orZR(rD, mkU64(val));
2474 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2475 return True;
2476 }
2477
2478 /* -------------------- LOGIC(imm) -------------------- */
2479 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2480 /* 31 30 28 22 21 15 9 4
2481 sf op 100100 N immr imms Rn Rd
2482 op=00: AND Rd|SP, Rn, #imm
2483 op=01: ORR Rd|SP, Rn, #imm
2484 op=10: EOR Rd|SP, Rn, #imm
2485 op=11: ANDS Rd|ZR, Rn, #imm
2486 */
2487 Bool is64 = INSN(31,31) == 1;
2488 UInt op = INSN(30,29);
2489 UInt N = INSN(22,22);
2490 UInt immR = INSN(21,16);
2491 UInt immS = INSN(15,10);
2492 UInt nn = INSN(9,5);
2493 UInt dd = INSN(4,0);
2494 ULong imm = 0;
2495 Bool ok;
2496 if (N == 1 && !is64)
2497 goto after_logic_imm; /* not allowed; fall through */
2498 ok = dbm_DecodeBitMasks(&imm, NULL,
2499 N, immS, immR, True, is64 ? 64 : 32);
2500 if (!ok)
2501 goto after_logic_imm;
2502
2503 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2504 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2505 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2506
2507 vassert(op < 4);
2508 if (is64) {
2509 IRExpr* argL = getIReg64orZR(nn);
2510 IRExpr* argR = mkU64(imm);
2511 IRTemp res = newTemp(Ity_I64);
2512 assign(res, binop(ops64[op], argL, argR));
2513 if (op < 3) {
2514 putIReg64orSP(dd, mkexpr(res));
2515 DIP("%s %s, %s, 0x%llx\n", names[op],
2516 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2517 } else {
2518 putIReg64orZR(dd, mkexpr(res));
2519 setFlags_LOGIC(True/*is64*/, res);
2520 DIP("%s %s, %s, 0x%llx\n", names[op],
2521 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2522 }
2523 } else {
2524 IRExpr* argL = getIReg32orZR(nn);
2525 IRExpr* argR = mkU32((UInt)imm);
2526 IRTemp res = newTemp(Ity_I32);
2527 assign(res, binop(ops32[op], argL, argR));
2528 if (op < 3) {
2529 putIReg32orSP(dd, mkexpr(res));
2530 DIP("%s %s, %s, 0x%x\n", names[op],
2531 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2532 } else {
2533 putIReg32orZR(dd, mkexpr(res));
2534 setFlags_LOGIC(False/*!is64*/, res);
2535 DIP("%s %s, %s, 0x%x\n", names[op],
2536 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2537 }
2538 }
2539 return True;
2540 }
2541 after_logic_imm:
2542
2543 /* -------------------- MOV{Z,N,K} -------------------- */
2544 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2545 /* 31 30 28 22 20 4
2546 | | | | | |
2547 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2548 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2549 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2550 */
2551 Bool is64 = INSN(31,31) == 1;
2552 UInt subopc = INSN(30,29);
2553 UInt hw = INSN(22,21);
2554 UInt imm16 = INSN(20,5);
2555 UInt dd = INSN(4,0);
2556 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2557 /* invalid; fall through */
2558 } else {
2559 ULong imm64 = ((ULong)imm16) << (16 * hw);
2560 if (!is64)
2561 vassert(imm64 < 0x100000000ULL);
2562 switch (subopc) {
2563 case BITS2(1,0): // MOVZ
2564 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2565 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2566 break;
2567 case BITS2(0,0): // MOVN
2568 imm64 = ~imm64;
2569 if (!is64)
2570 imm64 &= 0xFFFFFFFFULL;
2571 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2572 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2573 break;
2574 case BITS2(1,1): // MOVK
2575 /* This is more complex. We are inserting a slice into
2576 the destination register, so we need to have the old
2577 value of it. */
2578 if (is64) {
2579 IRTemp old = newTemp(Ity_I64);
2580 assign(old, getIReg64orZR(dd));
2581 ULong mask = 0xFFFFULL << (16 * hw);
2582 IRExpr* res
2583 = binop(Iop_Or64,
2584 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2585 mkU64(imm64));
2586 putIReg64orZR(dd, res);
2587 DIP("movk %s, 0x%x, lsl %u\n",
2588 nameIReg64orZR(dd), imm16, 16*hw);
2589 } else {
2590 IRTemp old = newTemp(Ity_I32);
2591 assign(old, getIReg32orZR(dd));
2592 vassert(hw <= 1);
2593 UInt mask = 0xFFFF << (16 * hw);
2594 IRExpr* res
2595 = binop(Iop_Or32,
2596 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2597 mkU32((UInt)imm64));
2598 putIReg32orZR(dd, res);
2599 DIP("movk %s, 0x%x, lsl %u\n",
2600 nameIReg32orZR(dd), imm16, 16*hw);
2601 }
2602 break;
2603 default:
2604 vassert(0);
2605 }
2606 return True;
2607 }
2608 }
2609
2610 /* -------------------- {U,S,}BFM -------------------- */
2611 /* 30 28 22 21 15 9 4
2612
2613 sf 10 100110 N immr imms nn dd
2614 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2615 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2616
2617 sf 00 100110 N immr imms nn dd
2618 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2619 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2620
2621 sf 01 100110 N immr imms nn dd
2622 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2623 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2624 */
2625 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2626 UInt sf = INSN(31,31);
2627 UInt opc = INSN(30,29);
2628 UInt N = INSN(22,22);
2629 UInt immR = INSN(21,16);
2630 UInt immS = INSN(15,10);
2631 UInt nn = INSN(9,5);
2632 UInt dd = INSN(4,0);
2633 Bool inZero = False;
2634 Bool extend = False;
2635 const HChar* nm = "???";
2636 /* skip invalid combinations */
2637 switch (opc) {
2638 case BITS2(0,0):
2639 inZero = True; extend = True; nm = "sbfm"; break;
2640 case BITS2(0,1):
2641 inZero = False; extend = False; nm = "bfm"; break;
2642 case BITS2(1,0):
2643 inZero = True; extend = False; nm = "ubfm"; break;
2644 case BITS2(1,1):
2645 goto after_bfm; /* invalid */
2646 default:
2647 vassert(0);
2648 }
2649 if (sf == 1 && N != 1) goto after_bfm;
2650 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2651 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2652 ULong wmask = 0, tmask = 0;
2653 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2654 N, immS, immR, False, sf == 1 ? 64 : 32);
2655 if (!ok) goto after_bfm; /* hmmm */
2656
2657 Bool is64 = sf == 1;
2658 IRType ty = is64 ? Ity_I64 : Ity_I32;
2659
2660 IRTemp dst = newTemp(ty);
2661 IRTemp src = newTemp(ty);
2662 IRTemp bot = newTemp(ty);
2663 IRTemp top = newTemp(ty);
2664 IRTemp res = newTemp(ty);
2665 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2666 assign(src, getIRegOrZR(is64, nn));
2667 /* perform bitfield move on low bits */
2668 assign(bot, binop(mkOR(ty),
2669 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2670 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2671 mkU(ty, wmask))));
2672 /* determine extension bits (sign, zero or dest register) */
2673 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2674 /* combine extension bits and result bits */
2675 assign(res, binop(mkOR(ty),
2676 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2677 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2678 putIRegOrZR(is64, dd, mkexpr(res));
2679 DIP("%s %s, %s, immR=%u, immS=%u\n",
2680 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2681 return True;
2682 }
2683 after_bfm:
2684
2685 /* ---------------------- EXTR ---------------------- */
2686 /* 30 28 22 20 15 9 4
2687 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2688 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2689 */
2690 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2691 Bool is64 = INSN(31,31) == 1;
2692 UInt mm = INSN(20,16);
2693 UInt imm6 = INSN(15,10);
2694 UInt nn = INSN(9,5);
2695 UInt dd = INSN(4,0);
2696 Bool valid = True;
2697 if (INSN(31,31) != INSN(22,22))
2698 valid = False;
2699 if (!is64 && imm6 >= 32)
2700 valid = False;
2701 if (!valid) goto after_extr;
2702 IRType ty = is64 ? Ity_I64 : Ity_I32;
2703 IRTemp srcHi = newTemp(ty);
2704 IRTemp srcLo = newTemp(ty);
2705 IRTemp res = newTemp(ty);
2706 assign(srcHi, getIRegOrZR(is64, nn));
2707 assign(srcLo, getIRegOrZR(is64, mm));
2708 if (imm6 == 0) {
2709 assign(res, mkexpr(srcLo));
2710 } else {
2711 UInt szBits = 8 * sizeofIRType(ty);
2712 vassert(imm6 > 0 && imm6 < szBits);
2713 assign(res, binop(mkOR(ty),
2714 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2715 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2716 }
2717 putIRegOrZR(is64, dd, mkexpr(res));
2718 DIP("extr %s, %s, %s, #%u\n",
2719 nameIRegOrZR(is64,dd),
2720 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2721 return True;
2722 }
2723 after_extr:
2724
2725 vex_printf("ARM64 front end: data_processing_immediate\n");
2726 return False;
2727 # undef INSN
2728 }
2729
2730
2731 /*------------------------------------------------------------*/
2732 /*--- Data processing (register) instructions ---*/
2733 /*------------------------------------------------------------*/
2734
2735 static const HChar* nameSH ( UInt sh ) {
2736 switch (sh) {
2737 case 0: return "lsl";
2738 case 1: return "lsr";
2739 case 2: return "asr";
2740 case 3: return "ror";
2741 default: vassert(0);
2742 }
2743 }
2744
2745 /* Generate IR to get a register value, possibly shifted by an
2746 immediate. Returns either a 32- or 64-bit temporary holding the
2747 result. After the shift, the value can optionally be NOT-ed
2748 too.
2749
2750 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2751 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2752 isn't allowed, but it's the job of the caller to check that.
2753 */
2754 static IRTemp getShiftedIRegOrZR ( Bool is64,
2755 UInt sh_how, UInt sh_amt, UInt regNo,
2756 Bool invert )
2757 {
2758 vassert(sh_how < 4);
2759 vassert(sh_amt < (is64 ? 64 : 32));
2760 IRType ty = is64 ? Ity_I64 : Ity_I32;
2761 IRTemp t0 = newTemp(ty);
2762 assign(t0, getIRegOrZR(is64, regNo));
2763 IRTemp t1 = newTemp(ty);
2764 switch (sh_how) {
2765 case BITS2(0,0):
2766 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2767 break;
2768 case BITS2(0,1):
2769 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2770 break;
2771 case BITS2(1,0):
2772 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2773 break;
2774 case BITS2(1,1):
2775 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2776 break;
2777 default:
2778 vassert(0);
2779 }
2780 if (invert) {
2781 IRTemp t2 = newTemp(ty);
2782 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2783 return t2;
2784 } else {
2785 return t1;
2786 }
2787 }
2788
2789
2790 static
2791 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2792 UInt insn)
2793 {
2794 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2795
2796 /* ------------------- ADD/SUB(reg) ------------------- */
2797 /* x==0 => 32 bit op x==1 => 64 bit op
2798 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2799
2800 31 30 29 28 23 21 20 15 9 4
2801 | | | | | | | | | |
2802 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2803 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2804 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2805 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2806 */
2807 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2808 UInt bX = INSN(31,31);
2809 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2810 UInt bS = INSN(29, 29); /* set flags? */
2811 UInt sh = INSN(23,22);
2812 UInt rM = INSN(20,16);
2813 UInt imm6 = INSN(15,10);
2814 UInt rN = INSN(9,5);
2815 UInt rD = INSN(4,0);
2816 Bool isSUB = bOP == 1;
2817 Bool is64 = bX == 1;
2818 IRType ty = is64 ? Ity_I64 : Ity_I32;
2819 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2820 /* invalid; fall through */
2821 } else {
2822 IRTemp argL = newTemp(ty);
2823 assign(argL, getIRegOrZR(is64, rN));
2824 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2826 IRTemp res = newTemp(ty);
2827 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2828 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2829 if (bS) {
2830 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2831 }
2832 DIP("%s%s %s, %s, %s, %s #%u\n",
2833 bOP ? "sub" : "add", bS ? "s" : "",
2834 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2835 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2836 return True;
2837 }
2838 }
2839
2840 /* ------------------- ADC/SBC(reg) ------------------- */
2841 /* x==0 => 32 bit op x==1 => 64 bit op
2842
2843 31 30 29 28 23 21 20 15 9 4
2844 | | | | | | | | | |
2845 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2846 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2847 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2848 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2849 */
2850
2851 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2852 UInt bX = INSN(31,31);
2853 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2854 UInt bS = INSN(29,29); /* set flags */
2855 UInt rM = INSN(20,16);
2856 UInt rN = INSN(9,5);
2857 UInt rD = INSN(4,0);
2858
2859 Bool isSUB = bOP == 1;
2860 Bool is64 = bX == 1;
2861 IRType ty = is64 ? Ity_I64 : Ity_I32;
2862
2863 IRTemp oldC = newTemp(ty);
2864 assign(oldC,
2865 is64 ? mk_arm64g_calculate_flag_c()
2866 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2867
2868 IRTemp argL = newTemp(ty);
2869 assign(argL, getIRegOrZR(is64, rN));
2870 IRTemp argR = newTemp(ty);
2871 assign(argR, getIRegOrZR(is64, rM));
2872
2873 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2874 IRTemp res = newTemp(ty);
2875 if (isSUB) {
2876 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2877 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2878 assign(res,
2879 binop(op,
2880 binop(op, mkexpr(argL), mkexpr(argR)),
2881 binop(xorOp, mkexpr(oldC), one)));
2882 } else {
2883 assign(res,
2884 binop(op,
2885 binop(op, mkexpr(argL), mkexpr(argR)),
2886 mkexpr(oldC)));
2887 }
2888
2889 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2890
2891 if (bS) {
2892 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2893 }
2894
2895 DIP("%s%s %s, %s, %s\n",
2896 bOP ? "sbc" : "adc", bS ? "s" : "",
2897 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2898 nameIRegOrZR(is64, rM));
2899 return True;
2900 }
2901
2902 /* -------------------- LOGIC(reg) -------------------- */
2903 /* x==0 => 32 bit op x==1 => 64 bit op
2904 N==0 => inv? is no-op (no inversion)
2905 N==1 => inv? is NOT
2906 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2907
2908 31 30 28 23 21 20 15 9 4
2909 | | | | | | | | |
2910 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2911 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2912 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2913 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2914 With N=1, the names are: BIC ORN EON BICS
2915 */
2916 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2917 UInt bX = INSN(31,31);
2918 UInt sh = INSN(23,22);
2919 UInt bN = INSN(21,21);
2920 UInt rM = INSN(20,16);
2921 UInt imm6 = INSN(15,10);
2922 UInt rN = INSN(9,5);
2923 UInt rD = INSN(4,0);
2924 Bool is64 = bX == 1;
2925 IRType ty = is64 ? Ity_I64 : Ity_I32;
2926 if (!is64 && imm6 > 31) {
2927 /* invalid; fall through */
2928 } else {
2929 IRTemp argL = newTemp(ty);
2930 assign(argL, getIRegOrZR(is64, rN));
2931 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2932 IROp op = Iop_INVALID;
2933 switch (INSN(30,29)) {
2934 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2935 case BITS2(0,1): op = mkOR(ty); break;
2936 case BITS2(1,0): op = mkXOR(ty); break;
2937 default: vassert(0);
2938 }
2939 IRTemp res = newTemp(ty);
2940 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2941 if (INSN(30,29) == BITS2(1,1)) {
2942 setFlags_LOGIC(is64, res);
2943 }
2944 putIRegOrZR(is64, rD, mkexpr(res));
2945
2946 static const HChar* names_op[8]
2947 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2948 vassert(((bN << 2) | INSN(30,29)) < 8);
2949 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2950 /* Special-case the printing of "MOV" */
2951 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2952 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2953 nameIRegOrZR(is64, rM));
2954 } else {
2955 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2956 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2957 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2958 }
2959 return True;
2960 }
2961 }
2962
2963 /* -------------------- {U,S}MULH -------------------- */
2964 /* 31 23 22 20 15 9 4
2965 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2966 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2967 */
2968 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2969 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2970 Bool isU = INSN(23,23) == 1;
2971 UInt mm = INSN(20,16);
2972 UInt nn = INSN(9,5);
2973 UInt dd = INSN(4,0);
2974 putIReg64orZR(dd, unop(Iop_128HIto64,
2975 binop(isU ? Iop_MullU64 : Iop_MullS64,
2976 getIReg64orZR(nn), getIReg64orZR(mm))));
2977 DIP("%cmulh %s, %s, %s\n",
2978 isU ? 'u' : 's',
2979 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2980 return True;
2981 }
2982
2983 /* -------------------- M{ADD,SUB} -------------------- */
2984 /* 31 30 20 15 14 9 4
2985 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2986 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2987 */
2988 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2989 Bool is64 = INSN(31,31) == 1;
2990 UInt mm = INSN(20,16);
2991 Bool isAdd = INSN(15,15) == 0;
2992 UInt aa = INSN(14,10);
2993 UInt nn = INSN(9,5);
2994 UInt dd = INSN(4,0);
2995 if (is64) {
2996 putIReg64orZR(
2997 dd,
2998 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2999 getIReg64orZR(aa),
3000 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3001 } else {
3002 putIReg32orZR(
3003 dd,
3004 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3005 getIReg32orZR(aa),
3006 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3007 }
3008 DIP("%s %s, %s, %s, %s\n",
3009 isAdd ? "madd" : "msub",
3010 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3011 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3012 return True;
3013 }
3014
3015 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3016 /* 31 30 28 20 15 11 9 4
3017 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3018 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3019 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3020 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3021 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3022 */
3023 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3024 Bool is64 = INSN(31,31) == 1;
3025 UInt b30 = INSN(30,30);
3026 UInt mm = INSN(20,16);
3027 UInt cond = INSN(15,12);
3028 UInt b10 = INSN(10,10);
3029 UInt nn = INSN(9,5);
3030 UInt dd = INSN(4,0);
3031 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3032 IRType ty = is64 ? Ity_I64 : Ity_I32;
3033 IRExpr* argL = getIRegOrZR(is64, nn);
3034 IRExpr* argR = getIRegOrZR(is64, mm);
3035 switch (op) {
3036 case BITS2(0,0):
3037 break;
3038 case BITS2(0,1):
3039 argR = binop(mkADD(ty), argR, mkU(ty,1));
3040 break;
3041 case BITS2(1,0):
3042 argR = unop(mkNOT(ty), argR);
3043 break;
3044 case BITS2(1,1):
3045 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3046 break;
3047 default:
3048 vassert(0);
3049 }
3050 putIRegOrZR(
3051 is64, dd,
3052 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3053 argL, argR)
3054 );
3055 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3056 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3057 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3058 nameIRegOrZR(is64, mm), nameCC(cond));
3059 return True;
3060 }
3061
3062 /* -------------- ADD/SUB(extended reg) -------------- */
3063 /* 28 20 15 12 9 4
3064 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3065 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3066
3067 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3068 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3069
3070 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3071 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3072
3073 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3074 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3075
3076 The 'm' operand is extended per opt, thusly:
3077
3078 000 Xm & 0xFF UXTB
3079 001 Xm & 0xFFFF UXTH
3080 010 Xm & (2^32)-1 UXTW
3081 011 Xm UXTX
3082
3083 100 Xm sx from bit 7 SXTB
3084 101 Xm sx from bit 15 SXTH
3085 110 Xm sx from bit 31 SXTW
3086 111 Xm SXTX
3087
3088 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3089 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3090 are the identity operation on Wm.
3091
3092 After extension, the value is shifted left by imm3 bits, which
3093 may only be in the range 0 .. 4 inclusive.
3094 */
3095 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3096 Bool is64 = INSN(31,31) == 1;
3097 Bool isSub = INSN(30,30) == 1;
3098 Bool setCC = INSN(29,29) == 1;
3099 UInt mm = INSN(20,16);
3100 UInt opt = INSN(15,13);
3101 UInt imm3 = INSN(12,10);
3102 UInt nn = INSN(9,5);
3103 UInt dd = INSN(4,0);
3104 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3105 "sxtb", "sxth", "sxtw", "sxtx" };
3106 /* Do almost the same thing in the 32- and 64-bit cases. */
3107 IRTemp xN = newTemp(Ity_I64);
3108 IRTemp xM = newTemp(Ity_I64);
3109 assign(xN, getIReg64orSP(nn));
3110 assign(xM, getIReg64orZR(mm));
3111 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3112 Int shSX = 0;
3113 /* widen Xm .. */
3114 switch (opt) {
3115 case BITS3(0,0,0): // UXTB
3116 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3117 case BITS3(0,0,1): // UXTH
3118 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3119 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3120 if (is64) {
3121 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3122 }
3123 break;
3124 case BITS3(0,1,1): // UXTX -- always a noop
3125 break;
3126 case BITS3(1,0,0): // SXTB
3127 shSX = 56; goto sxTo64;
3128 case BITS3(1,0,1): // SXTH
3129 shSX = 48; goto sxTo64;
3130 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3131 if (is64) {
3132 shSX = 32; goto sxTo64;
3133 }
3134 break;
3135 case BITS3(1,1,1): // SXTX -- always a noop
3136 break;
3137 sxTo64:
3138 vassert(shSX >= 32);
3139 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3140 mkU8(shSX));
3141 break;
3142 default:
3143 vassert(0);
3144 }
3145 /* and now shift */
3146 IRTemp argL = xN;
3147 IRTemp argR = newTemp(Ity_I64);
3148 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3149 IRTemp res = newTemp(Ity_I64);
3150 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3151 mkexpr(argL), mkexpr(argR)));
3152 if (is64) {
3153 if (setCC) {
3154 putIReg64orZR(dd, mkexpr(res));
3155 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3156 } else {
3157 putIReg64orSP(dd, mkexpr(res));
3158 }
3159 } else {
3160 if (setCC) {
3161 IRTemp argL32 = newTemp(Ity_I32);
3162 IRTemp argR32 = newTemp(Ity_I32);
3163 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3164 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3165 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3166 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3167 } else {
3168 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3169 }
3170 }
3171 DIP("%s%s %s, %s, %s %s lsl %u\n",
3172 isSub ? "sub" : "add", setCC ? "s" : "",
3173 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3174 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3175 nameExt[opt], imm3);
3176 return True;
3177 }
3178
3179 /* ---------------- CCMP/CCMN(imm) ---------------- */
3180 /* Bizarrely, these appear in the "data processing register"
3181 category, even though they are operations against an
3182 immediate. */
3183 /* 31 29 20 15 11 9 3
3184 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3185 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3186
3187 Operation is:
3188 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3189 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3190 */
3191 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3192 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3193 Bool is64 = INSN(31,31) == 1;
3194 Bool isSUB = INSN(30,30) == 1;
3195 UInt imm5 = INSN(20,16);
3196 UInt cond = INSN(15,12);
3197 UInt nn = INSN(9,5);
3198 UInt nzcv = INSN(3,0);
3199
3200 IRTemp condT = newTemp(Ity_I1);
3201 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3202
3203 IRType ty = is64 ? Ity_I64 : Ity_I32;
3204 IRTemp argL = newTemp(ty);
3205 IRTemp argR = newTemp(ty);
3206
3207 if (is64) {
3208 assign(argL, getIReg64orZR(nn));
3209 assign(argR, mkU64(imm5));
3210 } else {
3211 assign(argL, getIReg32orZR(nn));
3212 assign(argR, mkU32(imm5));
3213 }
3214 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3215
3216 DIP("ccm%c %s, #%u, #%u, %s\n",
3217 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3218 imm5, nzcv, nameCC(cond));
3219 return True;
3220 }
3221
3222 /* ---------------- CCMP/CCMN(reg) ---------------- */
3223 /* 31 29 20 15 11 9 3
3224 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3225 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3226 Operation is:
3227 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3228 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3229 */
3230 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3231 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3232 Bool is64 = INSN(31,31) == 1;
3233 Bool isSUB = INSN(30,30) == 1;
3234 UInt mm = INSN(20,16);
3235 UInt cond = INSN(15,12);
3236 UInt nn = INSN(9,5);
3237 UInt nzcv = INSN(3,0);
3238
3239 IRTemp condT = newTemp(Ity_I1);
3240 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3241
3242 IRType ty = is64 ? Ity_I64 : Ity_I32;
3243 IRTemp argL = newTemp(ty);
3244 IRTemp argR = newTemp(ty);
3245
3246 if (is64) {
3247 assign(argL, getIReg64orZR(nn));
3248 assign(argR, getIReg64orZR(mm));
3249 } else {
3250 assign(argL, getIReg32orZR(nn));
3251 assign(argR, getIReg32orZR(mm));
3252 }
3253 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3254
3255 DIP("ccm%c %s, %s, #%u, %s\n",
3256 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3257 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3258 return True;
3259 }
3260
3261
3262 /* -------------- REV/REV16/REV32/RBIT -------------- */
3263 /* 31 30 28 20 15 11 9 4
3264
3265 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3266 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3267
3268 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3269 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3270
3271 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3272 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3273
3274 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3275 */
3276 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3277 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3278 UInt b31 = INSN(31,31);
3279 UInt opc = INSN(11,10);
3280
3281 UInt ix = 0;
3282 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3283 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3284 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3285 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3286 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3287 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3288 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3289 if (ix >= 1 && ix <= 7) {
3290 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3291 UInt nn = INSN(9,5);
3292 UInt dd = INSN(4,0);
3293 IRTemp src = newTemp(Ity_I64);
3294 IRTemp dst = IRTemp_INVALID;
3295 IRTemp (*math)(IRTemp) = NULL;
3296 switch (ix) {
3297 case 1: case 2: math = math_BYTESWAP64; break;
3298 case 3: case 4: math = math_BITSWAP64; break;
3299 case 5: case 6: math = math_USHORTSWAP64; break;
3300 case 7: math = math_UINTSWAP64; break;
3301 default: vassert(0);
3302 }
3303 const HChar* names[7]
3304 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3305 const HChar* nm = names[ix-1];
3306 vassert(math);
3307 if (ix == 6) {
3308 /* This has to be special cased, since the logic below doesn't
3309 handle it correctly. */
3310 assign(src, getIReg64orZR(nn));
3311 dst = math(src);
3312 putIReg64orZR(dd,
3313 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3314 } else if (is64) {
3315 assign(src, getIReg64orZR(nn));
3316 dst = math(src);
3317 putIReg64orZR(dd, mkexpr(dst));
3318 } else {
3319 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3320 dst = math(src);
3321 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3322 }
3323 DIP("%s %s, %s\n", nm,
3324 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3325 return True;
3326 }
3327 /* else fall through */
3328 }
3329
3330 /* -------------------- CLZ/CLS -------------------- */
3331 /* 30 28 24 20 15 9 4
3332 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3333 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3334 */
3335 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3336 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3337 Bool is64 = INSN(31,31) == 1;
3338 Bool isCLS = INSN(10,10) == 1;
3339 UInt nn = INSN(9,5);
3340 UInt dd = INSN(4,0);
3341 IRTemp src = newTemp(Ity_I64);
3342 IRTemp srcZ = newTemp(Ity_I64);
3343 IRTemp dst = newTemp(Ity_I64);
3344 /* Get the argument, widened out to 64 bit */
3345 if (is64) {
3346 assign(src, getIReg64orZR(nn));
3347 } else {
3348 assign(src, binop(Iop_Shl64,
3349 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3350 }
3351 /* If this is CLS, mash the arg around accordingly */
3352 if (isCLS) {
3353 IRExpr* one = mkU8(1);
3354 assign(srcZ,
3355 binop(Iop_Xor64,
3356 binop(Iop_Shl64, mkexpr(src), one),
3357 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3358 } else {
3359 assign(srcZ, mkexpr(src));
3360 }
3361 /* And compute CLZ. */
3362 if (is64) {
3363 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3364 mkU64(isCLS ? 63 : 64),
3365 unop(Iop_Clz64, mkexpr(srcZ))));
3366 putIReg64orZR(dd, mkexpr(dst));
3367 } else {
3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369 mkU64(isCLS ? 31 : 32),
3370 unop(Iop_Clz64, mkexpr(srcZ))));
3371 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3372 }
3373 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3374 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3375 return True;
3376 }
3377
3378 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3379 /* 30 28 20 15 11 9 4
3380 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3381 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3382 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3383 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3384 */
3385 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3386 && INSN(15,12) == BITS4(0,0,1,0)) {
3387 Bool is64 = INSN(31,31) == 1;
3388 UInt mm = INSN(20,16);
3389 UInt op = INSN(11,10);
3390 UInt nn = INSN(9,5);
3391 UInt dd = INSN(4,0);
3392 IRType ty = is64 ? Ity_I64 : Ity_I32;
3393 IRTemp srcL = newTemp(ty);
3394 IRTemp srcR = newTemp(Ity_I64);
3395 IRTemp res = newTemp(ty);
3396 IROp iop = Iop_INVALID;
3397 assign(srcL, getIRegOrZR(is64, nn));
3398 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3399 mkU64(is64 ? 63 : 31)));
3400 if (op < 3) {
3401 // LSLV, LSRV, ASRV
3402 switch (op) {
3403 case BITS2(0,0): iop = mkSHL(ty); break;
3404 case BITS2(0,1): iop = mkSHR(ty); break;
3405 case BITS2(1,0): iop = mkSAR(ty); break;
3406 default: vassert(0);
3407 }
3408 assign(res, binop(iop, mkexpr(srcL),
3409 unop(Iop_64to8, mkexpr(srcR))));
3410 } else {
3411 // RORV
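            // Note: the ITE below picks off the srcR == 0 case.  Without
            // it, the left-shift amount would equal the register width,
            // which is out of range for the IR shift primops and hence
            // would give an undefined result.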
3412 IROp opSHL = mkSHL(ty);
3413 IROp opSHR = mkSHR(ty);
3414 IROp opOR = mkOR(ty);
3415 IRExpr* width = mkU64(is64 ? 64: 32);
3416 assign(
3417 res,
3418 IRExpr_ITE(
3419 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3420 mkexpr(srcL),
3421 binop(opOR,
3422 binop(opSHL,
3423 mkexpr(srcL),
3424 unop(Iop_64to8, binop(Iop_Sub64, width,
3425 mkexpr(srcR)))),
3426 binop(opSHR,
3427 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3428 ));
3429 }
3430 putIRegOrZR(is64, dd, mkexpr(res));
3431 vassert(op < 4);
3432 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3433 DIP("%s %s, %s, %s\n",
3434 names[op], nameIRegOrZR(is64,dd),
3435 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3436 return True;
3437 }
3438
3439 /* -------------------- SDIV/UDIV -------------------- */
3440 /* 30 28 20 15 10 9 4
3441 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3442 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3443 */
3444 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3445 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3446 Bool is64 = INSN(31,31) == 1;
3447 UInt mm = INSN(20,16);
3448 Bool isS = INSN(10,10) == 1;
3449 UInt nn = INSN(9,5);
3450 UInt dd = INSN(4,0);
3451 if (isS) {
3452 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3453 getIRegOrZR(is64, nn),
3454 getIRegOrZR(is64, mm)));
3455 } else {
3456 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3457 getIRegOrZR(is64, nn),
3458 getIRegOrZR(is64, mm)));
3459 }
3460 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3461 nameIRegOrZR(is64, dd),
3462 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3463 return True;
3464 }
3465
3466 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3467 /* 31 23 20 15 14 9 4
3468 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3469 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3470 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3471 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3472 with operation
3473 Xd = Xa +/- (Wn *u/s Wm)
3474 */
3475 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3476 Bool isU = INSN(23,23) == 1;
3477 UInt mm = INSN(20,16);
3478 Bool isAdd = INSN(15,15) == 0;
3479 UInt aa = INSN(14,10);
3480 UInt nn = INSN(9,5);
3481 UInt dd = INSN(4,0);
3482 IRTemp wN = newTemp(Ity_I32);
3483 IRTemp wM = newTemp(Ity_I32);
3484 IRTemp xA = newTemp(Ity_I64);
3485 IRTemp muld = newTemp(Ity_I64);
3486 IRTemp res = newTemp(Ity_I64);
3487 assign(wN, getIReg32orZR(nn));
3488 assign(wM, getIReg32orZR(mm));
3489 assign(xA, getIReg64orZR(aa));
3490 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3491 mkexpr(wN), mkexpr(wM)));
3492 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3493 mkexpr(xA), mkexpr(muld)));
3494 putIReg64orZR(dd, mkexpr(res));
3495 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3496 nameIReg64orZR(dd), nameIReg32orZR(nn),
3497 nameIReg32orZR(mm), nameIReg64orZR(aa));
3498 return True;
3499 }
3500 vex_printf("ARM64 front end: data_processing_register\n");
3501 return False;
3502 # undef INSN
3503 }
3504
3505
3506 /*------------------------------------------------------------*/
3507 /*--- Math helpers for vector interleave/deinterleave ---*/
3508 /*------------------------------------------------------------*/
3509
3510 #define EX(_tmp) \
3511 mkexpr(_tmp)
3512 #define SL(_hi128,_lo128,_nbytes) \
3513 ( (_nbytes) == 0 \
3514 ? (_lo128) \
3515 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3516 #define ROR(_v128,_nbytes) \
3517 SL((_v128),(_v128),(_nbytes))
3518 #define ROL(_v128,_nbytes) \
3519 SL((_v128),(_v128),16-(_nbytes))
3520 #define SHR(_v128,_nbytes) \
3521 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3522 #define SHL(_v128,_nbytes) \
3523 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3524 #define ILO64x2(_argL,_argR) \
3525 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3526 #define IHI64x2(_argL,_argR) \
3527 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3528 #define ILO32x4(_argL,_argR) \
3529 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3530 #define IHI32x4(_argL,_argR) \
3531 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3532 #define ILO16x8(_argL,_argR) \
3533 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3534 #define IHI16x8(_argL,_argR) \
3535 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3536 #define ILO8x16(_argL,_argR) \
3537 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3538 #define IHI8x16(_argL,_argR) \
3539 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3540 #define CEV32x4(_argL,_argR) \
3541 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3542 #define COD32x4(_argL,_argR) \
3543 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3544 #define COD16x8(_argL,_argR) \
3545 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3546 #define COD8x16(_argL,_argR) \
3547 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3548 #define CEV8x16(_argL,_argR) \
3549 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3550 #define AND(_arg1,_arg2) \
3551 binop(Iop_AndV128,(_arg1),(_arg2))
3552 #define OR2(_arg1,_arg2) \
3553 binop(Iop_OrV128,(_arg1),(_arg2))
3554 #define OR3(_arg1,_arg2,_arg3) \
3555 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3557 binop(Iop_OrV128, \
3558 binop(Iop_OrV128,(_arg1),(_arg2)), \
3559 binop(Iop_OrV128,(_arg3),(_arg4)))
3560
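/* A note on the macros above: SL(hi,lo,n) views hi:lo as a 32-byte
   value and extracts the 16 bytes starting at byte offset n.  Hence
   ROR and ROL rotate a vector right or left by n bytes, and SHR and
   SHL shift it right or left by n bytes, shifting in zeroes.  The
   remaining macros are simply shorthands for the corresponding V128
   interleave, cat-lanes and bitwise IROps. */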
3561
3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3563 static
3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3565 UInt laneSzBlg2, IRTemp u0 )
3566 {
3567 assign(*i0, mkexpr(u0));
3568 }
3569
3570
3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3572 static
3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3574 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3575 {
3576 /* This is pretty easy, since we have primitives directly to
3577 hand. */
3578 if (laneSzBlg2 == 3) {
3579 // 64x2
3580 // u1 == B1 B0, u0 == A1 A0
3581 // i1 == B1 A1, i0 == B0 A0
3582 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3583 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3584 return;
3585 }
3586 if (laneSzBlg2 == 2) {
3587 // 32x4
3588 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3589 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3590 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3591 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3592 return;
3593 }
3594 if (laneSzBlg2 == 1) {
3595 // 16x8
3596 // u1 == B{7..0}, u0 == A{7..0}
3597 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3598 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3599 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3600 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3601 return;
3602 }
3603 if (laneSzBlg2 == 0) {
3604 // 8x16
3605 // u1 == B{f..0}, u0 == A{f..0}
3606 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3607 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3608 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3609 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3610 return;
3611 }
3612 /*NOTREACHED*/
3613 vassert(0);
3614 }
3615
3616
3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3618 static
3619 void math_INTERLEAVE3_128(
3620 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3621 UInt laneSzBlg2,
3622 IRTemp u0, IRTemp u1, IRTemp u2 )
3623 {
3624 if (laneSzBlg2 == 3) {
3625 // 64x2
3626 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3627 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3628 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3629 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3630 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3631 return;
3632 }
3633
3634 if (laneSzBlg2 == 2) {
3635 // 32x4
3636 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3637 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3638 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3639 IRTemp p0 = newTempV128();
3640 IRTemp p1 = newTempV128();
3641 IRTemp p2 = newTempV128();
3642 IRTemp c1100 = newTempV128();
3643 IRTemp c0011 = newTempV128();
3644 IRTemp c0110 = newTempV128();
3645 assign(c1100, mkV128(0xFF00));
3646 assign(c0011, mkV128(0x00FF));
3647 assign(c0110, mkV128(0x0FF0));
3648 // First interleave them at 64x2 granularity,
3649 // generating partial ("p") values.
3650 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3651 // And more shuffling around for the final answer
3652 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3653 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3654 assign(*i1, OR3( SHL(EX(p2),12),
3655 AND(EX(p1),EX(c0110)),
3656 SHR(EX(p0),12) ));
3657 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3658 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3659 return;
3660 }
3661
3662 if (laneSzBlg2 == 1) {
3663 // 16x8
3664 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3665 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3666 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3667 //
3668 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3669 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3670 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3671 //
3672 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3673 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3674 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3675 IRTemp p0 = newTempV128();
3676 IRTemp p1 = newTempV128();
3677 IRTemp p2 = newTempV128();
3678 IRTemp c1000 = newTempV128();
3679 IRTemp c0100 = newTempV128();
3680 IRTemp c0010 = newTempV128();
3681 IRTemp c0001 = newTempV128();
3682 assign(c1000, mkV128(0xF000));
3683 assign(c0100, mkV128(0x0F00));
3684 assign(c0010, mkV128(0x00F0));
3685 assign(c0001, mkV128(0x000F));
3686 // First interleave them at 32x4 granularity,
3687 // generating partial ("p") values.
3688 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3689 // And more shuffling around for the final answer
3690 assign(*i2,
3691 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3692 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3693 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3694 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3695 ));
3696 assign(*i1,
3697 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3698 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3699 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3700 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3701 ));
3702 assign(*i0,
3703 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3704 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3705 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3706 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3707 ));
3708 return;
3709 }
3710
3711 if (laneSzBlg2 == 0) {
3712 // 8x16. It doesn't seem worth the hassle of first doing a
3713 // 16x8 interleave, so just generate all 24 partial results
3714 // directly :-(
3715 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3716 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3717 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3718 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3719
3720 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3721 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3722 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3723 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3724 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3725 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3726 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3727 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3728 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3729
3730 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
3731 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3732 //
3733 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3734 IRTemp t_##_tempName = newTempV128(); \
3735 assign(t_##_tempName, \
3736 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3737 ROR(EX(_srcVec2),(_srcShift2)) ) )
3738
3739 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3740 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3741
3742 // The slicing and reassembly are done as interleavedly as possible,
3743 // so as to minimise the demand for registers in the back end, which
3744 // was observed to be a problem in testing.
3745
3746 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3747 XXXX(AfCe, AA, 0xf, CC, 0xe);
3748 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3749
3750 XXXX(BeAe, BB, 0xe, AA, 0xe);
3751 XXXX(CdBd, CC, 0xd, BB, 0xd);
3752 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3753 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3754
3755 XXXX(AdCc, AA, 0xd, CC, 0xc);
3756 XXXX(BcAc, BB, 0xc, AA, 0xc);
3757 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3758
3759 XXXX(CbBb, CC, 0xb, BB, 0xb);
3760 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3761 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3762 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3763 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3764
3765 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3766 XXXX(C9B9, CC, 0x9, BB, 0x9);
3767 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3768
3769 XXXX(A9C8, AA, 0x9, CC, 0x8);
3770 XXXX(B8A8, BB, 0x8, AA, 0x8);
3771 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3772 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3773
3774 XXXX(C7B7, CC, 0x7, BB, 0x7);
3775 XXXX(A7C6, AA, 0x7, CC, 0x6);
3776 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3777
3778 XXXX(B6A6, BB, 0x6, AA, 0x6);
3779 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3780 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3781 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3782 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3783
3784 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3785 XXXX(B4A4, BB, 0x4, AA, 0x4);
3786 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3787
3788 XXXX(C3B3, CC, 0x3, BB, 0x3);
3789 XXXX(A3C2, AA, 0x3, CC, 0x2);
3790 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3791 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3792
3793 XXXX(B2A2, BB, 0x2, AA, 0x2);
3794 XXXX(C1B1, CC, 0x1, BB, 0x1);
3795 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3796
3797 XXXX(A1C0, AA, 0x1, CC, 0x0);
3798 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3799 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3800 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3801 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3802
3803 # undef XXXX
3804 return;
3805 }
3806
3807 /*NOTREACHED*/
3808 vassert(0);
3809 }
3810
3811
3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3813 static
3814 void math_INTERLEAVE4_128(
3815 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3816 UInt laneSzBlg2,
3817 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3818 {
3819 if (laneSzBlg2 == 3) {
3820 // 64x2
3821 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3822 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3823 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3824 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3825 return;
3826 }
3827 if (laneSzBlg2 == 2) {
3828 // 32x4
3829 // First, interleave at the 64-bit lane size.
3830 IRTemp p0 = newTempV128();
3831 IRTemp p1 = newTempV128();
3832 IRTemp p2 = newTempV128();
3833 IRTemp p3 = newTempV128();
3834 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3835 // And interleave (cat) at the 32 bit size.
3836 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3837 assign(*i1, COD32x4(EX(p1), EX(p0)));
3838 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3839 assign(*i3, COD32x4(EX(p3), EX(p2)));
3840 return;
3841 }
3842 if (laneSzBlg2 == 1) {
3843 // 16x8
3844 // First, interleave at the 32-bit lane size.
3845 IRTemp p0 = newTempV128();
3846 IRTemp p1 = newTempV128();
3847 IRTemp p2 = newTempV128();
3848 IRTemp p3 = newTempV128();
3849 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3850 // And rearrange within each vector, to get the right 16 bit lanes.
3851 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3852 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3853 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3854 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3855 return;
3856 }
3857 if (laneSzBlg2 == 0) {
3858 // 8x16
3859 // First, interleave at the 16-bit lane size.
3860 IRTemp p0 = newTempV128();
3861 IRTemp p1 = newTempV128();
3862 IRTemp p2 = newTempV128();
3863 IRTemp p3 = newTempV128();
3864 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3865 // And rearrange within each vector, to get the right 8 bit lanes.
3866 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3867 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3868 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3869 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3870 return;
3871 }
3872 /*NOTREACHED*/
3873 vassert(0);
3874 }
3875
3876
3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3878 static
3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3880 UInt laneSzBlg2, IRTemp i0 )
3881 {
3882 assign(*u0, mkexpr(i0));
3883 }
3884
3885
3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3887 static
3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3889 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3890 {
3891 /* This is pretty easy, since we have primitives directly to
3892 hand. */
3893 if (laneSzBlg2 == 3) {
3894 // 64x2
3895 // i1 == B1 A1, i0 == B0 A0
3896 // u1 == B1 B0, u0 == A1 A0
3897 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3898 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3899 return;
3900 }
3901 if (laneSzBlg2 == 2) {
3902 // 32x4
3903 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3904 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3905 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3906 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3907 return;
3908 }
3909 if (laneSzBlg2 == 1) {
3910 // 16x8
3911 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3912 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3913 // u1 == B{7..0}, u0 == A{7..0}
3914 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3915 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
3916 return;
3917 }
3918 if (laneSzBlg2 == 0) {
3919 // 8x16
3920 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3921 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3922 // u1 == B{f..0}, u0 == A{f..0}
3923 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3924 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
3925 return;
3926 }
3927 /*NOTREACHED*/
3928 vassert(0);
3929 }
3930
3931
3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
3933 static
3934 void math_DEINTERLEAVE3_128(
3935 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
3936 UInt laneSzBlg2,
3937 IRTemp i0, IRTemp i1, IRTemp i2 )
3938 {
3939 if (laneSzBlg2 == 3) {
3940 // 64x2
3941 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3942 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3943 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
3944 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
3945 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
3946 return;
3947 }
3948
3949 if (laneSzBlg2 == 2) {
3950 // 32x4
3951 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3952 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3953 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3954 IRTemp t_a1c0b0a0 = newTempV128();
3955 IRTemp t_a2c1b1a1 = newTempV128();
3956 IRTemp t_a3c2b2a2 = newTempV128();
3957 IRTemp t_a0c3b3a3 = newTempV128();
3958 IRTemp p0 = newTempV128();
3959 IRTemp p1 = newTempV128();
3960 IRTemp p2 = newTempV128();
3961 // Compute some intermediate values.
3962 assign(t_a1c0b0a0, EX(i0));
3963 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
3964 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
3965 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
3966 // First deinterleave into lane-pairs
3967 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
3968 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
3969 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
3970 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
3971 // Then deinterleave at 64x2 granularity.
3972 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
3973 return;
3974 }
3975
3976 if (laneSzBlg2 == 1) {
3977 // 16x8
3978 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3979 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3980 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3981 //
3982 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3983 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3984 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3985 //
3986 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3987 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3988 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3989
3990 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
3991 s0 = s1 = s2 = s3
3992 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
3993 newTempsV128_4(&s0, &s1, &s2, &s3);
3994 newTempsV128_4(&t0, &t1, &t2, &t3);
3995 newTempsV128_4(&p0, &p1, &p2, &c00111111);
3996
3997 // s0 == b2a2 c1b1a1 c0b0a0
3998 // s1 == b4a4 c3b3a3 c2b2a2
3999 // s2 == b6a6 c5b5a5 c4b4a4
4000 // s3 == b0a0 c7b7a7 c6b6a6
4001 assign(s0, EX(i0));
4002 assign(s1, SL(EX(i1),EX(i0),6*2));
4003 assign(s2, SL(EX(i2),EX(i1),4*2));
4004 assign(s3, SL(EX(i0),EX(i2),2*2));
4005
4006 // t0 == 0 0 c1c0 b1b0 a1a0
4007 // t1 == 0 0 c3c2 b3b2 a3a2
4008 // t2 == 0 0 c5c4 b5b4 a5a4
4009 // t3 == 0 0 c7c6 b7b6 a7a6
4010 assign(c00111111, mkV128(0x0FFF));
4011 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4012 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4013 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4014 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4015
4016 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4017 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4018 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4019
4020 // Then deinterleave at 32x4 granularity.
4021 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4022 return;
4023 }
4024
4025 if (laneSzBlg2 == 0) {
4026 // 8x16. This is the same scheme as for 16x8, with twice the
4027 // number of intermediate values.
4028 //
4029 // u2 == C{f..0}
4030 // u1 == B{f..0}
4031 // u0 == A{f..0}
4032 //
4033 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4034 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4035 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4036 //
4037 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4038 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4039 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4040 //
4041 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4042 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4043 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4044 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4045 = IRTemp_INVALID;
4046 newTempsV128_4(&s0, &s1, &s2, &s3);
4047 newTempsV128_4(&s4, &s5, &s6, &s7);
4048 newTempsV128_4(&t0, &t1, &t2, &t3);
4049 newTempsV128_4(&t4, &t5, &t6, &t7);
4050 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4051
4052 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4053 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4054 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4055 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4056 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4057 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4058 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4059 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4060 assign(s0, SL(EX(i1),EX(i0), 0));
4061 assign(s1, SL(EX(i1),EX(i0), 6));
4062 assign(s2, SL(EX(i1),EX(i0),12));
4063 assign(s3, SL(EX(i2),EX(i1), 2));
4064 assign(s4, SL(EX(i2),EX(i1), 8));
4065 assign(s5, SL(EX(i2),EX(i1),14));
4066 assign(s6, SL(EX(i0),EX(i2), 4));
4067 assign(s7, SL(EX(i0),EX(i2),10));
4068
4069 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4070 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4071 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4072 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4073 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4074 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4075 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4076 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4077 assign(cMASK, mkV128(0x003F));
4078 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4079 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4080 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4081 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4082 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4083 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4084 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4085 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4086
4087 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4088 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4089 SHL(EX(t3),2), SHR(EX(t2),4) ));
4090 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4091
4092 // Then deinterleave at 16x8 granularity.
4093 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4094 return;
4095 }
4096
4097 /*NOTREACHED*/
4098 vassert(0);
4099 }
4100
4101
4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4103 static
4104 void math_DEINTERLEAVE4_128(
4105 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4106 UInt laneSzBlg2,
4107 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4108 {
4109 if (laneSzBlg2 == 3) {
4110 // 64x2
4111 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4112 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4113 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4114 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4115 return;
4116 }
4117 if (laneSzBlg2 == 2) {
4118 // 32x4
4119 IRTemp p0 = newTempV128();
4120 IRTemp p2 = newTempV128();
4121 IRTemp p1 = newTempV128();
4122 IRTemp p3 = newTempV128();
4123 assign(p0, ILO32x4(EX(i1), EX(i0)));
4124 assign(p1, IHI32x4(EX(i1), EX(i0)));
4125 assign(p2, ILO32x4(EX(i3), EX(i2)));
4126 assign(p3, IHI32x4(EX(i3), EX(i2)));
4127 // And now do what we did for the 64-bit case.
4128 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4129 return;
4130 }
4131 if (laneSzBlg2 == 1) {
4132 // 16x8
4133 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4134 IRTemp p0 = newTempV128();
4135 IRTemp p1 = newTempV128();
4136 IRTemp p2 = newTempV128();
4137 IRTemp p3 = newTempV128();
4138 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4139 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4140 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4141 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4142 // From here on is like the 32 bit case.
4143 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4144 return;
4145 }
4146 if (laneSzBlg2 == 0) {
4147 // 8x16
4148 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4149 IRTemp p0 = newTempV128();
4150 IRTemp p1 = newTempV128();
4151 IRTemp p2 = newTempV128();
4152 IRTemp p3 = newTempV128();
4153 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4154 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4155 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4156 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4157 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4158 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4159 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4160 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4161 // From here on is like the 16 bit case.
4162 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4163 return;
4164 }
4165 /*NOTREACHED*/
4166 vassert(0);
4167 }
4168
4169
4170 /* Wrappers that use the full-width (de)interleavers to do half-width
4171 (de)interleaving. The scheme is to clone each input lane in the
4172 lower half of each incoming value, do a full width (de)interleave
4173 at the next lane size up, and remove every other lane of the
4174 result. The returned values may have any old junk in the upper
4175 64 bits -- the caller must ignore that. */
4176
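/* Illustrative example, for the 2-vector case with 32-bit lanes
   (laneSzBlg2 == 2): the useful halves are u0 = A1 A0 and
   u1 = B1 B0.  Cloning each lane gives A1 A1 A0 A0 and B1 B1 B0 B0;
   the full-width interleave at the 64-bit lane size then pairs up
   the clones, and keeping only the even 32-bit lanes of each result
   drops the duplicates, leaving the required interleaved pairs in
   the lower 64 bits of the two results. */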
4177 /* Helper function -- get doubling and narrowing operations. */
4178 static
4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4180 /*OUT*/IROp* halver,
4181 UInt laneSzBlg2 )
4182 {
4183 switch (laneSzBlg2) {
4184 case 2:
4185 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4186 break;
4187 case 1:
4188 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4189 break;
4190 case 0:
4191 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4192 break;
4193 default:
4194 vassert(0);
4195 }
4196 }
4197
4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4199 static
4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4201 UInt laneSzBlg2, IRTemp u0 )
4202 {
4203 assign(*i0, mkexpr(u0));
4204 }
4205
4206
4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4208 static
4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4210 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4211 {
4212 if (laneSzBlg2 == 3) {
4213 // 1x64, degenerate case
4214 assign(*i0, EX(u0));
4215 assign(*i1, EX(u1));
4216 return;
4217 }
4218
4219 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4220 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4221 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4222
4223 IRTemp du0 = newTempV128();
4224 IRTemp du1 = newTempV128();
4225 assign(du0, binop(doubler, EX(u0), EX(u0)));
4226 assign(du1, binop(doubler, EX(u1), EX(u1)));
4227 IRTemp di0 = newTempV128();
4228 IRTemp di1 = newTempV128();
4229 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4230 assign(*i0, binop(halver, EX(di0), EX(di0)));
4231 assign(*i1, binop(halver, EX(di1), EX(di1)));
4232 }
4233
4234
4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4236 static
4237 void math_INTERLEAVE3_64(
4238 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4239 UInt laneSzBlg2,
4240 IRTemp u0, IRTemp u1, IRTemp u2 )
4241 {
4242 if (laneSzBlg2 == 3) {
4243 // 1x64, degenerate case
4244 assign(*i0, EX(u0));
4245 assign(*i1, EX(u1));
4246 assign(*i2, EX(u2));
4247 return;
4248 }
4249
4250 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4251 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4252 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4253
4254 IRTemp du0 = newTempV128();
4255 IRTemp du1 = newTempV128();
4256 IRTemp du2 = newTempV128();
4257 assign(du0, binop(doubler, EX(u0), EX(u0)));
4258 assign(du1, binop(doubler, EX(u1), EX(u1)));
4259 assign(du2, binop(doubler, EX(u2), EX(u2)));
4260 IRTemp di0 = newTempV128();
4261 IRTemp di1 = newTempV128();
4262 IRTemp di2 = newTempV128();
4263 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4264 assign(*i0, binop(halver, EX(di0), EX(di0)));
4265 assign(*i1, binop(halver, EX(di1), EX(di1)));
4266 assign(*i2, binop(halver, EX(di2), EX(di2)));
4267 }
4268
4269
4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4271 static
4272 void math_INTERLEAVE4_64(
4273 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4274 UInt laneSzBlg2,
4275 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4276 {
4277 if (laneSzBlg2 == 3) {
4278 // 1x64, degenerate case
4279 assign(*i0, EX(u0));
4280 assign(*i1, EX(u1));
4281 assign(*i2, EX(u2));
4282 assign(*i3, EX(u3));
4283 return;
4284 }
4285
4286 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4287 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4288 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4289
4290 IRTemp du0 = newTempV128();
4291 IRTemp du1 = newTempV128();
4292 IRTemp du2 = newTempV128();
4293 IRTemp du3 = newTempV128();
4294 assign(du0, binop(doubler, EX(u0), EX(u0)));
4295 assign(du1, binop(doubler, EX(u1), EX(u1)));
4296 assign(du2, binop(doubler, EX(u2), EX(u2)));
4297 assign(du3, binop(doubler, EX(u3), EX(u3)));
4298 IRTemp di0 = newTempV128();
4299 IRTemp di1 = newTempV128();
4300 IRTemp di2 = newTempV128();
4301 IRTemp di3 = newTempV128();
4302 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4303 laneSzBlg2 + 1, du0, du1, du2, du3);
4304 assign(*i0, binop(halver, EX(di0), EX(di0)));
4305 assign(*i1, binop(halver, EX(di1), EX(di1)));
4306 assign(*i2, binop(halver, EX(di2), EX(di2)));
4307 assign(*i3, binop(halver, EX(di3), EX(di3)));
4308 }
4309
4310
4311 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4312 static
4313 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4314 UInt laneSzBlg2, IRTemp i0 )
4315 {
4316 assign(*u0, mkexpr(i0));
4317 }
4318
4319
4320 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4321 static
4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4323 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4324 {
4325 if (laneSzBlg2 == 3) {
4326 // 1x64, degenerate case
4327 assign(*u0, EX(i0));
4328 assign(*u1, EX(i1));
4329 return;
4330 }
4331
4332 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4333 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4334 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4335
4336 IRTemp di0 = newTempV128();
4337 IRTemp di1 = newTempV128();
4338 assign(di0, binop(doubler, EX(i0), EX(i0)));
4339 assign(di1, binop(doubler, EX(i1), EX(i1)));
4340
4341 IRTemp du0 = newTempV128();
4342 IRTemp du1 = newTempV128();
4343 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4344 assign(*u0, binop(halver, EX(du0), EX(du0)));
4345 assign(*u1, binop(halver, EX(du1), EX(du1)));
4346 }
4347
4348
4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4350 static
4351 void math_DEINTERLEAVE3_64(
4352 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4353 UInt laneSzBlg2,
4354 IRTemp i0, IRTemp i1, IRTemp i2 )
4355 {
4356 if (laneSzBlg2 == 3) {
4357 // 1x64, degenerate case
4358 assign(*u0, EX(i0));
4359 assign(*u1, EX(i1));
4360 assign(*u2, EX(i2));
4361 return;
4362 }
4363
4364 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4365 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4366 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4367
4368 IRTemp di0 = newTempV128();
4369 IRTemp di1 = newTempV128();
4370 IRTemp di2 = newTempV128();
4371 assign(di0, binop(doubler, EX(i0), EX(i0)));
4372 assign(di1, binop(doubler, EX(i1), EX(i1)));
4373 assign(di2, binop(doubler, EX(i2), EX(i2)));
4374 IRTemp du0 = newTempV128();
4375 IRTemp du1 = newTempV128();
4376 IRTemp du2 = newTempV128();
4377 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4378 assign(*u0, binop(halver, EX(du0), EX(du0)));
4379 assign(*u1, binop(halver, EX(du1), EX(du1)));
4380 assign(*u2, binop(halver, EX(du2), EX(du2)));
4381 }
4382
4383
4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4385 static
4386 void math_DEINTERLEAVE4_64(
4387 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4388 UInt laneSzBlg2,
4389 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4390 {
4391 if (laneSzBlg2 == 3) {
4392 // 1x64, degenerate case
4393 assign(*u0, EX(i0));
4394 assign(*u1, EX(i1));
4395 assign(*u2, EX(i2));
4396 assign(*u3, EX(i3));
4397 return;
4398 }
4399
4400 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4401 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4402 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4403
4404 IRTemp di0 = newTempV128();
4405 IRTemp di1 = newTempV128();
4406 IRTemp di2 = newTempV128();
4407 IRTemp di3 = newTempV128();
4408 assign(di0, binop(doubler, EX(i0), EX(i0)));
4409 assign(di1, binop(doubler, EX(i1), EX(i1)));
4410 assign(di2, binop(doubler, EX(i2), EX(i2)));
4411 assign(di3, binop(doubler, EX(i3), EX(i3)));
4412 IRTemp du0 = newTempV128();
4413 IRTemp du1 = newTempV128();
4414 IRTemp du2 = newTempV128();
4415 IRTemp du3 = newTempV128();
4416 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4417 laneSzBlg2 + 1, di0, di1, di2, di3);
4418 assign(*u0, binop(halver, EX(du0), EX(du0)));
4419 assign(*u1, binop(halver, EX(du1), EX(du1)));
4420 assign(*u2, binop(halver, EX(du2), EX(du2)));
4421 assign(*u3, binop(halver, EX(du3), EX(du3)));
4422 }
4423
4424
4425 #undef EX
4426 #undef SL
4427 #undef ROR
4428 #undef ROL
4429 #undef SHR
4430 #undef SHL
4431 #undef ILO64x2
4432 #undef IHI64x2
4433 #undef ILO32x4
4434 #undef IHI32x4
4435 #undef ILO16x8
4436 #undef IHI16x8
4437 #undef ILO8x16
4438 #undef IHI8x16
4439 #undef CEV32x4
4440 #undef COD32x4
4441 #undef COD16x8
4442 #undef COD8x16
4443 #undef CEV8x16
4444 #undef AND
4445 #undef OR2
4446 #undef OR3
4447 #undef OR4
4448
4449
4450 /*------------------------------------------------------------*/
4451 /*--- Load and Store instructions ---*/
4452 /*------------------------------------------------------------*/
4453
4454 /* Generate the EA for a "reg + reg" style amode. This is done from
4455 parts of the insn, but for sanity checking's sake it takes the whole
4456 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4457 and S=insn[12]:
4458
4459 The possible forms, along with their opt:S values, are:
4460 011:0 Xn|SP + Xm
4461 111:0 Xn|SP + Xm
4462 011:1 Xn|SP + Xm * transfer_szB
4463 111:1 Xn|SP + Xm * transfer_szB
4464 010:0 Xn|SP + 32Uto64(Wm)
4465 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4466 110:0 Xn|SP + 32Sto64(Wm)
4467 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4468
4469 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4470 the transfer size is insn[23,31,30]. For integer loads/stores,
4471 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4472
4473 If the decoding fails, it returns IRTemp_INVALID.
4474
4475 isInt is True iff this decoding is for transfers to/from integer
4476 registers. If False it is for transfers to/from vector registers.
4477 */
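/* Example: "ldr x1, [x2, x3, lsl #3]" is the 011:1 form above; the
   EA generated is X2 + (X3 << 3), the shift being log2 of the 8-byte
   transfer size. */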
4478 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4479 {
4480 UInt optS = SLICE_UInt(insn, 15, 12);
4481 UInt mm = SLICE_UInt(insn, 20, 16);
4482 UInt nn = SLICE_UInt(insn, 9, 5);
4483 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4484 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4485
4486 buf[0] = 0;
4487
4488 /* Sanity checks, that this really is a load/store insn. */
4489 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4490 goto fail;
4491
4492 if (isInt
4493 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4494 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4495 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4496 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4497 goto fail;
4498
4499 if (!isInt
4500 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4501 goto fail;
4502
4503 /* Throw out non-verified but possibly valid cases. */
4504 switch (szLg2) {
4505 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4506 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4507 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4508 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4509 case BITS3(1,0,0): // can only ever be valid for the vector case
4510 if (isInt) goto fail; else break;
4511 case BITS3(1,0,1): // these sizes are never valid
4512 case BITS3(1,1,0):
4513 case BITS3(1,1,1): goto fail;
4514
4515 default: vassert(0);
4516 }
4517
4518 IRExpr* rhs = NULL;
4519 switch (optS) {
4520 case BITS4(1,1,1,0): goto fail; //ATC
4521 case BITS4(0,1,1,0):
4522 rhs = getIReg64orZR(mm);
4523 vex_sprintf(buf, "[%s, %s]",
4524 nameIReg64orZR(nn), nameIReg64orZR(mm));
4525 break;
4526 case BITS4(1,1,1,1): goto fail; //ATC
4527 case BITS4(0,1,1,1):
4528 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4529 vex_sprintf(buf, "[%s, %s lsl %u]",
4530 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4531 break;
4532 case BITS4(0,1,0,0):
4533 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4534 vex_sprintf(buf, "[%s, %s uxtx]",
4535 nameIReg64orZR(nn), nameIReg32orZR(mm));
4536 break;
4537 case BITS4(0,1,0,1):
4538 rhs = binop(Iop_Shl64,
4539 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4540 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4541 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4542 break;
4543 case BITS4(1,1,0,0):
4544 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4545 vex_sprintf(buf, "[%s, %s sxtx]",
4546 nameIReg64orZR(nn), nameIReg32orZR(mm));
4547 break;
4548 case BITS4(1,1,0,1):
4549 rhs = binop(Iop_Shl64,
4550 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4551 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4552 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4553 break;
4554 default:
4555 /* The rest appear to be genuinely invalid */
4556 goto fail;
4557 }
4558
4559 vassert(rhs);
4560 IRTemp res = newTemp(Ity_I64);
4561 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4562 return res;
4563
4564 fail:
4565 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4566 return IRTemp_INVALID;
4567 }
4568
4569
4570 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4571 bits of DATAE :: Ity_I64. */
4572 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4573 {
4574 IRExpr* addrE = mkexpr(addr);
4575 switch (szB) {
4576 case 8:
4577 storeLE(addrE, dataE);
4578 break;
4579 case 4:
4580 storeLE(addrE, unop(Iop_64to32, dataE));
4581 break;
4582 case 2:
4583 storeLE(addrE, unop(Iop_64to16, dataE));
4584 break;
4585 case 1:
4586 storeLE(addrE, unop(Iop_64to8, dataE));
4587 break;
4588 default:
4589 vassert(0);
4590 }
4591 }
4592
4593
4594 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4595 placing the result in an Ity_I64 temporary. */
4596 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4597 {
4598 IRTemp res = newTemp(Ity_I64);
4599 IRExpr* addrE = mkexpr(addr);
4600 switch (szB) {
4601 case 8:
4602 assign(res, loadLE(Ity_I64,addrE));
4603 break;
4604 case 4:
4605 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4606 break;
4607 case 2:
4608 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4609 break;
4610 case 1:
4611 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4612 break;
4613 default:
4614 vassert(0);
4615 }
4616 return res;
4617 }
4618
4619
4620 /* Generate a "standard 7" name, from bitQ and size. But also
4621 allow ".1d" since that's occasionally useful. */
4622 static
4623 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4624 {
4625 vassert(bitQ <= 1 && size <= 3);
4626 const HChar* nms[8]
4627 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4628 UInt ix = (bitQ << 2) | size;
4629 vassert(ix < 8);
4630 return nms[ix];
4631 }
4632
4633
4634 static
4635 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4636 {
4637 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4638
4639 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4640 /* uimm12 is scaled by the transfer size
4641
4642 31 29 26 21 9 4
4643 | | | | | |
4644 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4645 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4646
4647 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4648 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4649
4650 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4651 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4652
4653 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4654 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4655 */
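/* Example: "ldr x0, [sp, #16]" is encoded with imm12 == 2, since the
   immediate is scaled by the 8-byte transfer size. */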
4656 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4657 UInt szLg2 = INSN(31,30);
4658 UInt szB = 1 << szLg2;
4659 Bool isLD = INSN(22,22) == 1;
4660 UInt offs = INSN(21,10) * szB;
4661 UInt nn = INSN(9,5);
4662 UInt tt = INSN(4,0);
4663 IRTemp ta = newTemp(Ity_I64);
4664 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4665 if (nn == 31) { /* FIXME generate stack alignment check */ }
4666 vassert(szLg2 < 4);
4667 if (isLD) {
4668 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4669 } else {
4670 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4671 }
4672 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4673 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4674 DIP("%s %s, [%s, #%u]\n",
4675 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4676 nameIReg64orSP(nn), offs);
4677 return True;
4678 }
4679
4680 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4681 /*
4682 31 29 26 20 11 9 4
4683 | | | | | | |
4684 (at-Rn-then-Rn=EA) | | |
4685 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4686 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4687
4688 (at-EA-then-Rn=EA)
4689 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4690 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4691
4692 (at-EA)
4693 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4694 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4695
4696 simm9 is unscaled.
4697
4698 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4699 load case this is because it would create two competing values for
4700 Rt. In the store case the reason is unclear, but the spec
4701 disallows it anyway.
4702
4703 Stores are narrowing, loads are unsigned widening. sz encodes
4704 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4705 */
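/* Example: "ldr x0, [x1], #8" is the at-Rn-then-Rn=EA (post-index)
   form: the load uses the unmodified X1 and X1 is then advanced by
   8. */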
4706 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4707 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4708 UInt szLg2 = INSN(31,30);
4709 UInt szB = 1 << szLg2;
4710 Bool isLoad = INSN(22,22) == 1;
4711 UInt imm9 = INSN(20,12);
4712 UInt nn = INSN(9,5);
4713 UInt tt = INSN(4,0);
4714 Bool wBack = INSN(10,10) == 1;
4715 UInt how = INSN(11,10);
4716 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4717 /* undecodable; fall through */
4718 } else {
4719 if (nn == 31) { /* FIXME generate stack alignment check */ }
4720
4721 // Compute the transfer address TA and the writeback address WA.
4722 IRTemp tRN = newTemp(Ity_I64);
4723 assign(tRN, getIReg64orSP(nn));
4724 IRTemp tEA = newTemp(Ity_I64);
4725 Long simm9 = (Long)sx_to_64(imm9, 9);
4726 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4727
4728 IRTemp tTA = newTemp(Ity_I64);
4729 IRTemp tWA = newTemp(Ity_I64);
4730 switch (how) {
4731 case BITS2(0,1):
4732 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4733 case BITS2(1,1):
4734 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4735 case BITS2(0,0):
4736 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4737 default:
4738 vassert(0); /* NOTREACHED */
4739 }
4740
4741 /* Normally rN would be updated after the transfer. However, in
4742 the special case typified by
4743 str x30, [sp,#-16]!
4744 it is necessary to update SP before the transfer, (1)
4745 because Memcheck will otherwise complain about a write
4746 below the stack pointer, and (2) because the segfault
4747 stack extension mechanism will otherwise extend the stack
4748 only down to SP before the instruction, which might not be
4749 far enough, if the -16 offset takes the actual access
4750 address to the next page.
4751 */
4752 Bool earlyWBack
4753 = wBack && simm9 < 0 && szB == 8
4754 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4755
4756 if (wBack && earlyWBack)
4757 putIReg64orSP(nn, mkexpr(tEA));
4758
4759 if (isLoad) {
4760 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4761 } else {
4762 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4763 }
4764
4765 if (wBack && !earlyWBack)
4766 putIReg64orSP(nn, mkexpr(tEA));
4767
4768 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4769 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4770 const HChar* fmt_str = NULL;
4771 switch (how) {
4772 case BITS2(0,1):
4773 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4774 break;
4775 case BITS2(1,1):
4776 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4777 break;
4778 case BITS2(0,0):
4779 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4780 break;
4781 default:
4782 vassert(0);
4783 }
4784 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4785 nameIRegOrZR(szB == 8, tt),
4786 nameIReg64orSP(nn), simm9);
4787 return True;
4788 }
4789 }
4790
4791 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4792 /* L==1 => mm==LD
4793 L==0 => mm==ST
4794 x==0 => 32 bit transfers, and zero extended loads
4795 x==1 => 64 bit transfers
4796 simm7 is scaled by the (single-register) transfer size
4797
4798 (at-Rn-then-Rn=EA)
4799 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4800
4801 (at-EA-then-Rn=EA)
4802 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4803
4804 (at-EA)
4805 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4806 */
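/* Example: "stp x29, x30, [sp, #-16]!" is the at-EA-then-Rn=EA form
   with x==1 and imm7 == -2, the immediate being scaled by the 8-byte
   single-register transfer size. */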
4807
4808 UInt insn_30_23 = INSN(30,23);
4809 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4810 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4811 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4812 UInt bL = INSN(22,22);
4813 UInt bX = INSN(31,31);
4814 UInt bWBack = INSN(23,23);
4815 UInt rT1 = INSN(4,0);
4816 UInt rN = INSN(9,5);
4817 UInt rT2 = INSN(14,10);
4818 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4819 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4820 || (bL && rT1 == rT2)) {
4821 /* undecodable; fall through */
4822 } else {
4823 if (rN == 31) { /* FIXME generate stack alignment check */ }
4824
4825 // Compute the transfer address TA and the writeback address WA.
4826 IRTemp tRN = newTemp(Ity_I64);
4827 assign(tRN, getIReg64orSP(rN));
4828 IRTemp tEA = newTemp(Ity_I64);
4829 simm7 = (bX ? 8 : 4) * simm7;
4830 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4831
4832 IRTemp tTA = newTemp(Ity_I64);
4833 IRTemp tWA = newTemp(Ity_I64);
4834 switch (INSN(24,23)) {
4835 case BITS2(0,1):
4836 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4837 case BITS2(1,1):
4838 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4839 case BITS2(1,0):
4840 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4841 default:
4842 vassert(0); /* NOTREACHED */
4843 }
4844
4845 /* Normally rN would be updated after the transfer. However, in
4846 the special case typified by
4847 stp x29, x30, [sp,#-112]!
4848 it is necessary to update SP before the transfer, (1)
4849 because Memcheck will otherwise complain about a write
4850 below the stack pointer, and (2) because the segfault
4851 stack extension mechanism will otherwise extend the stack
4852 only down to SP before the instruction, which might not be
4853 far enough, if the -112 offset takes the actual access
4854 address to the next page.
4855 */
4856 Bool earlyWBack
4857 = bWBack && simm7 < 0
4858 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4859
4860 if (bWBack && earlyWBack)
4861 putIReg64orSP(rN, mkexpr(tEA));
4862
4863 /**/ if (bL == 1 && bX == 1) {
4864 // 64 bit load
4865 putIReg64orZR(rT1, loadLE(Ity_I64,
4866 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4867 putIReg64orZR(rT2, loadLE(Ity_I64,
4868 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4869 } else if (bL == 1 && bX == 0) {
4870 // 32 bit load
4871 putIReg32orZR(rT1, loadLE(Ity_I32,
4872 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4873 putIReg32orZR(rT2, loadLE(Ity_I32,
4874 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4875 } else if (bL == 0 && bX == 1) {
4876 // 64 bit store
4877 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4878 getIReg64orZR(rT1));
4879 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4880 getIReg64orZR(rT2));
4881 } else {
4882 vassert(bL == 0 && bX == 0);
4883 // 32 bit store
4884 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4885 getIReg32orZR(rT1));
4886 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4887 getIReg32orZR(rT2));
4888 }
4889
4890 if (bWBack && !earlyWBack)
4891 putIReg64orSP(rN, mkexpr(tEA));
4892
4893 const HChar* fmt_str = NULL;
4894 switch (INSN(24,23)) {
4895 case BITS2(0,1):
4896 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4897 break;
4898 case BITS2(1,1):
4899 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4900 break;
4901 case BITS2(1,0):
4902 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4903 break;
4904 default:
4905 vassert(0);
4906 }
4907 DIP(fmt_str, bL == 0 ? "st" : "ld",
4908 nameIRegOrZR(bX == 1, rT1),
4909 nameIRegOrZR(bX == 1, rT2),
4910 nameIReg64orSP(rN), simm7);
4911 return True;
4912 }
4913 }
4914
4915 /* ---------------- LDR (literal, int reg) ---------------- */
4916 /* 31 29 23 4
4917 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
4918 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
4919 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4920 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
4921 Just handles the first two cases for now.
4922 */
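/* Example: "ldr x0, <label>" loads from PC plus a signed offset;
   since imm19 is shifted left by 2 before sign extension, the
   reachable range is +/- 1MB from the instruction. */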
4923 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4924 UInt imm19 = INSN(23,5);
4925 UInt rT = INSN(4,0);
4926 UInt bX = INSN(30,30);
4927 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4928 if (bX) {
4929 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4930 } else {
4931 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4932 }
4933 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4934 return True;
4935 }
4936
4937 /* -------------- {LD,ST}R (integer register) --------------- */
4938 /* 31 29 20 15 12 11 9 4
4939 | | | | | | | |
4940 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
4941 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
4942 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
4943 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
4944
4945 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
4946 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
4947 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
4948 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
4949 */
4950 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
4951 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4952 HChar dis_buf[64];
4953 UInt szLg2 = INSN(31,30);
4954 Bool isLD = INSN(22,22) == 1;
4955 UInt tt = INSN(4,0);
4956 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4957 if (ea != IRTemp_INVALID) {
4958 switch (szLg2) {
4959 case 3: /* 64 bit */
4960 if (isLD) {
4961 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
4962 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
4963 } else {
4964 storeLE(mkexpr(ea), getIReg64orZR(tt));
4965 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
4966 }
4967 break;
4968 case 2: /* 32 bit */
4969 if (isLD) {
4970 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
4971 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
4972 } else {
4973 storeLE(mkexpr(ea), getIReg32orZR(tt));
4974 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
4975 }
4976 break;
4977 case 1: /* 16 bit */
4978 if (isLD) {
4979 putIReg64orZR(tt, unop(Iop_16Uto64,
4980 loadLE(Ity_I16, mkexpr(ea))));
4981 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4982 } else {
4983 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
4984 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4985 }
4986 break;
4987 case 0: /* 8 bit */
4988 if (isLD) {
4989 putIReg64orZR(tt, unop(Iop_8Uto64,
4990 loadLE(Ity_I8, mkexpr(ea))));
4991 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
4992 } else {
4993 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
4994 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4995 }
4996 break;
4997 default:
4998 vassert(0);
4999 }
5000 return True;
5001 }
5002 }
5003
5004 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5005 /* 31 29 26 23 21 9 4
5006 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5007 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5008 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5009 where
5010 Rt is Wt when x==1, Xt when x==0
5011 */
5012 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5013 /* Further checks on bits 31:30 and 22 */
5014 Bool valid = False;
5015 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5016 case BITS3(1,0,0):
5017 case BITS3(0,1,0): case BITS3(0,1,1):
5018 case BITS3(0,0,0): case BITS3(0,0,1):
5019 valid = True;
5020 break;
5021 }
5022 if (valid) {
5023 UInt szLg2 = INSN(31,30);
5024 UInt bitX = INSN(22,22);
5025 UInt imm12 = INSN(21,10);
5026 UInt nn = INSN(9,5);
5027 UInt tt = INSN(4,0);
5028 UInt szB = 1 << szLg2;
5029 IRExpr* ea = binop(Iop_Add64,
5030 getIReg64orSP(nn), mkU64(imm12 * szB));
5031 switch (szB) {
5032 case 4:
5033 vassert(bitX == 0);
5034 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5035 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5036 nameIReg64orSP(nn), imm12 * szB);
5037 break;
5038 case 2:
5039 if (bitX == 1) {
5040 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5041 } else {
5042 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5043 }
5044 DIP("ldrsh %s, [%s, #%u]\n",
5045 nameIRegOrZR(bitX == 0, tt),
5046 nameIReg64orSP(nn), imm12 * szB);
5047 break;
5048 case 1:
5049 if (bitX == 1) {
5050 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5051 } else {
5052 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5053 }
5054 DIP("ldrsb %s, [%s, #%u]\n",
5055 nameIRegOrZR(bitX == 0, tt),
5056 nameIReg64orSP(nn), imm12 * szB);
5057 break;
5058 default:
5059 vassert(0);
5060 }
5061 return True;
5062 }
5063 /* else fall through */
5064 }
5065
5066 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5067 /* (at-Rn-then-Rn=EA)
5068 31 29 23 21 20 11 9 4
5069 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5070 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5071 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5072
5073 (at-EA-then-Rn=EA)
5074 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5075 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5076 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5077 where
5078 Rt is Wt when x==1, Xt when x==0
5079 transfer-at-Rn when [11]==0, at EA when [11]==1
5080 */
5081 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5082 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5083 /* Further checks on bits 31:30 and 22 */
5084 Bool valid = False;
5085 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5086 case BITS3(1,0,0): // LDRSW Xt
5087 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5088 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5089 valid = True;
5090 break;
5091 }
5092 if (valid) {
5093 UInt szLg2 = INSN(31,30);
5094 UInt imm9 = INSN(20,12);
5095 Bool atRN = INSN(11,11) == 0;
5096 UInt nn = INSN(9,5);
5097 UInt tt = INSN(4,0);
5098 IRTemp tRN = newTemp(Ity_I64);
5099 IRTemp tEA = newTemp(Ity_I64);
5100 IRTemp tTA = IRTemp_INVALID;
5101 ULong simm9 = sx_to_64(imm9, 9);
5102 Bool is64 = INSN(22,22) == 0;
5103 assign(tRN, getIReg64orSP(nn));
5104 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5105 tTA = atRN ? tRN : tEA;
5106 HChar ch = '?';
5107 /* There are 5 cases:
5108 byte load, SX to 64
5109 byte load, SX to 32, ZX to 64
5110 halfword load, SX to 64
5111 halfword load, SX to 32, ZX to 64
5112 word load, SX to 64
5113 The ifs below handle them in the listed order.
5114 */
5115 if (szLg2 == 0) {
5116 ch = 'b';
5117 if (is64) {
5118 putIReg64orZR(tt, unop(Iop_8Sto64,
5119 loadLE(Ity_I8, mkexpr(tTA))));
5120 } else {
5121 putIReg32orZR(tt, unop(Iop_8Sto32,
5122 loadLE(Ity_I8, mkexpr(tTA))));
5123 }
5124 }
5125 else if (szLg2 == 1) {
5126 ch = 'h';
5127 if (is64) {
5128 putIReg64orZR(tt, unop(Iop_16Sto64,
5129 loadLE(Ity_I16, mkexpr(tTA))));
5130 } else {
5131 putIReg32orZR(tt, unop(Iop_16Sto32,
5132 loadLE(Ity_I16, mkexpr(tTA))));
5133 }
5134 }
5135 else if (szLg2 == 2 && is64) {
5136 ch = 'w';
5137 putIReg64orZR(tt, unop(Iop_32Sto64,
5138 loadLE(Ity_I32, mkexpr(tTA))));
5139 }
5140 else {
5141 vassert(0);
5142 }
5143 putIReg64orSP(nn, mkexpr(tEA));
5144             DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5145                 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5146 return True;
5147 }
5148 /* else fall through */
5149 }
5150
5151 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5152 /* 31 29 23 21 20 11 9 4
5153 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5154 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5155 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5156 where
5157 Rt is Wt when x==1, Xt when x==0
5158 */
5159 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5160 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5161 /* Further checks on bits 31:30 and 22 */
5162 Bool valid = False;
5163 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5164 case BITS3(1,0,0): // LDURSW Xt
5165 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5166 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5167 valid = True;
5168 break;
5169 }
5170 if (valid) {
5171 UInt szLg2 = INSN(31,30);
5172 UInt imm9 = INSN(20,12);
5173 UInt nn = INSN(9,5);
5174 UInt tt = INSN(4,0);
5175 IRTemp tRN = newTemp(Ity_I64);
5176 IRTemp tEA = newTemp(Ity_I64);
5177 ULong simm9 = sx_to_64(imm9, 9);
5178 Bool is64 = INSN(22,22) == 0;
5179 assign(tRN, getIReg64orSP(nn));
5180 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5181 HChar ch = '?';
5182 /* There are 5 cases:
5183 byte load, SX to 64
5184 byte load, SX to 32, ZX to 64
5185 halfword load, SX to 64
5186 halfword load, SX to 32, ZX to 64
5187 word load, SX to 64
5188 The ifs below handle them in the listed order.
5189 */
5190 if (szLg2 == 0) {
5191 ch = 'b';
5192 if (is64) {
5193 putIReg64orZR(tt, unop(Iop_8Sto64,
5194 loadLE(Ity_I8, mkexpr(tEA))));
5195 } else {
5196 putIReg32orZR(tt, unop(Iop_8Sto32,
5197 loadLE(Ity_I8, mkexpr(tEA))));
5198 }
5199 }
5200 else if (szLg2 == 1) {
5201 ch = 'h';
5202 if (is64) {
5203 putIReg64orZR(tt, unop(Iop_16Sto64,
5204 loadLE(Ity_I16, mkexpr(tEA))));
5205 } else {
5206 putIReg32orZR(tt, unop(Iop_16Sto32,
5207 loadLE(Ity_I16, mkexpr(tEA))));
5208 }
5209 }
5210 else if (szLg2 == 2 && is64) {
5211 ch = 'w';
5212 putIReg64orZR(tt, unop(Iop_32Sto64,
5213 loadLE(Ity_I32, mkexpr(tEA))));
5214 }
5215 else {
5216 vassert(0);
5217 }
5218 DIP("ldurs%c %s, [%s, #%lld]",
5219 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5220 return True;
5221 }
5222 /* else fall through */
5223 }
5224
5225 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5226 /* L==1 => mm==LD
5227 L==0 => mm==ST
5228 sz==00 => 32 bit (S) transfers
5229 sz==01 => 64 bit (D) transfers
5230 sz==10 => 128 bit (Q) transfers
5231 sz==11 isn't allowed
5232 simm7 is scaled by the (single-register) transfer size
5233
5234 31 29 26 22 21 14 9 4
5235
5236 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5237 (at-EA, with nontemporal hint)
5238
5239 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5240 (at-Rn-then-Rn=EA)
5241
5242 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5243 (at-EA)
5244
5245 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5246 (at-EA-then-Rn=EA)
5247 */
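   /* Scaling example (illustrative): for a Q-register pair szB is 16,
      so an encoded imm7 of -32 (0x60) denotes a byte offset of -512;
      hence "stp q0, q1, [sp, #-512]!" carries imm7 == -32. */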
5248 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5249 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5250 Bool isLD = INSN(22,22) == 1;
5251 Bool wBack = INSN(23,23) == 1;
5252 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5253 UInt tt2 = INSN(14,10);
5254 UInt nn = INSN(9,5);
5255 UInt tt1 = INSN(4,0);
5256 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5257 /* undecodable; fall through */
5258 } else {
5259 if (nn == 31) { /* FIXME generate stack alignment check */ }
5260
5261 // Compute the transfer address TA and the writeback address WA.
5262 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5263 IRTemp tRN = newTemp(Ity_I64);
5264 assign(tRN, getIReg64orSP(nn));
5265 IRTemp tEA = newTemp(Ity_I64);
5266 simm7 = szB * simm7;
5267 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5268
5269 IRTemp tTA = newTemp(Ity_I64);
5270 IRTemp tWA = newTemp(Ity_I64);
5271 switch (INSN(24,23)) {
5272 case BITS2(0,1):
5273 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5274 case BITS2(1,1):
5275 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5276 case BITS2(1,0):
5277 case BITS2(0,0):
5278 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5279 default:
5280 vassert(0); /* NOTREACHED */
5281 }
5282
5283 IRType ty = Ity_INVALID;
5284 switch (szB) {
5285 case 4: ty = Ity_F32; break;
5286 case 8: ty = Ity_F64; break;
5287 case 16: ty = Ity_V128; break;
5288 default: vassert(0);
5289 }
5290
5291 /* Normally rN would be updated after the transfer. However, in
5292             the special cases typified by
5293 stp q0, q1, [sp,#-512]!
5294 stp d0, d1, [sp,#-512]!
5295 stp s0, s1, [sp,#-512]!
5296 it is necessary to update SP before the transfer, (1)
5297 because Memcheck will otherwise complain about a write
5298 below the stack pointer, and (2) because the segfault
5299 stack extension mechanism will otherwise extend the stack
5300 only down to SP before the instruction, which might not be
5301             far enough, if the -512 offset takes the actual access
5302             address onto the next page down.
5303 */
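         /* Illustration (addresses purely for example): with SP ==
            0x7FFF0000 (page-aligned), "stp q0, q1, [sp, #-512]!"
            accesses 0x7FFEFE00 .. 0x7FFEFE1F, on the page below SP.
            Unless SP is moved down first, the stack is only guaranteed
            to be extended down to 0x7FFF0000, so the stores could
            fault, and Memcheck would flag a write below SP. */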
5304 Bool earlyWBack
5305 = wBack && simm7 < 0
5306 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5307
5308 if (wBack && earlyWBack)
5309 putIReg64orSP(nn, mkexpr(tEA));
5310
5311 if (isLD) {
5312 if (szB < 16) {
5313 putQReg128(tt1, mkV128(0x0000));
5314 }
5315 putQRegLO(tt1,
5316 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5317 if (szB < 16) {
5318 putQReg128(tt2, mkV128(0x0000));
5319 }
5320 putQRegLO(tt2,
5321 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5322 } else {
5323 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5324 getQRegLO(tt1, ty));
5325 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5326 getQRegLO(tt2, ty));
5327 }
5328
5329 if (wBack && !earlyWBack)
5330 putIReg64orSP(nn, mkexpr(tEA));
5331
5332 const HChar* fmt_str = NULL;
5333 switch (INSN(24,23)) {
5334 case BITS2(0,1):
5335 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5336 break;
5337 case BITS2(1,1):
5338 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5339 break;
5340 case BITS2(1,0):
5341 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5342 break;
5343 case BITS2(0,0):
5344 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5345 break;
5346 default:
5347 vassert(0);
5348 }
5349 DIP(fmt_str, isLD ? "ld" : "st",
5350 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5351 nameIReg64orSP(nn), simm7);
5352 return True;
5353 }
5354 }
5355
5356 /* -------------- {LD,ST}R (vector register) --------------- */
5357 /* 31 29 23 20 15 12 11 9 4
5358 | | | | | | | | |
5359 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5360 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5361 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5362 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5363 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5364
5365 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5366 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5367 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5368 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5369 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5370 */
5371 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5372 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5373 HChar dis_buf[64];
5374 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5375 Bool isLD = INSN(22,22) == 1;
5376 UInt tt = INSN(4,0);
5377 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5378 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5379 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5380 switch (szLg2) {
5381 case 0: /* 8 bit */
5382 if (isLD) {
5383 putQReg128(tt, mkV128(0x0000));
5384 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5386 } else {
5387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5389 }
5390 break;
5391 case 1:
5392 if (isLD) {
5393 putQReg128(tt, mkV128(0x0000));
5394 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5395 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5396 } else {
5397 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5398 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5399 }
5400 break;
5401 case 2: /* 32 bit */
5402 if (isLD) {
5403 putQReg128(tt, mkV128(0x0000));
5404 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5405 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5406 } else {
5407 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5408 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5409 }
5410 break;
5411 case 3: /* 64 bit */
5412 if (isLD) {
5413 putQReg128(tt, mkV128(0x0000));
5414 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5415 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5416 } else {
5417 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5418 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5419 }
5420 break;
5421 case 4:
5422 if (isLD) {
5423 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5424 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5425 } else {
5426 storeLE(mkexpr(ea), getQReg128(tt));
5427 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5428 }
5429 break;
5430 default:
5431 vassert(0);
5432 }
5433 return True;
5434 }
5435 after_LDR_STR_vector_register:
5436
5437 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5438 /* 31 29 22 20 15 12 11 9 4
5439 | | | | | | | | |
5440 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5441
5442 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5443 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5444
5445 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5446 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5447 */
5448 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5449 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5450 HChar dis_buf[64];
5451 UInt szLg2 = INSN(31,30);
5452 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5453 UInt tt = INSN(4,0);
5454 if (szLg2 == 3) goto after_LDRS_integer_register;
5455 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5456 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5457 /* Enumerate the 5 variants explicitly. */
5458 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5459 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5460 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5461 return True;
5462 }
5463 else
5464 if (szLg2 == 1/*16 bit*/) {
5465 if (sxTo64) {
5466 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5467 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5468 } else {
5469 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5470 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5471 }
5472 return True;
5473 }
5474 else
5475 if (szLg2 == 0/*8 bit*/) {
5476 if (sxTo64) {
5477 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5478 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5479 } else {
5480 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5481 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5482 }
5483 return True;
5484 }
5485 /* else it's an invalid combination */
5486 }
5487 after_LDRS_integer_register:
5488
5489 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5490 /* This is the Unsigned offset variant only. The Post-Index and
5491 Pre-Index variants are below.
5492
5493 31 29 23 21 9 4
5494 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5495 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5496 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5497 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5498 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5499
5500 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5501 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5502 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5503 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5504 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5505 */
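   /* Scaling example (illustrative): "ldr q0, [x1, #32]" has
      szLg2 == 4, so the encoded imm12 is 2 and the decoder
      reconstructs the byte offset as 2 << 4 == 32. */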
5506 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5507 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5508 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5509 Bool isLD = INSN(22,22) == 1;
5510 UInt pimm12 = INSN(21,10) << szLg2;
5511 UInt nn = INSN(9,5);
5512 UInt tt = INSN(4,0);
5513 IRTemp tEA = newTemp(Ity_I64);
5514 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5515 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5516 if (isLD) {
5517 if (szLg2 < 4) {
5518 putQReg128(tt, mkV128(0x0000));
5519 }
5520 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5521 } else {
5522 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5523 }
5524 DIP("%s %s, [%s, #%u]\n",
5525 isLD ? "ldr" : "str",
5526 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5527 return True;
5528 }
5529
5530 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5531 /* These are the Post-Index and Pre-Index variants.
5532
5533 31 29 23 20 11 9 4
5534 (at-Rn-then-Rn=EA)
5535 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5536 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5537 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5538 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5539 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5540
5541 (at-EA-then-Rn=EA)
5542 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5543 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5544 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5545 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5546 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5547
5548 Stores are the same except with bit 22 set to 0.
5549 */
5550 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5551 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5552 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5553 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5554 Bool isLD = INSN(22,22) == 1;
5555 UInt imm9 = INSN(20,12);
5556 Bool atRN = INSN(11,11) == 0;
5557 UInt nn = INSN(9,5);
5558 UInt tt = INSN(4,0);
5559 IRTemp tRN = newTemp(Ity_I64);
5560 IRTemp tEA = newTemp(Ity_I64);
5561 IRTemp tTA = IRTemp_INVALID;
5562 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5563 ULong simm9 = sx_to_64(imm9, 9);
5564 assign(tRN, getIReg64orSP(nn));
5565 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5566 tTA = atRN ? tRN : tEA;
5567 if (isLD) {
5568 if (szLg2 < 4) {
5569 putQReg128(tt, mkV128(0x0000));
5570 }
5571 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5572 } else {
5573 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5574 }
5575 putIReg64orSP(nn, mkexpr(tEA));
5576 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5577 isLD ? "ldr" : "str",
5578 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5579 return True;
5580 }
5581
5582 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5583 /* 31 29 23 20 11 9 4
5584 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5585 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5586 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5587 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5588 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5589
5590 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5591 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5592 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5593 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5594 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5595 */
5596 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5597 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5598 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5599 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5600 Bool isLD = INSN(22,22) == 1;
5601 UInt imm9 = INSN(20,12);
5602 UInt nn = INSN(9,5);
5603 UInt tt = INSN(4,0);
5604 ULong simm9 = sx_to_64(imm9, 9);
5605 IRTemp tEA = newTemp(Ity_I64);
5606 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5607 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5608 if (isLD) {
5609 if (szLg2 < 4) {
5610 putQReg128(tt, mkV128(0x0000));
5611 }
5612 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5613 } else {
5614 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5615 }
5616 DIP("%s %s, [%s, #%lld]\n",
5617 isLD ? "ldur" : "stur",
5618 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5619 return True;
5620 }
5621
5622 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5623 /* 31 29 23 4
5624 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5625 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5626 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5627 */
5628 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5629 UInt szB = 4 << INSN(31,30);
5630 UInt imm19 = INSN(23,5);
5631 UInt tt = INSN(4,0);
5632 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5633 IRType ty = preferredVectorSubTypeFromSize(szB);
5634 putQReg128(tt, mkV128(0x0000));
5635 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5636 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5637 return True;
5638 }
5639
5640 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5641 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5642 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5643 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5644 /* 31 29 26 22 21 20 15 11 9 4
5645
5646 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5647 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5648
5649 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5650 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5651
5652 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5653 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5654
5655 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5656 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5657
5658 T = defined by Q and sz in the normal way
5659 step = if m == 11111 then transfer-size else Xm
5660 xx = case L of 1 -> LD ; 0 -> ST
5661 */
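   /* Post-index step example (illustrative):
      "ld4 {v0.16b-v3.16b}, [x1], #64" is the m == 11111 form; with
      Q == 1 and nRegs == 4 the transfer size is 16 * 4 == 64 bytes,
      which is exactly the amount added to Xn by the writeback. */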
5662 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5663 && INSN(21,21) == 0) {
5664 Bool bitQ = INSN(30,30);
5665 Bool isPX = INSN(23,23) == 1;
5666 Bool isLD = INSN(22,22) == 1;
5667 UInt mm = INSN(20,16);
5668 UInt opc = INSN(15,12);
5669 UInt sz = INSN(11,10);
5670 UInt nn = INSN(9,5);
5671 UInt tt = INSN(4,0);
5672 Bool isQ = bitQ == 1;
5673 Bool is1d = sz == BITS2(1,1) && !isQ;
5674 UInt nRegs = 0;
5675 switch (opc) {
5676 case BITS4(0,0,0,0): nRegs = 4; break;
5677 case BITS4(0,1,0,0): nRegs = 3; break;
5678 case BITS4(1,0,0,0): nRegs = 2; break;
5679 case BITS4(0,1,1,1): nRegs = 1; break;
5680 default: break;
5681 }
5682
5683 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5684 If we see it, set nRegs to 0 so as to cause the next conditional
5685 to fail. */
5686 if (!isPX && mm != 0)
5687 nRegs = 0;
5688
5689 if (nRegs == 1 /* .1d is allowed */
5690 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5691
5692 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5693
5694 /* Generate the transfer address (TA) and if necessary the
5695 writeback address (WB) */
5696 IRTemp tTA = newTemp(Ity_I64);
5697 assign(tTA, getIReg64orSP(nn));
5698 if (nn == 31) { /* FIXME generate stack alignment check */ }
5699 IRTemp tWB = IRTemp_INVALID;
5700 if (isPX) {
5701 tWB = newTemp(Ity_I64);
5702 assign(tWB, binop(Iop_Add64,
5703 mkexpr(tTA),
5704 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5705 : getIReg64orZR(mm)));
5706 }
5707
5708 /* -- BEGIN generate the transfers -- */
5709
5710 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5711 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5712 switch (nRegs) {
5713 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5714 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5715 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5716 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5717 default: vassert(0);
5718 }
5719
5720 /* -- Multiple 128 or 64 bit stores -- */
5721 if (!isLD) {
5722 switch (nRegs) {
5723 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5724 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5725 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5726 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5727 default: vassert(0);
5728 }
5729 switch (nRegs) {
5730 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5731 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5732 break;
5733 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5734 (&i0, &i1, &i2, sz, u0, u1, u2);
5735 break;
5736 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5737 (&i0, &i1, sz, u0, u1);
5738 break;
5739 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5740 (&i0, sz, u0);
5741 break;
5742 default: vassert(0);
5743 }
5744 # define MAYBE_NARROW_TO_64(_expr) \
5745 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5746 UInt step = isQ ? 16 : 8;
5747 switch (nRegs) {
5748 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5749 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5750 /* fallthru */
5751 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5752 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5753 /* fallthru */
5754 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5755 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5756 /* fallthru */
5757 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5758 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5759 break;
5760 default: vassert(0);
5761 }
5762 # undef MAYBE_NARROW_TO_64
5763 }
5764
5765 /* -- Multiple 128 or 64 bit loads -- */
5766 else /* isLD */ {
5767 UInt step = isQ ? 16 : 8;
5768 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5769 # define MAYBE_WIDEN_FROM_64(_expr) \
5770 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5771 switch (nRegs) {
5772 case 4:
5773 assign(i3, MAYBE_WIDEN_FROM_64(
5774 loadLE(loadTy,
5775 binop(Iop_Add64, mkexpr(tTA),
5776 mkU64(3 * step)))));
5777 /* fallthru */
5778 case 3:
5779 assign(i2, MAYBE_WIDEN_FROM_64(
5780 loadLE(loadTy,
5781 binop(Iop_Add64, mkexpr(tTA),
5782 mkU64(2 * step)))));
5783 /* fallthru */
5784 case 2:
5785 assign(i1, MAYBE_WIDEN_FROM_64(
5786 loadLE(loadTy,
5787 binop(Iop_Add64, mkexpr(tTA),
5788 mkU64(1 * step)))));
5789 /* fallthru */
5790 case 1:
5791 assign(i0, MAYBE_WIDEN_FROM_64(
5792 loadLE(loadTy,
5793 binop(Iop_Add64, mkexpr(tTA),
5794 mkU64(0 * step)))));
5795 break;
5796 default:
5797 vassert(0);
5798 }
5799 # undef MAYBE_WIDEN_FROM_64
5800 switch (nRegs) {
5801 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5802 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5803 break;
5804 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5805 (&u0, &u1, &u2, sz, i0, i1, i2);
5806 break;
5807 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5808 (&u0, &u1, sz, i0, i1);
5809 break;
5810 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5811 (&u0, sz, i0);
5812 break;
5813 default: vassert(0);
5814 }
5815 switch (nRegs) {
5816 case 4: putQReg128( (tt+3) % 32,
5817 math_MAYBE_ZERO_HI64(bitQ, u3));
5818 /* fallthru */
5819 case 3: putQReg128( (tt+2) % 32,
5820 math_MAYBE_ZERO_HI64(bitQ, u2));
5821 /* fallthru */
5822 case 2: putQReg128( (tt+1) % 32,
5823 math_MAYBE_ZERO_HI64(bitQ, u1));
5824 /* fallthru */
5825 case 1: putQReg128( (tt+0) % 32,
5826 math_MAYBE_ZERO_HI64(bitQ, u0));
5827 break;
5828 default: vassert(0);
5829 }
5830 }
5831
5832 /* -- END generate the transfers -- */
5833
5834 /* Do the writeback, if necessary */
5835 if (isPX) {
5836 putIReg64orSP(nn, mkexpr(tWB));
5837 }
5838
5839 HChar pxStr[20];
5840 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5841 if (isPX) {
5842 if (mm == BITS5(1,1,1,1,1))
5843 vex_sprintf(pxStr, ", #%u", xferSzB);
5844 else
5845 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
5846 }
5847 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
5848 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
5849 isLD ? "ld" : "st", nRegs,
5850 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
5851 pxStr);
5852
5853 return True;
5854 }
5855 /* else fall through */
5856 }
5857
5858 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
5859 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
5860 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
5861 /* 31 29 26 22 21 20 15 11 9 4
5862
5863 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
5864 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
5865
5866 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
5867 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
5868
5869 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
5870 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
5871
5872 T = defined by Q and sz in the normal way
5873 step = if m == 11111 then transfer-size else Xm
5874 xx = case L of 1 -> LD ; 0 -> ST
5875 */
5876 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5877 && INSN(21,21) == 0) {
5878 Bool bitQ = INSN(30,30);
5879 Bool isPX = INSN(23,23) == 1;
5880 Bool isLD = INSN(22,22) == 1;
5881 UInt mm = INSN(20,16);
5882 UInt opc = INSN(15,12);
5883 UInt sz = INSN(11,10);
5884 UInt nn = INSN(9,5);
5885 UInt tt = INSN(4,0);
5886 Bool isQ = bitQ == 1;
5887 UInt nRegs = 0;
5888 switch (opc) {
5889 case BITS4(0,0,1,0): nRegs = 4; break;
5890 case BITS4(0,1,1,0): nRegs = 3; break;
5891 case BITS4(1,0,1,0): nRegs = 2; break;
5892 default: break;
5893 }
5894
5895 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5896 If we see it, set nRegs to 0 so as to cause the next conditional
5897 to fail. */
5898 if (!isPX && mm != 0)
5899 nRegs = 0;
5900
5901 if (nRegs >= 2 && nRegs <= 4) {
5902
5903 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5904
5905 /* Generate the transfer address (TA) and if necessary the
5906 writeback address (WB) */
5907 IRTemp tTA = newTemp(Ity_I64);
5908 assign(tTA, getIReg64orSP(nn));
5909 if (nn == 31) { /* FIXME generate stack alignment check */ }
5910 IRTemp tWB = IRTemp_INVALID;
5911 if (isPX) {
5912 tWB = newTemp(Ity_I64);
5913 assign(tWB, binop(Iop_Add64,
5914 mkexpr(tTA),
5915 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5916 : getIReg64orZR(mm)));
5917 }
5918
5919 /* -- BEGIN generate the transfers -- */
5920
5921 IRTemp u0, u1, u2, u3;
5922 u0 = u1 = u2 = u3 = IRTemp_INVALID;
5923 switch (nRegs) {
5924 case 4: u3 = newTempV128(); /* fallthru */
5925 case 3: u2 = newTempV128(); /* fallthru */
5926 case 2: u1 = newTempV128();
5927 u0 = newTempV128(); break;
5928 default: vassert(0);
5929 }
5930
5931 /* -- Multiple 128 or 64 bit stores -- */
5932 if (!isLD) {
5933 switch (nRegs) {
5934 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5935 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5936 case 2: assign(u1, getQReg128((tt+1) % 32));
5937 assign(u0, getQReg128((tt+0) % 32)); break;
5938 default: vassert(0);
5939 }
5940 # define MAYBE_NARROW_TO_64(_expr) \
5941 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5942 UInt step = isQ ? 16 : 8;
5943 switch (nRegs) {
5944 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5945 MAYBE_NARROW_TO_64(mkexpr(u3)) );
5946 /* fallthru */
5947 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5948 MAYBE_NARROW_TO_64(mkexpr(u2)) );
5949 /* fallthru */
5950 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5951 MAYBE_NARROW_TO_64(mkexpr(u1)) );
5952 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5953 MAYBE_NARROW_TO_64(mkexpr(u0)) );
5954 break;
5955 default: vassert(0);
5956 }
5957 # undef MAYBE_NARROW_TO_64
5958 }
5959
5960 /* -- Multiple 128 or 64 bit loads -- */
5961 else /* isLD */ {
5962 UInt step = isQ ? 16 : 8;
5963 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5964 # define MAYBE_WIDEN_FROM_64(_expr) \
5965 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5966 switch (nRegs) {
5967 case 4:
5968 assign(u3, MAYBE_WIDEN_FROM_64(
5969 loadLE(loadTy,
5970 binop(Iop_Add64, mkexpr(tTA),
5971 mkU64(3 * step)))));
5972 /* fallthru */
5973 case 3:
5974 assign(u2, MAYBE_WIDEN_FROM_64(
5975 loadLE(loadTy,
5976 binop(Iop_Add64, mkexpr(tTA),
5977 mkU64(2 * step)))));
5978 /* fallthru */
5979 case 2:
5980 assign(u1, MAYBE_WIDEN_FROM_64(
5981 loadLE(loadTy,
5982 binop(Iop_Add64, mkexpr(tTA),
5983 mkU64(1 * step)))));
5984 assign(u0, MAYBE_WIDEN_FROM_64(
5985 loadLE(loadTy,
5986 binop(Iop_Add64, mkexpr(tTA),
5987 mkU64(0 * step)))));
5988 break;
5989 default:
5990 vassert(0);
5991 }
5992 # undef MAYBE_WIDEN_FROM_64
5993 switch (nRegs) {
5994 case 4: putQReg128( (tt+3) % 32,
5995 math_MAYBE_ZERO_HI64(bitQ, u3));
5996 /* fallthru */
5997 case 3: putQReg128( (tt+2) % 32,
5998 math_MAYBE_ZERO_HI64(bitQ, u2));
5999 /* fallthru */
6000 case 2: putQReg128( (tt+1) % 32,
6001 math_MAYBE_ZERO_HI64(bitQ, u1));
6002 putQReg128( (tt+0) % 32,
6003 math_MAYBE_ZERO_HI64(bitQ, u0));
6004 break;
6005 default: vassert(0);
6006 }
6007 }
6008
6009 /* -- END generate the transfers -- */
6010
6011 /* Do the writeback, if necessary */
6012 if (isPX) {
6013 putIReg64orSP(nn, mkexpr(tWB));
6014 }
6015
6016 HChar pxStr[20];
6017 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6018 if (isPX) {
6019 if (mm == BITS5(1,1,1,1,1))
6020 vex_sprintf(pxStr, ", #%u", xferSzB);
6021 else
6022 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6023 }
6024 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6025 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6026 isLD ? "ld" : "st",
6027 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6028 pxStr);
6029
6030 return True;
6031 }
6032 /* else fall through */
6033 }
6034
6035 /* ---------- LD1R (single structure, replicate) ---------- */
6036 /* ---------- LD2R (single structure, replicate) ---------- */
6037 /* ---------- LD3R (single structure, replicate) ---------- */
6038 /* ---------- LD4R (single structure, replicate) ---------- */
6039 /* 31 29 22 20 15 11 9 4
6040 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6041 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6042
6043 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6044 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6045
6046 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6047 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6048
6049 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6050 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6051
6052 step = if m == 11111 then transfer-size else Xm
6053 */
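   /* Replication example (illustrative): "ld1r {v0.8h}, [x2]"
      (Q == 1, sz == 01) loads a single 16-bit element and duplicates
      it into all eight lanes of V0; LD2R..LD4R do the same for 2..4
      consecutive registers, reading consecutive elements from memory. */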
6054 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6055 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6056 && INSN(12,12) == 0) {
6057 UInt bitQ = INSN(30,30);
6058 Bool isPX = INSN(23,23) == 1;
6059 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6060 UInt mm = INSN(20,16);
6061 UInt sz = INSN(11,10);
6062 UInt nn = INSN(9,5);
6063 UInt tt = INSN(4,0);
6064
6065 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6066 if (isPX || mm == 0) {
6067
6068 IRType ty = integerIRTypeOfSize(1 << sz);
6069
6070 UInt laneSzB = 1 << sz;
6071 UInt xferSzB = laneSzB * nRegs;
6072
6073 /* Generate the transfer address (TA) and if necessary the
6074 writeback address (WB) */
6075 IRTemp tTA = newTemp(Ity_I64);
6076 assign(tTA, getIReg64orSP(nn));
6077 if (nn == 31) { /* FIXME generate stack alignment check */ }
6078 IRTemp tWB = IRTemp_INVALID;
6079 if (isPX) {
6080 tWB = newTemp(Ity_I64);
6081 assign(tWB, binop(Iop_Add64,
6082 mkexpr(tTA),
6083 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6084 : getIReg64orZR(mm)));
6085 }
6086
6087 /* Do the writeback, if necessary */
6088 if (isPX) {
6089 putIReg64orSP(nn, mkexpr(tWB));
6090 }
6091
6092 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6093 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6094 switch (nRegs) {
6095 case 4:
6096 e3 = newTemp(ty);
6097 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6098 mkU64(3 * laneSzB))));
6099 v3 = math_DUP_TO_V128(e3, ty);
6100 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6101 /* fallthrough */
6102 case 3:
6103 e2 = newTemp(ty);
6104 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6105 mkU64(2 * laneSzB))));
6106 v2 = math_DUP_TO_V128(e2, ty);
6107 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6108 /* fallthrough */
6109 case 2:
6110 e1 = newTemp(ty);
6111 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6112 mkU64(1 * laneSzB))));
6113 v1 = math_DUP_TO_V128(e1, ty);
6114 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6115 /* fallthrough */
6116 case 1:
6117 e0 = newTemp(ty);
6118 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6119 mkU64(0 * laneSzB))));
6120 v0 = math_DUP_TO_V128(e0, ty);
6121 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6122 break;
6123 default:
6124 vassert(0);
6125 }
6126
6127 HChar pxStr[20];
6128 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6129 if (isPX) {
6130 if (mm == BITS5(1,1,1,1,1))
6131 vex_sprintf(pxStr, ", #%u", xferSzB);
6132 else
6133 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6134 }
6135 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6136 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6137 nRegs,
6138 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6139 pxStr);
6140
6141 return True;
6142 }
6143 /* else fall through */
6144 }
6145
6146 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6147 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6148 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6149 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6150 /* 31 29 22 21 20 15 11 9 4
6151 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6152 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6153
6154 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6155 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6156
6157 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6158 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6159
6160 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6161 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6162
6163 step = if m == 11111 then transfer-size else Xm
6164 op = case L of 1 -> LD ; 0 -> ST
6165
6166 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6167 01:b:b:b0 -> 2, bbb
6168 10:b:b:00 -> 4, bb
6169 10:b:0:01 -> 8, b
6170 */
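   /* Decode example (illustrative): xx == 10, Q == 1, S == 1, sz == 00
      packs to xx_q_S_sz == 0x2C, giving laneSzB == 4 and ix == 3,
      i.e. a 32-bit access to lane 3, as in "ld1 {v0.s}[3], [x0]". */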
6171 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6172 UInt bitQ = INSN(30,30);
6173 Bool isPX = INSN(23,23) == 1;
6174 Bool isLD = INSN(22,22) == 1;
6175 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6176 UInt mm = INSN(20,16);
6177 UInt xx = INSN(15,14);
6178 UInt bitS = INSN(12,12);
6179 UInt sz = INSN(11,10);
6180 UInt nn = INSN(9,5);
6181 UInt tt = INSN(4,0);
6182
6183 Bool valid = True;
6184
6185 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6186 if (!isPX && mm != 0)
6187 valid = False;
6188
6189 UInt laneSzB = 0; /* invalid */
6190 UInt ix = 16; /* invalid */
6191
6192 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6193 switch (xx_q_S_sz) {
6194 case 0x00: case 0x01: case 0x02: case 0x03:
6195 case 0x04: case 0x05: case 0x06: case 0x07:
6196 case 0x08: case 0x09: case 0x0A: case 0x0B:
6197 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6198 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6199 break;
6200 case 0x10: case 0x12: case 0x14: case 0x16:
6201 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6202 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6203 break;
6204 case 0x20: case 0x24: case 0x28: case 0x2C:
6205 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6206 break;
6207 case 0x21: case 0x29:
6208 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6209 break;
6210 default:
6211 break;
6212 }
6213
6214 if (valid && laneSzB != 0) {
6215
6216 IRType ty = integerIRTypeOfSize(laneSzB);
6217 UInt xferSzB = laneSzB * nRegs;
6218
6219 /* Generate the transfer address (TA) and if necessary the
6220 writeback address (WB) */
6221 IRTemp tTA = newTemp(Ity_I64);
6222 assign(tTA, getIReg64orSP(nn));
6223 if (nn == 31) { /* FIXME generate stack alignment check */ }
6224 IRTemp tWB = IRTemp_INVALID;
6225 if (isPX) {
6226 tWB = newTemp(Ity_I64);
6227 assign(tWB, binop(Iop_Add64,
6228 mkexpr(tTA),
6229 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6230 : getIReg64orZR(mm)));
6231 }
6232
6233 /* Do the writeback, if necessary */
6234 if (isPX) {
6235 putIReg64orSP(nn, mkexpr(tWB));
6236 }
6237
6238 switch (nRegs) {
6239 case 4: {
6240 IRExpr* addr
6241 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6242 if (isLD) {
6243 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6244 } else {
6245 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6246 }
6247 /* fallthrough */
6248 }
6249 case 3: {
6250 IRExpr* addr
6251 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6252 if (isLD) {
6253 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6254 } else {
6255 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6256 }
6257 /* fallthrough */
6258 }
6259 case 2: {
6260 IRExpr* addr
6261 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6262 if (isLD) {
6263 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6264 } else {
6265 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6266 }
6267 /* fallthrough */
6268 }
6269 case 1: {
6270 IRExpr* addr
6271 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6272 if (isLD) {
6273 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6274 } else {
6275 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6276 }
6277 break;
6278 }
6279 default:
6280 vassert(0);
6281 }
6282
6283 HChar pxStr[20];
6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6285 if (isPX) {
6286 if (mm == BITS5(1,1,1,1,1))
6287 vex_sprintf(pxStr, ", #%u", xferSzB);
6288 else
6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6290 }
6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6292 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6293 isLD ? "ld" : "st", nRegs,
6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6295 ix, nameIReg64orSP(nn), pxStr);
6296
6297 return True;
6298 }
6299 /* else fall through */
6300 }
6301
6302 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6303 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6304 /* 31 29 23 20 14 9 4
6305 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6306 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6307 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6308 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6309 */
6310 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6311 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6312 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6313 UInt szBlg2 = INSN(31,30);
6314 Bool isLD = INSN(22,22) == 1;
6315 Bool isAcqOrRel = INSN(15,15) == 1;
6316 UInt ss = INSN(20,16);
6317 UInt nn = INSN(9,5);
6318 UInt tt = INSN(4,0);
6319
6320 vassert(szBlg2 < 4);
6321 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6322 IRType ty = integerIRTypeOfSize(szB);
6323 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6324
6325 IRTemp ea = newTemp(Ity_I64);
6326 assign(ea, getIReg64orSP(nn));
6327 /* FIXME generate check that ea is szB-aligned */
6328
6329 if (isLD && ss == BITS5(1,1,1,1,1)) {
6330 IRTemp res = newTemp(ty);
6331 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6332 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6333 if (isAcqOrRel) {
6334 stmt(IRStmt_MBE(Imbe_Fence));
6335 }
6336 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6337 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6338 return True;
6339 }
6340 if (!isLD) {
6341 if (isAcqOrRel) {
6342 stmt(IRStmt_MBE(Imbe_Fence));
6343 }
6344 IRTemp res = newTemp(Ity_I1);
6345 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6346 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6347 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6348 Need to set rS to 1 on failure, 0 on success. */
6349 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6350 mkU64(1)));
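         /* e.g. res == 1 (store succeeded) gives ss := 1 ^ 1 == 0,
            which matches the architectural "success" status value. */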
6351 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6352 nameIRegOrZR(False, ss),
6353 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6354 return True;
6355 }
6356 /* else fall through */
6357 }
6358
6359 /* ------------------ LDA{R,RH,RB} ------------------ */
6360 /* ------------------ STL{R,RH,RB} ------------------ */
6361 /* 31 29 23 20 14 9 4
6362 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6363 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6364 */
6365 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6366 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6367 UInt szBlg2 = INSN(31,30);
6368 Bool isLD = INSN(22,22) == 1;
6369 UInt nn = INSN(9,5);
6370 UInt tt = INSN(4,0);
6371
6372 vassert(szBlg2 < 4);
6373 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6374 IRType ty = integerIRTypeOfSize(szB);
6375 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6376
6377 IRTemp ea = newTemp(Ity_I64);
6378 assign(ea, getIReg64orSP(nn));
6379 /* FIXME generate check that ea is szB-aligned */
6380
6381 if (isLD) {
6382 IRTemp res = newTemp(ty);
6383 assign(res, loadLE(ty, mkexpr(ea)));
6384 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6385 stmt(IRStmt_MBE(Imbe_Fence));
6386 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6387 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6388 } else {
6389 stmt(IRStmt_MBE(Imbe_Fence));
6390 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6391 storeLE(mkexpr(ea), data);
6392 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6393 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6394 }
6395 return True;
6396 }
6397
6398 /* ------------------ PRFM (immediate) ------------------ */
6399 /* 31 21 9 4
6400       11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6401 */
6402 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6403 UInt imm12 = INSN(21,10);
6404 UInt nn = INSN(9,5);
6405 UInt tt = INSN(4,0);
6406 /* Generating any IR here is pointless, except for documentation
6407 purposes, as it will get optimised away later. */
6408 IRTemp ea = newTemp(Ity_I64);
6409 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6410 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6411 return True;
6412 }
6413
6414 /* ------------------ PRFM (register) ------------------ */
6415 /* 31 29 22 20 15 12 11 9 4
6416       11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6417 */
6418 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6419 && INSN(11,10) == BITS2(1,0)) {
6420 HChar dis_buf[64];
6421 UInt tt = INSN(4,0);
6422 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6423 if (ea != IRTemp_INVALID) {
6424 /* No actual code to generate. */
6425 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6426 return True;
6427 }
6428 }
6429
6430 vex_printf("ARM64 front end: load_store\n");
6431 return False;
6432 # undef INSN
6433 }
6434
6435
6436 /*------------------------------------------------------------*/
6437 /*--- Control flow and misc instructions ---*/
6438 /*------------------------------------------------------------*/
6439
6440 static
6441 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6442 const VexArchInfo* archinfo)
6443 {
6444 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6445
6446 /* ---------------------- B cond ----------------------- */
6447 /* 31 24 4 3
6448 0101010 0 imm19 0 cond */
6449 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6450 UInt cond = INSN(3,0);
6451 ULong uimm64 = INSN(23,5) << 2;
6452 Long simm64 = (Long)sx_to_64(uimm64, 21);
6453 vassert(dres->whatNext == Dis_Continue);
6454 vassert(dres->len == 4);
6455 vassert(dres->continueAt == 0);
6456 vassert(dres->jk_StopHere == Ijk_INVALID);
6457 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6458 Ijk_Boring,
6459 IRConst_U64(guest_PC_curr_instr + simm64),
6460 OFFB_PC) );
6461 putPC(mkU64(guest_PC_curr_instr + 4));
6462 dres->whatNext = Dis_StopHere;
6463 dres->jk_StopHere = Ijk_Boring;
6464 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6465 return True;
6466 }
6467
6468 /* -------------------- B{L} uncond -------------------- */
6469 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6470 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6471 100101 imm26 B (PC + sxTo64(imm26 << 2))
6472 */
6473 UInt bLink = INSN(31,31);
6474 ULong uimm64 = INSN(25,0) << 2;
6475 Long simm64 = (Long)sx_to_64(uimm64, 28);
6476 if (bLink) {
6477 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6478 }
6479 putPC(mkU64(guest_PC_curr_instr + simm64));
6480 dres->whatNext = Dis_StopHere;
6481 dres->jk_StopHere = Ijk_Call;
6482 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6483 guest_PC_curr_instr + simm64);
6484 return True;
6485 }
6486
6487 /* --------------------- B{L} reg --------------------- */
6488 /* 31 24 22 20 15 9 4
6489 1101011 00 10 11111 000000 nn 00000 RET Rn
6490 1101011 00 01 11111 000000 nn 00000 CALL Rn
6491 1101011 00 00 11111 000000 nn 00000 JMP Rn
6492 */
6493 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6494 && INSN(20,16) == BITS5(1,1,1,1,1)
6495 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6496 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6497 UInt branch_type = INSN(22,21);
6498 UInt nn = INSN(9,5);
6499 if (branch_type == BITS2(1,0) /* RET */) {
6500 putPC(getIReg64orZR(nn));
6501 dres->whatNext = Dis_StopHere;
6502 dres->jk_StopHere = Ijk_Ret;
6503 DIP("ret %s\n", nameIReg64orZR(nn));
6504 return True;
6505 }
6506 if (branch_type == BITS2(0,1) /* CALL */) {
6507 IRTemp dst = newTemp(Ity_I64);
6508 assign(dst, getIReg64orZR(nn));
6509 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6510 putPC(mkexpr(dst));
6511 dres->whatNext = Dis_StopHere;
6512 dres->jk_StopHere = Ijk_Call;
6513 DIP("blr %s\n", nameIReg64orZR(nn));
6514 return True;
6515 }
6516 if (branch_type == BITS2(0,0) /* JMP */) {
6517 putPC(getIReg64orZR(nn));
6518 dres->whatNext = Dis_StopHere;
6519 dres->jk_StopHere = Ijk_Boring;
6520 DIP("jmp %s\n", nameIReg64orZR(nn));
6521 return True;
6522 }
6523 }
6524
6525 /* -------------------- CB{N}Z -------------------- */
6526 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6527 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6528 */
6529 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6530 Bool is64 = INSN(31,31) == 1;
6531 Bool bIfZ = INSN(24,24) == 0;
6532 ULong uimm64 = INSN(23,5) << 2;
6533 UInt rT = INSN(4,0);
6534 Long simm64 = (Long)sx_to_64(uimm64, 21);
6535 IRExpr* cond = NULL;
6536 if (is64) {
6537 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6538 getIReg64orZR(rT), mkU64(0));
6539 } else {
6540 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6541 getIReg32orZR(rT), mkU32(0));
6542 }
6543 stmt( IRStmt_Exit(cond,
6544 Ijk_Boring,
6545 IRConst_U64(guest_PC_curr_instr + simm64),
6546 OFFB_PC) );
6547 putPC(mkU64(guest_PC_curr_instr + 4));
6548 dres->whatNext = Dis_StopHere;
6549 dres->jk_StopHere = Ijk_Boring;
6550 DIP("cb%sz %s, 0x%llx\n",
6551 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6552 guest_PC_curr_instr + simm64);
6553 return True;
6554 }
6555
6556 /* -------------------- TB{N}Z -------------------- */
6557 /* 31 30 24 23 18 5 4
6558 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6559 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6560 */
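   /* Bit-number example (illustrative): "tbz x3, #37, <dest>" encodes
      b5 == 1 and b40 == 5, and (b5 << 5) | b40 reconstructs 37. */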
6561 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6562 UInt b5 = INSN(31,31);
6563 Bool bIfZ = INSN(24,24) == 0;
6564 UInt b40 = INSN(23,19);
6565 UInt imm14 = INSN(18,5);
6566 UInt tt = INSN(4,0);
6567 UInt bitNo = (b5 << 5) | b40;
6568 ULong uimm64 = imm14 << 2;
6569 Long simm64 = sx_to_64(uimm64, 16);
6570 IRExpr* cond
6571 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6572 binop(Iop_And64,
6573 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6574 mkU64(1)),
6575 mkU64(0));
6576 stmt( IRStmt_Exit(cond,
6577 Ijk_Boring,
6578 IRConst_U64(guest_PC_curr_instr + simm64),
6579 OFFB_PC) );
6580 putPC(mkU64(guest_PC_curr_instr + 4));
6581 dres->whatNext = Dis_StopHere;
6582 dres->jk_StopHere = Ijk_Boring;
6583 DIP("tb%sz %s, #%u, 0x%llx\n",
6584 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6585 guest_PC_curr_instr + simm64);
6586 return True;
6587 }
6588
6589 /* -------------------- SVC -------------------- */
6590 /* 11010100 000 imm16 000 01
6591 Don't bother with anything except the imm16==0 case.
6592 */
6593 if (INSN(31,0) == 0xD4000001) {
6594 putPC(mkU64(guest_PC_curr_instr + 4));
6595 dres->whatNext = Dis_StopHere;
6596 dres->jk_StopHere = Ijk_Sys_syscall;
6597 DIP("svc #0\n");
6598 return True;
6599 }
6600
6601 /* ------------------ M{SR,RS} ------------------ */
6602 /* ---- Cases for TPIDR_EL0 ----
6603 0xD51BD0 010 Rt MSR tpidr_el0, rT
6604 0xD53BD0 010 Rt MRS rT, tpidr_el0
6605 */
6606 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6607 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6608 Bool toSys = INSN(21,21) == 0;
6609 UInt tt = INSN(4,0);
6610 if (toSys) {
6611 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6612 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6613 } else {
6614 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6615 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6616 }
6617 return True;
6618 }
6619 /* ---- Cases for FPCR ----
6620 0xD51B44 000 Rt MSR fpcr, rT
6621       0xD53B44 000 Rt MRS rT, fpcr
6622 */
6623 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6624 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6625 Bool toSys = INSN(21,21) == 0;
6626 UInt tt = INSN(4,0);
6627 if (toSys) {
6628 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6629 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6630 } else {
6631 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6632 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6633 }
6634 return True;
6635 }
6636 /* ---- Cases for FPSR ----
6637 0xD51B44 001 Rt MSR fpsr, rT
6638       0xD53B44 001 Rt MRS rT, fpsr
6639 The only part of this we model is FPSR.QC. All other bits
6640 are ignored when writing to it and RAZ when reading from it.
6641 */
6642 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6643 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6644 Bool toSys = INSN(21,21) == 0;
6645 UInt tt = INSN(4,0);
6646 if (toSys) {
6647 /* Just deal with FPSR.QC. Make up a V128 value which is
6648 zero if Xt[27] is zero and any other value if Xt[27] is
6649 nonzero. */
6650 IRTemp qc64 = newTemp(Ity_I64);
6651 assign(qc64, binop(Iop_And64,
6652 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6653 mkU64(1)));
6654 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6655 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
6656 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6657 } else {
6658 /* Generate a value which is all zeroes except for bit 27,
6659 which must be zero if QCFLAG is all zeroes and one otherwise. */
6660 IRTemp qcV128 = newTempV128();
6661 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6662 IRTemp qc64 = newTemp(Ity_I64);
6663 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6664 unop(Iop_V128to64, mkexpr(qcV128))));
6665 IRExpr* res = binop(Iop_Shl64,
6666 unop(Iop_1Uto64,
6667 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6668 mkU8(27));
6669 putIReg64orZR(tt, res);
6670 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6671 }
6672 return True;
6673 }
6674 /* ---- Cases for NZCV ----
6675 D51B42 000 Rt MSR nzcv, rT
6676 D53B42 000 Rt MRS rT, nzcv
6677 The only parts of NZCV that actually exist are bits 31:28, which
6678 are the N Z C and V bits themselves. Hence the flags thunk provides
6679 all the state we need.
6680 */
6681 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6682 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6683 Bool toSys = INSN(21,21) == 0;
6684 UInt tt = INSN(4,0);
6685 if (toSys) {
6686 IRTemp t = newTemp(Ity_I64);
6687 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6688 setFlags_COPY(t);
6689 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
6690 } else {
6691 IRTemp res = newTemp(Ity_I64);
6692 assign(res, mk_arm64g_calculate_flags_nzcv());
6693 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6694 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6695 }
6696 return True;
6697 }
6698 /* ---- Cases for DCZID_EL0 ----
6699 Don't support arbitrary reads and writes to this register. Just
6700 return the value 16, which indicates that the DC ZVA instruction
6701 is not permitted, so we don't have to emulate it.
6702 D5 3B 00 111 Rt MRS rT, dczid_el0
6703 */
6704 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6705 UInt tt = INSN(4,0);
6706 putIReg64orZR(tt, mkU64(1<<4));
6707 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6708 return True;
6709 }
6710 /* ---- Cases for CTR_EL0 ----
6711 We just handle reads, and make up a value from the D and I line
6712 sizes in the VexArchInfo we are given, and patch in the following
6713 fields that the Foundation model gives ("natively"):
6714 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6715 D5 3B 00 001 Rt MRS rT, ctr_el0
6716 */
6717 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6718 UInt tt = INSN(4,0);
6719 /* Need to generate a value from dMinLine_lg2_szB and
6720 iMinLine_lg2_szB. The value in the register is in 32-bit
6721 units, so need to subtract 2 from the values in the
6722 VexArchInfo. We can assume that the values here are valid --
6723 disInstr_ARM64 checks them -- so there's no need to deal with
6724 out-of-range cases. */
6725 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6726 && archinfo->arm64_dMinLine_lg2_szB <= 17
6727 && archinfo->arm64_iMinLine_lg2_szB >= 2
6728 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6729 UInt val
6730 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6731 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6732 putIReg64orZR(tt, mkU64(val));
6733 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6734 return True;
6735 }
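      /* Illustrative sanity check of the construction above (worked out by
         hand, not taken from real hardware): with 64-byte D- and I-cache
         lines, both *MinLine_lg2_szB values are 6, so both CTR fields get
         6 - 2 == 4 (log2 of the line size in 32-bit words), and
         val == 0x8440c000 | (4 << 16) | 4 == 0x8444c004.  The constant
         0x8440c000 supplies the fixed fields mentioned above: bit 31 set,
         CWG == 0b0100, ERG == 0b0100 and L1Ip == 0b11. */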
6736 /* ---- Cases for CNTVCT_EL0 ----
6737 This is a timestamp counter of some sort. Support reads of it only
6738 by passing through to the host.
6739 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6740 */
6741 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6742 UInt tt = INSN(4,0);
6743 IRTemp val = newTemp(Ity_I64);
6744 IRExpr** args = mkIRExprVec_0();
6745 IRDirty* d = unsafeIRDirty_1_N (
6746 val,
6747 0/*regparms*/,
6748 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6749 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6750 args
6751 );
6752 /* execute the dirty call, dumping the result in val. */
6753 stmt( IRStmt_Dirty(d) );
6754 putIReg64orZR(tt, mkexpr(val));
6755 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6756 return True;
6757 }
6758
6759 /* ------------------ IC_IVAU ------------------ */
6760 /* D5 0B 75 001 Rt ic ivau, rT
6761 */
6762 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6763 /* We will always be provided with a valid iMinLine value. */
6764 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6765 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6766 /* Round the requested address, in rT, down to the start of the
6767 containing block. */
6768 UInt tt = INSN(4,0);
6769 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6770 IRTemp addr = newTemp(Ity_I64);
6771 assign( addr, binop( Iop_And64,
6772 getIReg64orZR(tt),
6773 mkU64(~(lineszB - 1))) );
6774 /* Set the invalidation range, request exit-and-invalidate, with
6775 continuation at the next instruction. */
6776 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6777 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
6778 /* be paranoid ... */
6779 stmt( IRStmt_MBE(Imbe_Fence) );
6780 putPC(mkU64( guest_PC_curr_instr + 4 ));
6781 dres->whatNext = Dis_StopHere;
6782 dres->jk_StopHere = Ijk_InvalICache;
6783 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6784 return True;
6785 }
6786
6787 /* ------------------ DC_CVAU ------------------ */
6788 /* D5 0B 7B 001 Rt dc cvau, rT
6789 */
6790 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6791 /* Exactly the same scheme as for IC IVAU, except we observe the
6792 dMinLine size, and request an Ijk_FlushDCache instead of
6793 Ijk_InvalICache. */
6794 /* We will always be provided with a valid dMinLine value. */
6795 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6796 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6797 /* Round the requested address, in rT, down to the start of the
6798 containing block. */
6799 UInt tt = INSN(4,0);
6800 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6801 IRTemp addr = newTemp(Ity_I64);
6802 assign( addr, binop( Iop_And64,
6803 getIReg64orZR(tt),
6804 mkU64(~(lineszB - 1))) );
6805 /* Set the flush range, request exit-and-flush, with
6806 continuation at the next instruction. */
6807 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6808 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
6809 /* be paranoid ... */
6810 stmt( IRStmt_MBE(Imbe_Fence) );
6811 putPC(mkU64( guest_PC_curr_instr + 4 ));
6812 dres->whatNext = Dis_StopHere;
6813 dres->jk_StopHere = Ijk_FlushDCache;
6814 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6815 return True;
6816 }
6817
6818 /* ------------------ ISB, DMB, DSB ------------------ */
6819 /* 31 21 11 7 6 4
6820 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6821 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6822 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6823 */
6824 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6825 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6826 && INSN(7,7) == 1
6827 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6828 UInt opc = INSN(6,5);
6829 UInt CRm = INSN(11,8);
6830 vassert(opc <= 2 && CRm <= 15);
6831 stmt(IRStmt_MBE(Imbe_Fence));
6832 const HChar* opNames[3]
6833 = { "dsb", "dmb", "isb" };
6834 const HChar* howNames[16]
6835 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6836 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6837 DIP("%s %s\n", opNames[opc], howNames[CRm]);
6838 return True;
6839 }
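      /* Concrete example of the encoding above: CRm == 0b1011 ("ish") with
         opc == 0b01 assembles to 0xD5033BBF, i.e. "dmb ish".  Since all
         three barriers are modelled with a single Imbe_Fence, the CRm
         option only affects the text printed by DIP. */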
6840
6841 /* -------------------- NOP -------------------- */
6842 if (INSN(31,0) == 0xD503201F) {
6843 DIP("nop\n");
6844 return True;
6845 }
6846
6847 /* -------------------- BRK -------------------- */
6848 /* 31 23 20 4
6849 1101 0100 001 imm16 00000 BRK #imm16
6850 */
6851 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6852 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6853 UInt imm16 = INSN(20,5);
6854 /* Request SIGTRAP and then restart of this insn. */
6855 putPC(mkU64(guest_PC_curr_instr + 0));
6856 dres->whatNext = Dis_StopHere;
6857 dres->jk_StopHere = Ijk_SigTRAP;
6858 DIP("brk #%u\n", imm16);
6859 return True;
6860 }
6861
6862 /* ------------------- YIELD ------------------- */
6863 /* 31 23 15 7
6864 1101 0101 0000 0011 0010 0000 0011 1111
6865 */
6866 if (INSN(31,0) == 0xD503203F) {
6867 /* Request yield followed by continuation at the next insn. */
6868 putPC(mkU64(guest_PC_curr_instr + 4));
6869 dres->whatNext = Dis_StopHere;
6870 dres->jk_StopHere = Ijk_Yield;
6871 DIP("yield\n");
6872 return True;
6873 }
6874
6875 //fail:
6876 vex_printf("ARM64 front end: branch_etc\n");
6877 return False;
6878 # undef INSN
6879 }
6880
6881
6882 /*------------------------------------------------------------*/
6883 /*--- SIMD and FP instructions: helper functions ---*/
6884 /*------------------------------------------------------------*/
6885
6886 /* Some constructors for interleave/deinterleave expressions. */
6887
6888 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6889 // returns a0 b0
6890 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6891 }
6892
6893 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6894 // returns a1 b1
6895 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6896 }
6897
6898 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6899 // returns a2 a0 b2 b0
6900 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6901 }
6902
6903 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6904 // returns a3 a1 b3 b1
6905 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6906 }
6907
6908 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6909 // returns a1 b1 a0 b0
6910 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6911 }
6912
6913 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6914 // returns a3 b3 a2 b2
6915 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6916 }
6917
6918 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6919 // returns a6 a4 a2 a0 b6 b4 b2 b0
6920 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6921 }
6922
6923 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6924 // returns a7 a5 a3 a1 b7 b5 b3 b1
6925 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6926 }
6927
6928 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6929 // returns a3 b3 a2 b2 a1 b1 a0 b0
6930 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6931 }
6932
6933 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6934 // returns a7 b7 a6 b6 a5 b5 a4 b4
6935 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6936 }
6937
6938 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6939 IRTemp bFEDCBA9876543210 ) {
6940 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6941 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6942 mkexpr(bFEDCBA9876543210));
6943 }
6944
6945 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6946 IRTemp bFEDCBA9876543210 ) {
6947 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6948 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6949 mkexpr(bFEDCBA9876543210));
6950 }
6951
6952 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6953 IRTemp bFEDCBA9876543210 ) {
6954 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6955 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6956 mkexpr(bFEDCBA9876543210));
6957 }
6958
6959 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6960 IRTemp bFEDCBA9876543210 ) {
6961 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6962 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6963 mkexpr(bFEDCBA9876543210));
6964 }
6965
6966 /* Generate N copies of |bit| in the bottom of a ULong. */
6967 static ULong Replicate ( ULong bit, Int N )
6968 {
6969 vassert(bit <= 1 && N >= 1 && N < 64);
6970 if (bit == 0) {
6971 return 0;
6972 } else {
6973 /* Careful. This won't work for N == 64. */
6974 return (1ULL << N) - 1;
6975 }
6976 }
6977
6978 static ULong Replicate32x2 ( ULong bits32 )
6979 {
6980 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
6981 return (bits32 << 32) | bits32;
6982 }
6983
6984 static ULong Replicate16x4 ( ULong bits16 )
6985 {
6986 vassert(0 == (bits16 & ~0xFFFFULL));
6987 return Replicate32x2((bits16 << 16) | bits16);
6988 }
6989
6990 static ULong Replicate8x8 ( ULong bits8 )
6991 {
6992 vassert(0 == (bits8 & ~0xFFULL));
6993 return Replicate16x4((bits8 << 8) | bits8);
6994 }
6995
6996 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
6997 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
6998 is 64. In the former case, the upper 32 bits of the returned value
6999 are guaranteed to be zero. */
7000 static ULong VFPExpandImm ( ULong imm8, Int N )
7001 {
7002 vassert(imm8 <= 0xFF);
7003 vassert(N == 32 || N == 64);
7004 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7005 Int F = N - E - 1;
7006 ULong imm8_6 = (imm8 >> 6) & 1;
7007 /* sign: 1 bit */
7008 /* exp: E bits */
7009 /* frac: F bits */
7010 ULong sign = (imm8 >> 7) & 1;
7011 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7012 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7013 vassert(sign < (1ULL << 1));
7014 vassert(exp < (1ULL << E));
7015 vassert(frac < (1ULL << F));
7016 vassert(1 + E + F == N);
7017 ULong res = (sign << (E+F)) | (exp << F) | frac;
7018 return res;
7019 }
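/* Worked example (a hand-computed sanity check, not part of the original
   logic): the FMOV immediate 1.0 is encoded as imm8 == 0x70.  For N == 64
   the code above gives E == 9, F == 54, sign == 0, exp == 0xFF and
   frac == 0x30 << 48, so the result is 0x3FF0000000000000, IEEE754 1.0.
   For N == 32 the same imm8 yields 0x3F800000, the single-precision 1.0.
   In other words, were one to add a self-test, it could assert

      VFPExpandImm(0x70, 64) == 0x3FF0000000000000ULL
      VFPExpandImm(0x70, 32) == 0x3F800000ULL
*/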
7020
7021 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7022 This might fail, as indicated by the returned Bool. Page 2530 of
7023 the manual. */
7024 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7025 UInt op, UInt cmode, UInt imm8 )
7026 {
7027 vassert(op <= 1);
7028 vassert(cmode <= 15);
7029 vassert(imm8 <= 255);
7030
7031 *res = 0; /* will overwrite iff returning True */
7032
7033 ULong imm64 = 0;
7034 Bool testimm8 = False;
7035
7036 switch (cmode >> 1) {
7037 case 0:
7038 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7039 case 1:
7040 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7041 case 2:
7042 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7043 case 3:
7044 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7045 case 4:
7046 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7047 case 5:
7048 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7049 case 6:
7050 testimm8 = True;
7051 if ((cmode & 1) == 0)
7052 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7053 else
7054 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7055 break;
7056 case 7:
7057 testimm8 = False;
7058 if ((cmode & 1) == 0 && op == 0)
7059 imm64 = Replicate8x8(imm8);
7060 if ((cmode & 1) == 0 && op == 1) {
7061 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7062 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7063 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7064 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7065 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7066 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7067 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7068 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7069 }
7070 if ((cmode & 1) == 1 && op == 0) {
7071 ULong imm8_7 = (imm8 >> 7) & 1;
7072 ULong imm8_6 = (imm8 >> 6) & 1;
7073 ULong imm8_50 = imm8 & 63;
7074 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7075 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7076 | (Replicate(imm8_6, 5) << (6 + 19))
7077 | (imm8_50 << 19);
7078 imm64 = Replicate32x2(imm32);
7079 }
7080 if ((cmode & 1) == 1 && op == 1) {
7081 // imm64 = imm8<7>:NOT(imm8<6>)
7082 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7083 ULong imm8_7 = (imm8 >> 7) & 1;
7084 ULong imm8_6 = (imm8 >> 6) & 1;
7085 ULong imm8_50 = imm8 & 63;
7086 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7087 | (Replicate(imm8_6, 8) << 54)
7088 | (imm8_50 << 48);
7089 }
7090 break;
7091 default:
7092 vassert(0);
7093 }
7094
7095 if (testimm8 && imm8 == 0)
7096 return False;
7097
7098 *res = imm64;
7099 return True;
7100 }
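/* Two illustrative expansions, traced through the code above by hand:

      op == 0, cmode == 0b0000, imm8 == 0xAB --> 0x000000AB000000AB
         (imm8 copied into each 32-bit half)
      op == 1, cmode == 0b1110, imm8 == 0x81 --> 0xFF000000000000FF
         (each bit of imm8 smeared across a whole byte)

   Encodings with testimm8 == True and imm8 == 0 make the function return
   False, and *res stays zero. */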
7101
7102 /* Helper for decoding laneage for vector operations that can be
7103 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7104 and SZ bits, typically for vector floating point. */
7105 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7106 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7107 /*OUT*/const HChar** arrSpec,
7108 Bool bitQ, Bool bitSZ )
7109 {
7110 vassert(bitQ == True || bitQ == False);
7111 vassert(bitSZ == True || bitSZ == False);
7112 if (bitQ && bitSZ) { // 2x64
7113 if (tyI) *tyI = Ity_I64;
7114 if (tyF) *tyF = Ity_F64;
7115 if (nLanes) *nLanes = 2;
7116 if (zeroUpper) *zeroUpper = False;
7117 if (arrSpec) *arrSpec = "2d";
7118 return True;
7119 }
7120 if (bitQ && !bitSZ) { // 4x32
7121 if (tyI) *tyI = Ity_I32;
7122 if (tyF) *tyF = Ity_F32;
7123 if (nLanes) *nLanes = 4;
7124 if (zeroUpper) *zeroUpper = False;
7125 if (arrSpec) *arrSpec = "4s";
7126 return True;
7127 }
7128 if (!bitQ && !bitSZ) { // 2x32
7129 if (tyI) *tyI = Ity_I32;
7130 if (tyF) *tyF = Ity_F32;
7131 if (nLanes) *nLanes = 2;
7132 if (zeroUpper) *zeroUpper = True;
7133 if (arrSpec) *arrSpec = "2s";
7134 return True;
7135 }
7136 // Else impliedly 1x64, which isn't allowed.
7137 return False;
7138 }
7139
7140 /* Helper for decoding laneage for shift-style vector operations
7141 that involve an immediate shift amount. */
7142 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7143 UInt immh, UInt immb )
7144 {
7145 vassert(immh < (1<<4));
7146 vassert(immb < (1<<3));
7147 UInt immhb = (immh << 3) | immb;
7148 if (immh & 8) {
7149 if (shift) *shift = 128 - immhb;
7150 if (szBlg2) *szBlg2 = 3;
7151 return True;
7152 }
7153 if (immh & 4) {
7154 if (shift) *shift = 64 - immhb;
7155 if (szBlg2) *szBlg2 = 2;
7156 return True;
7157 }
7158 if (immh & 2) {
7159 if (shift) *shift = 32 - immhb;
7160 if (szBlg2) *szBlg2 = 1;
7161 return True;
7162 }
7163 if (immh & 1) {
7164 if (shift) *shift = 16 - immhb;
7165 if (szBlg2) *szBlg2 = 0;
7166 return True;
7167 }
7168 return False;
7169 }
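/* Decode example (worked by hand): immh:immb == 0b0010:0b001 gives
   immhb == 17; bit 1 is the topmost set bit of immh, so the lanes are
   16 bits wide (*szBlg2 == 1) and *shift == 32 - 17 == 15.  immh == 0
   is the invalid case and yields False. */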
7170
7171 /* Generate IR to fold all lanes of the V128 value in 'src' as
7172 characterised by the operator 'op', and return the result in the
7173 bottom bits of a V128, with all other bits set to zero. */
7174 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7175 {
7176 /* The basic idea is to use repeated applications of Iop_CatEven*
7177 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7178 a complete vector. Then fold all those vectors with 'op' and
7179 zero out all but the least significant lane. */
7180 switch (op) {
7181 case Iop_Min8Sx16: case Iop_Min8Ux16:
7182 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7183 /* NB: temp naming here is misleading -- the naming is for 8
7184 lanes of 16 bit, whereas what is being operated on is 16
7185 lanes of 8 bits. */
7186 IRTemp x76543210 = src;
7187 IRTemp x76547654 = newTempV128();
7188 IRTemp x32103210 = newTempV128();
7189 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7190 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7191 IRTemp x76767676 = newTempV128();
7192 IRTemp x54545454 = newTempV128();
7193 IRTemp x32323232 = newTempV128();
7194 IRTemp x10101010 = newTempV128();
7195 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7196 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7197 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7198 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7199 IRTemp x77777777 = newTempV128();
7200 IRTemp x66666666 = newTempV128();
7201 IRTemp x55555555 = newTempV128();
7202 IRTemp x44444444 = newTempV128();
7203 IRTemp x33333333 = newTempV128();
7204 IRTemp x22222222 = newTempV128();
7205 IRTemp x11111111 = newTempV128();
7206 IRTemp x00000000 = newTempV128();
7207 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7208 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7209 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7210 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7211 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7212 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7213 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7214 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7215 /* Naming not misleading after here. */
7216 IRTemp xAllF = newTempV128();
7217 IRTemp xAllE = newTempV128();
7218 IRTemp xAllD = newTempV128();
7219 IRTemp xAllC = newTempV128();
7220 IRTemp xAllB = newTempV128();
7221 IRTemp xAllA = newTempV128();
7222 IRTemp xAll9 = newTempV128();
7223 IRTemp xAll8 = newTempV128();
7224 IRTemp xAll7 = newTempV128();
7225 IRTemp xAll6 = newTempV128();
7226 IRTemp xAll5 = newTempV128();
7227 IRTemp xAll4 = newTempV128();
7228 IRTemp xAll3 = newTempV128();
7229 IRTemp xAll2 = newTempV128();
7230 IRTemp xAll1 = newTempV128();
7231 IRTemp xAll0 = newTempV128();
7232 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7233 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7234 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7235 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7236 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7237 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7238 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7239 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7240 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7241 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7242 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7243 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7244 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7245 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7246 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7247 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7248 IRTemp maxFE = newTempV128();
7249 IRTemp maxDC = newTempV128();
7250 IRTemp maxBA = newTempV128();
7251 IRTemp max98 = newTempV128();
7252 IRTemp max76 = newTempV128();
7253 IRTemp max54 = newTempV128();
7254 IRTemp max32 = newTempV128();
7255 IRTemp max10 = newTempV128();
7256 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7257 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7258 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7259 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7260 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7261 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7262 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7263 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7264 IRTemp maxFEDC = newTempV128();
7265 IRTemp maxBA98 = newTempV128();
7266 IRTemp max7654 = newTempV128();
7267 IRTemp max3210 = newTempV128();
7268 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7269 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7270 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7271 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7272 IRTemp maxFEDCBA98 = newTempV128();
7273 IRTemp max76543210 = newTempV128();
7274 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7275 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7276 IRTemp maxAllLanes = newTempV128();
7277 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7278 mkexpr(max76543210)));
7279 IRTemp res = newTempV128();
7280 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7281 return res;
7282 }
7283 case Iop_Min16Sx8: case Iop_Min16Ux8:
7284 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7285 IRTemp x76543210 = src;
7286 IRTemp x76547654 = newTempV128();
7287 IRTemp x32103210 = newTempV128();
7288 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7289 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7290 IRTemp x76767676 = newTempV128();
7291 IRTemp x54545454 = newTempV128();
7292 IRTemp x32323232 = newTempV128();
7293 IRTemp x10101010 = newTempV128();
7294 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7295 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7296 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7297 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7298 IRTemp x77777777 = newTempV128();
7299 IRTemp x66666666 = newTempV128();
7300 IRTemp x55555555 = newTempV128();
7301 IRTemp x44444444 = newTempV128();
7302 IRTemp x33333333 = newTempV128();
7303 IRTemp x22222222 = newTempV128();
7304 IRTemp x11111111 = newTempV128();
7305 IRTemp x00000000 = newTempV128();
7306 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7307 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7308 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7309 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7310 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7311 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7312 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7313 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7314 IRTemp max76 = newTempV128();
7315 IRTemp max54 = newTempV128();
7316 IRTemp max32 = newTempV128();
7317 IRTemp max10 = newTempV128();
7318 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7319 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7320 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7321 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7322 IRTemp max7654 = newTempV128();
7323 IRTemp max3210 = newTempV128();
7324 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7325 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7326 IRTemp max76543210 = newTempV128();
7327 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7328 IRTemp res = newTempV128();
7329 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7330 return res;
7331 }
7332 case Iop_Max32Fx4: case Iop_Min32Fx4:
7333 case Iop_Min32Sx4: case Iop_Min32Ux4:
7334 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7335 IRTemp x3210 = src;
7336 IRTemp x3232 = newTempV128();
7337 IRTemp x1010 = newTempV128();
7338 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7339 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7340 IRTemp x3333 = newTempV128();
7341 IRTemp x2222 = newTempV128();
7342 IRTemp x1111 = newTempV128();
7343 IRTemp x0000 = newTempV128();
7344 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7345 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7346 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7347 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7348 IRTemp max32 = newTempV128();
7349 IRTemp max10 = newTempV128();
7350 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7351 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7352 IRTemp max3210 = newTempV128();
7353 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7354 IRTemp res = newTempV128();
7355 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7356 return res;
7357 }
7358 case Iop_Add64x2: {
7359 IRTemp x10 = src;
7360 IRTemp x00 = newTempV128();
7361 IRTemp x11 = newTempV128();
7362 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7363 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7364 IRTemp max10 = newTempV128();
7365 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7366 IRTemp res = newTempV128();
7367 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7368 return res;
7369 }
7370 default:
7371 vassert(0);
7372 }
7373 }
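/* To see how the lane-cloning scheme above works, consider the 32x4 case
   with src == [x3 x2 x1 x0].  The first round of Cat ops against itself
   gives [x3 x2 x3 x2] and [x1 x0 x1 x0]; the second round gives the four
   fully-cloned vectors [x3 x3 x3 x3] .. [x0 x0 x0 x0].  Folding those
   with |op| leaves op(x3,x2,x1,x0) in every lane, and the final
   Iop_ZeroHI96ofV128 keeps only the lowest lane. */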
7374
7375
7376 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7377 only. */
7378 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7379 IRTemp oor_values )
7380 {
7381 vassert(len >= 0 && len <= 3);
7382
7383 /* Generate some useful constants as concisely as possible. */
7384 IRTemp half15 = newTemp(Ity_I64);
7385 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7386 IRTemp half16 = newTemp(Ity_I64);
7387 assign(half16, mkU64(0x1010101010101010ULL));
7388
7389 /* A zero vector */
7390 IRTemp allZero = newTempV128();
7391 assign(allZero, mkV128(0x0000));
7392 /* A vector containing 15 in each 8-bit lane */
7393 IRTemp all15 = newTempV128();
7394 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7395 /* A vector containing 16 in each 8-bit lane */
7396 IRTemp all16 = newTempV128();
7397 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7398 /* A vector containing 32 in each 8-bit lane */
7399 IRTemp all32 = newTempV128();
7400 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7401 /* A vector containing 48 in each 8-bit lane */
7402 IRTemp all48 = newTempV128();
7403 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7404 /* A vector containing 64 in each 8-bit lane */
7405 IRTemp all64 = newTempV128();
7406 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7407
7408 /* Group the 16/32/48/64 vectors so as to be indexable. */
7409 IRTemp allXX[4] = { all16, all32, all48, all64 };
7410
7411 /* Compute the result for each table vector, with zeroes in places
7412 where the index values are out of range, and OR them into the
7413 running vector. */
7414 IRTemp running_result = newTempV128();
7415 assign(running_result, mkV128(0));
7416
7417 UInt tabent;
7418 for (tabent = 0; tabent <= len; tabent++) {
7419 vassert(tabent >= 0 && tabent < 4);
7420 IRTemp bias = newTempV128();
7421 assign(bias,
7422 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
7423 IRTemp biased_indices = newTempV128();
7424 assign(biased_indices,
7425 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
7426 IRTemp valid_mask = newTempV128();
7427 assign(valid_mask,
7428 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
7429 IRTemp safe_biased_indices = newTempV128();
7430 assign(safe_biased_indices,
7431 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
7432 IRTemp results_or_junk = newTempV128();
7433 assign(results_or_junk,
7434 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7435 mkexpr(safe_biased_indices)));
7436 IRTemp results_or_zero = newTempV128();
7437 assign(results_or_zero,
7438 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7439 /* And OR that into the running result. */
7440 IRTemp tmp = newTempV128();
7441 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7442 mkexpr(running_result)));
7443 running_result = tmp;
7444 }
7445
7446 /* So now running_result holds the overall result where the indices
7447 are in range, and zero in out-of-range lanes. Now we need to
7448 compute an overall validity mask and use this to copy in the
7449 lanes in the oor_values for out of range indices. This is
7450 unnecessary for TBL but will get folded out by iropt, so we lean
7451 on that and generate the same code for TBL and TBX here. */
7452 IRTemp overall_valid_mask = newTempV128();
7453 assign(overall_valid_mask,
7454 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
7455 IRTemp result = newTempV128();
7456 assign(result,
7457 binop(Iop_OrV128,
7458 mkexpr(running_result),
7459 binop(Iop_AndV128,
7460 mkexpr(oor_values),
7461 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7462 return result;
7463 }
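/* Behavioural sketch (illustration only): for a two-register TBL
   (len == 1) a source byte of 0x05 selects byte 5 of tab[0], 0x17
   selects byte 7 of tab[1], and 0x20 or above is out of range, so that
   lane becomes 0 for TBL, or the corresponding lane of |oor_values|
   (the old destination) for TBX. */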
7464
7465
7466 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7467 an op which takes two I64s and produces a V128. That is, a widening
7468 operator. Generate IR which applies |opI64x2toV128| to either the
7469 lower (if |is2| is False) or upper (if |is2| is True) halves of
7470 |argL| and |argR|, and return the value in a new IRTemp.
7471 */
7472 static
7473 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7474 IRExpr* argL, IRExpr* argR )
7475 {
7476 IRTemp res = newTempV128();
7477 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7478 assign(res, binop(opI64x2toV128, unop(slice, argL),
7479 unop(slice, argR)));
7480 return res;
7481 }
7482
7483
7484 /* Generate signed/unsigned absolute difference vector IR. */
7485 static
7486 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7487 {
7488 vassert(size <= 3);
7489 IRTemp argL = newTempV128();
7490 IRTemp argR = newTempV128();
7491 IRTemp msk = newTempV128();
7492 IRTemp res = newTempV128();
7493 assign(argL, argLE);
7494 assign(argR, argRE);
7495 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
7496 mkexpr(argL), mkexpr(argR)));
7497 assign(res,
7498 binop(Iop_OrV128,
7499 binop(Iop_AndV128,
7500 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
7501 mkexpr(msk)),
7502 binop(Iop_AndV128,
7503 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
7504 unop(Iop_NotV128, mkexpr(msk)))));
7505 return res;
7506 }
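/* The masking trick above computes, per lane,
      abd(L, R) = (L > R) ? L - R : R - L
   using only whole-vector operations.  For example, for unsigned byte
   lanes with L == 3 and R == 250 the mask is all-zeroes, so the result
   is R - L == 247.  The subtractions wrap at the lane size, so no
   widening is required. */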
7507
7508
7509 /* Generate IR that takes a V128 and sign- or zero-widens
7510 either the lower or upper set of lanes to twice-as-wide,
7511 resulting in a new V128 value. */
7512 static
7513 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7514 UInt sizeNarrow, IRExpr* srcE )
7515 {
7516 IRTemp src = newTempV128();
7517 IRTemp res = newTempV128();
7518 assign(src, srcE);
7519 switch (sizeNarrow) {
7520 case X10:
7521 assign(res,
7522 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7523 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7524 : Iop_InterleaveLO32x4,
7525 mkexpr(src),
7526 mkexpr(src)),
7527 mkU8(32)));
7528 break;
7529 case X01:
7530 assign(res,
7531 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7532 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7533 : Iop_InterleaveLO16x8,
7534 mkexpr(src),
7535 mkexpr(src)),
7536 mkU8(16)));
7537 break;
7538 case X00:
7539 assign(res,
7540 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7541 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7542 : Iop_InterleaveLO8x16,
7543 mkexpr(src),
7544 mkexpr(src)),
7545 mkU8(8)));
7546 break;
7547 default:
7548 vassert(0);
7549 }
7550 return res;
7551 }
7552
7553
7554 /* Generate IR that takes a V128 and sign- or zero-widens
7555 either the even or odd lanes to twice-as-wide,
7556 resulting in a new V128 value. */
7557 static
7558 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7559 UInt sizeNarrow, IRExpr* srcE )
7560 {
7561 IRTemp src = newTempV128();
7562 IRTemp res = newTempV128();
7563 IROp opSAR = mkVecSARN(sizeNarrow+1);
7564 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7565 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7566 IROp opSxR = zWiden ? opSHR : opSAR;
7567 UInt amt = 0;
7568 switch (sizeNarrow) {
7569 case X10: amt = 32; break;
7570 case X01: amt = 16; break;
7571 case X00: amt = 8; break;
7572 default: vassert(0);
7573 }
7574 assign(src, srcE);
7575 if (fromOdd) {
7576 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7577 } else {
7578 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7579 mkU8(amt)));
7580 }
7581 return res;
7582 }
7583
7584
7585 /* Generate IR that takes two V128s and narrows each lane (keeping the
7586 lower half of each lane), producing a single V128 value. */
7587 static
7588 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7589 {
7590 IRTemp res = newTempV128();
7591 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7592 mkexpr(argHi), mkexpr(argLo)));
7593 return res;
7594 }
7595
7596
7597 /* Return a temp which holds the vector dup of the lane of width
7598 (1 << size) obtained from src[laneNo]. */
7599 static
7600 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7601 {
7602 vassert(size <= 3);
7603 /* Normalise |laneNo| so it is of the form
7604 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7605 This puts the bits we want to inspect at constant offsets
7606 regardless of the value of |size|.
7607 */
7608 UInt ix = laneNo << size;
7609 vassert(ix <= 15);
7610 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7611 switch (size) {
7612 case 0: /* B */
7613 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7614 /* fallthrough */
7615 case 1: /* H */
7616 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7617 /* fallthrough */
7618 case 2: /* S */
7619 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7620 /* fallthrough */
7621 case 3: /* D */
7622 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7623 break;
7624 default:
7625 vassert(0);
7626 }
7627 IRTemp res = newTempV128();
7628 assign(res, src);
7629 Int i;
7630 for (i = 3; i >= 0; i--) {
7631 if (ops[i] == Iop_INVALID)
7632 break;
7633 IRTemp tmp = newTempV128();
7634 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7635 res = tmp;
7636 }
7637 return res;
7638 }
7639
7640
7641 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7642 selector encoded as shown below. Return a new V128 holding the
7643 selected lane from |srcV| dup'd out to V128, and also return the
7644 lane number, log2 of the lane size in bytes, and width-character via
7645 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7646 is an invalid selector, in which case return
7647 IRTemp_INVALID, 0, 0 and '?' respectively.
7648
7649 imm5 = xxxx1 signifies .b[xxxx]
7650 = xxx10 .h[xxx]
7651 = xx100 .s[xx]
7652 = x1000 .d[x]
7653 otherwise invalid
7654 */
7655 static
7656 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7657 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7658 IRExpr* srcV, UInt imm5 )
7659 {
7660 *laneNo = 0;
7661 *laneSzLg2 = 0;
7662 *laneCh = '?';
7663
7664 if (imm5 & 1) {
7665 *laneNo = (imm5 >> 1) & 15;
7666 *laneSzLg2 = 0;
7667 *laneCh = 'b';
7668 }
7669 else if (imm5 & 2) {
7670 *laneNo = (imm5 >> 2) & 7;
7671 *laneSzLg2 = 1;
7672 *laneCh = 'h';
7673 }
7674 else if (imm5 & 4) {
7675 *laneNo = (imm5 >> 3) & 3;
7676 *laneSzLg2 = 2;
7677 *laneCh = 's';
7678 }
7679 else if (imm5 & 8) {
7680 *laneNo = (imm5 >> 4) & 1;
7681 *laneSzLg2 = 3;
7682 *laneCh = 'd';
7683 }
7684 else {
7685 /* invalid */
7686 return IRTemp_INVALID;
7687 }
7688
7689 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7690 }
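/* Decode examples (worked by hand): imm5 == 0b10010 selects .h[4]
   (bit 1 set, lane number taken from imm5[4:2]), and imm5 == 0b01000
   selects .d[0].  imm5 values with all of the bottom four bits zero
   (0b00000, 0b10000) are invalid and produce IRTemp_INVALID. */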
7691
7692
7693 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
7694 static
7695 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7696 {
7697 IRType ty = Ity_INVALID;
7698 IRTemp rcS = IRTemp_INVALID;
7699 switch (size) {
7700 case X01:
7701 vassert(imm <= 0xFFFFULL);
7702 ty = Ity_I16;
7703 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7704 break;
7705 case X10:
7706 vassert(imm <= 0xFFFFFFFFULL);
7707 ty = Ity_I32;
7708 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7709 break;
7710 case X11:
7711 ty = Ity_I64;
7712 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7713 default:
7714 vassert(0);
7715 }
7716 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7717 return rcV;
7718 }
7719
7720
7721 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7722 and the upper can contain any value -- it is ignored. If |is2| is False,
7723 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7724 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7725 half of vector reg |dd| and leave the lower half unchanged. This
7726 simulates the behaviour of the "foo/foo2" instructions in which the
7727 destination is half the width of sources, for example addhn/addhn2.
7728 */
7729 static
7730 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7731 {
7732 if (is2) {
7733 /* Get the old contents of Vdd, zero the upper half, and replace
7734 that upper half with |new64|. */
7735 IRTemp t_zero_oldLO = newTempV128();
7736 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
7737 IRTemp t_newHI_zero = newTempV128();
7738 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7739 mkV128(0x0000)));
7740 IRTemp res = newTempV128();
7741 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7742 mkexpr(t_newHI_zero)));
7743 putQReg128(dd, mkexpr(res));
7744 } else {
7745 /* This is simple. */
7746 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7747 }
7748 }
7749
7750
7751 /* Compute vector SQABS at lane size |size| for |srcE|, returning
7752 the q result in |*qabs| and the normal result in |*nabs|. */
7753 static
7754 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7755 IRExpr* srcE, UInt size )
7756 {
7757 IRTemp src, mask, maskn, nsub, qsub;
7758 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7759 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7760 assign(src, srcE);
7761 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7762 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7763 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7764 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7765 assign(*nabs, binop(Iop_OrV128,
7766 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7767 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7768 assign(*qabs, binop(Iop_OrV128,
7769 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7770 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7771 }
7772
7773
7774 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
7775 the q result in |*qneg| and the normal result in |*nneg|. */
7776 static
7777 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7778 IRExpr* srcE, UInt size )
7779 {
7780 IRTemp src = IRTemp_INVALID;
7781 newTempsV128_3(&src, nneg, qneg);
7782 assign(src, srcE);
7783 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7784 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7785 }
7786
7787
7788 /* Zero all except the least significant lane of |srcE|, where |size|
7789 indicates the lane size in the usual way. */
7790 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
7791 {
7792 vassert(size < 4);
7793 IRTemp t = newTempV128();
7794 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
7795 return t;
7796 }
7797
7798
7799 /* Generate IR to compute vector widening MULL from either the lower
7800 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7801 widening multiplies are unsigned when isU==True and signed when
7802 isU==False. |size| is the narrow lane size indication. Optionally,
7803 the product may be added to or subtracted from vecD, at the wide lane
7804 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7805 is 'm' (only multiply) then the accumulate part does not happen, and
7806 |vecD| is expected to == IRTemp_INVALID.
7807
7808 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7809 are allowed. The result is a new IRTemp, which is returned
7810 via *res. */
7811 static
7812 void math_MULL_ACC ( /*OUT*/IRTemp* res,
7813 Bool is2, Bool isU, UInt size, HChar mas,
7814 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7815 {
7816 vassert(res && *res == IRTemp_INVALID);
7817 vassert(size <= 2);
7818 vassert(mas == 'm' || mas == 'a' || mas == 's');
7819 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7820 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7821 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7822 : (mas == 's' ? mkVecSUB(size+1)
7823 : Iop_INVALID);
7824 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7825 mkexpr(vecN), mkexpr(vecM));
7826 *res = newTempV128();
7827 assign(*res, mas == 'm' ? mkexpr(mul)
7828 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7829 }
7830
7831
7832 /* Same as math_MULL_ACC, except the multiply is signed widening,
7833 the multiplied value is then doubled, before being added to or
7834 subtracted from the accumulated value. And everything is
7835 saturated. In all cases, saturation residuals are returned
7836 via (sat1q, sat1n), and in the accumulate cases,
7837 via (sat2q, sat2n) too. All results are returned in new temporaries.
7838 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
7839 so the caller can tell this has happened. */
7840 static
7841 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
7842 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7843 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
7844 Bool is2, UInt size, HChar mas,
7845 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7846 {
7847 vassert(size <= 2);
7848 vassert(mas == 'm' || mas == 'a' || mas == 's');
7849 /* Compute
7850 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
7851 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
7852 IOW take either the low or high halves of vecN and vecM, signed widen,
7853 multiply, double that, and signedly saturate. Also compute the same
7854 but without saturation.
7855 */
7856 vassert(sat2q && *sat2q == IRTemp_INVALID);
7857 vassert(sat2n && *sat2n == IRTemp_INVALID);
7858 newTempsV128_3(sat1q, sat1n, res);
7859 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
7860 mkexpr(vecN), mkexpr(vecM));
7861 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
7862 mkexpr(vecN), mkexpr(vecM));
7863 assign(*sat1q, mkexpr(tq));
7864 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
7865
7866 /* If there is no accumulation, the final result is sat1q,
7867 and there's no assignment to sat2q or sat2n. */
7868 if (mas == 'm') {
7869 assign(*res, mkexpr(*sat1q));
7870 return;
7871 }
7872
7873 /* Compute
7874 sat2q = vecD +sq/-sq sat1q
7875 sat2n = vecD +/- sat1n
7876 result = sat2q
7877 */
7878 newTempsV128_2(sat2q, sat2n);
7879 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
7880 mkexpr(vecD), mkexpr(*sat1q)));
7881 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
7882 mkexpr(vecD), mkexpr(*sat1n)));
7883 assign(*res, mkexpr(*sat2q));
7884 }
7885
7886
7887 /* Generate IR for widening signed vector multiplies. The operands
7888 have their lane width signedly widened, and they are then multiplied
7889 at the wider width, returning results in two new IRTemps. */
7890 static
7891 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
7892 UInt sizeNarrow, IRTemp argL, IRTemp argR )
7893 {
7894 vassert(sizeNarrow <= 2);
7895 newTempsV128_2(resHI, resLO);
7896 IRTemp argLhi = newTemp(Ity_I64);
7897 IRTemp argLlo = newTemp(Ity_I64);
7898 IRTemp argRhi = newTemp(Ity_I64);
7899 IRTemp argRlo = newTemp(Ity_I64);
7900 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
7901 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
7902 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
7903 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
7904 IROp opMulls = mkVecMULLS(sizeNarrow);
7905 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
7906 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
7907 }
7908
7909
7910 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
7911 double that, possibly add a rounding constant (R variants), and take
7912 the high half. */
7913 static
7914 void math_SQDMULH ( /*OUT*/IRTemp* res,
7915 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7916 Bool isR, UInt size, IRTemp vN, IRTemp vM )
7917 {
7918 vassert(size == X01 || size == X10); /* s or h only */
7919
7920 newTempsV128_3(res, sat1q, sat1n);
7921
7922 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
7923 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
7924
7925 IRTemp addWide = mkVecADD(size+1);
7926
7927 if (isR) {
7928 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7929
7930 Int rcShift = size == X01 ? 15 : 31;
7931 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
7932 assign(*sat1n,
7933 binop(mkVecCATODDLANES(size),
7934 binop(addWide,
7935 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7936 mkexpr(roundConst)),
7937 binop(addWide,
7938 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
7939 mkexpr(roundConst))));
7940 } else {
7941 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7942
7943 assign(*sat1n,
7944 binop(mkVecCATODDLANES(size),
7945 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7946 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
7947 }
7948
7949 assign(*res, mkexpr(*sat1q));
7950 }
7951
7952
7953 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
7954 a new temp in *res, and the Q difference pair in new temps in
7955 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
7956 three operations it is. */
7957 static
7958 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
7959 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
7960 IRTemp src, UInt size, UInt shift, const HChar* nm )
7961 {
7962 vassert(size <= 3);
7963 UInt laneBits = 8 << size;
7964 vassert(shift < laneBits);
7965 newTempsV128_3(res, qDiff1, qDiff2);
7966 IRTemp z128 = newTempV128();
7967 assign(z128, mkV128(0x0000));
7968
7969 /* UQSHL */
7970 if (vex_streq(nm, "uqshl")) {
7971 IROp qop = mkVecQSHLNSATUU(size);
7972 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
7973 if (shift == 0) {
7974 /* No shift means no saturation. */
7975 assign(*qDiff1, mkexpr(z128));
7976 assign(*qDiff2, mkexpr(z128));
7977 } else {
7978 /* Saturation has occurred if any of the shifted-out bits are
7979 nonzero. We get the shifted-out bits by right-shifting the
7980 original value. */
7981 UInt rshift = laneBits - shift;
7982 vassert(rshift >= 1 && rshift < laneBits);
7983 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
7984 assign(*qDiff2, mkexpr(z128));
7985 }
7986 return;
7987 }
7988
7989 /* SQSHL */
7990 if (vex_streq(nm, "sqshl")) {
7991 IROp qop = mkVecQSHLNSATSS(size);
7992 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
7993 if (shift == 0) {
7994 /* No shift means no saturation. */
7995 assign(*qDiff1, mkexpr(z128));
7996 assign(*qDiff2, mkexpr(z128));
7997 } else {
7998 /* Saturation has occurred if any of the shifted-out bits are
7999 different from the top bit of the original value. */
8000 UInt rshift = laneBits - 1 - shift;
8001 vassert(rshift >= 0 && rshift < laneBits-1);
8002 /* qDiff1 is the shifted out bits, and the top bit of the original
8003 value, preceded by zeroes. */
8004 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8005 /* qDiff2 is the top bit of the original value, cloned the
8006 correct number of times. */
8007 assign(*qDiff2, binop(mkVecSHRN(size),
8008 binop(mkVecSARN(size), mkexpr(src),
8009 mkU8(laneBits-1)),
8010 mkU8(rshift)));
8011 /* This also succeeds in comparing the top bit of the original
8012 value to itself, which is a bit stupid, but not wrong. */
8013 }
8014 return;
8015 }
8016
8017 /* SQSHLU */
8018 if (vex_streq(nm, "sqshlu")) {
8019 IROp qop = mkVecQSHLNSATSU(size);
8020 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8021 if (shift == 0) {
8022 /* If there's no shift, saturation depends on the top bit
8023 of the source. */
8024 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8025 assign(*qDiff2, mkexpr(z128));
8026 } else {
8027 /* Saturation has occurred if any of the shifted-out bits are
8028 nonzero. We get the shifted-out bits by right-shifting the
8029 original value. */
8030 UInt rshift = laneBits - shift;
8031 vassert(rshift >= 1 && rshift < laneBits);
8032 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8033 assign(*qDiff2, mkexpr(z128));
8034 }
8035 return;
8036 }
8037
8038 vassert(0);
8039 }
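/* Saturation-detection example for the "uqshl" case above (hand
   computed): on 8-bit lanes with shift == 4, a source lane of 0xF3 has
   shifted-out bits 0xF3 >> 4 == 0x0F, so qDiff1 != qDiff2 and QCFLAG
   will be set; the saturated result lane is 0xFF.  A source lane of
   0x0B sheds only zero bits and produces the exact result 0xB0. */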
8040
8041
8042 /* Generate IR to do SRHADD and URHADD. */
8043 static
8044 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8045 {
8046 /* Generate this:
8047 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8048 */
8049 vassert(size <= 3);
8050 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8051 IROp opADD = mkVecADD(size);
8052 /* The only tricky bit is to generate the correct vector 1 constant. */
8053 const ULong ones64[4]
8054 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8055 0x0000000100000001ULL, 0x0000000000000001ULL };
8056 IRTemp imm64 = newTemp(Ity_I64);
8057 assign(imm64, mkU64(ones64[size]));
8058 IRTemp vecOne = newTempV128();
8059 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8060 IRTemp scaOne = newTemp(Ity_I8);
8061 assign(scaOne, mkU8(1));
8062 IRTemp res = newTempV128();
8063 assign(res,
8064 binop(opADD,
8065 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8066 binop(opADD,
8067 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8068 binop(opSHR,
8069 binop(opADD,
8070 binop(opADD,
8071 binop(Iop_AndV128, mkexpr(aa),
8072 mkexpr(vecOne)),
8073 binop(Iop_AndV128, mkexpr(bb),
8074 mkexpr(vecOne))
8075 ),
8076 mkexpr(vecOne)
8077 ),
8078 mkexpr(scaOne)
8079 )
8080 )
8081 )
8082 );
8083 return res;
8084 }
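/* The identity used above avoids any need for a wider intermediate:
   for unsigned bytes A == B == 0xFF,
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
         == 0x7F + 0x7F + 1 == 0xFF,
   which matches the full-precision rounded halving add (A + B + 1) >> 1.

   A hypothetical caller for URHADD Vd.16b, Vn.16b, Vm.16b might look
   roughly like the following (a sketch only; |nn|, |mm| and |dd| are
   assumed to name the operand registers):
*/
#if 0
   IRTemp argN = newTempV128();
   IRTemp argM = newTempV128();
   assign(argN, getQReg128(nn));
   assign(argM, getQReg128(mm));
   putQReg128(dd, mkexpr(math_RHADD(X00, True/*isU*/, argN, argM)));
#endif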
8085
8086
8087 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8088 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8089 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8090 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8091 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8092 unmodified. The presence of |opZHI| means this function can be used to
8093 generate QCFLAG update code for both scalar and vector SIMD operations.
8094 */
8095 static
8096 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8097 {
8098 IRTemp diff = newTempV128();
8099 IRTemp oldQCFLAG = newTempV128();
8100 IRTemp newQCFLAG = newTempV128();
8101 if (opZHI == Iop_INVALID) {
8102 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8103 } else {
8104 vassert(opZHI == Iop_ZeroHI64ofV128
8105 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8106 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8107 }
8108 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8109 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8110 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8111 }
8112
8113
8114 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8115 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8116 operations. */
8117 static
8118 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8119 {
8120 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8121 }
8122
8123
8124 /* Generate IR to rearrange two vector values in a way which is useful
8125 for doing S/D add-pair etc operations. There are 3 cases:
8126
8127 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8128
8129 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8130
8131 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8132
8133 The cases are distinguished as follows:
8134 isD == True, bitQ == 1 => 2d
8135 isD == False, bitQ == 1 => 4s
8136 isD == False, bitQ == 0 => 2s
8137 */
8138 static
8139 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8140 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8141 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8142 )
8143 {
8144 vassert(rearrL && *rearrL == IRTemp_INVALID);
8145 vassert(rearrR && *rearrR == IRTemp_INVALID);
8146 *rearrL = newTempV128();
8147 *rearrR = newTempV128();
8148 if (isD) {
8149 // 2d case
8150 vassert(bitQ == 1);
8151 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8152 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8153 }
8154 else if (!isD && bitQ == 1) {
8155 // 4s case
8156 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8157 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8158 } else {
8159 // 2s case
8160 vassert(!isD && bitQ == 0);
8161 IRTemp m1n1m0n0 = newTempV128();
8162 IRTemp m0n0m1n1 = newTempV128();
8163 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8164 mkexpr(vecM), mkexpr(vecN)));
8165 assign(m0n0m1n1, triop(Iop_SliceV128,
8166 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8167 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8168 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8169 }
8170 }
8171
8172
8173 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8174 static Double two_to_the_minus ( Int n )
8175 {
8176 if (n == 1) return 0.5;
8177 vassert(n >= 2 && n <= 64);
8178 Int half = n / 2;
8179 return two_to_the_minus(half) * two_to_the_minus(n - half);
8180 }
8181
8182
8183 /* Returns 2.0 ^ n for n in 1 .. 64 */
8184 static Double two_to_the_plus ( Int n )
8185 {
8186 if (n == 1) return 2.0;
8187 vassert(n >= 2 && n <= 64);
8188 Int half = n / 2;
8189 return two_to_the_plus(half) * two_to_the_plus(n - half);
8190 }
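/* Worked example of the recursion in the two helpers above: the exponent is
   split roughly in half, e.g.

      two_to_the_minus(5) = two_to_the_minus(2) * two_to_the_minus(3)
                          = 0.25 * 0.125
                          = 0.03125 = 2.0 ^ (-5)

   Both recursive arguments are >= 1 and strictly smaller than n, so the
   recursion always reaches the n == 1 base case, and every intermediate
   power of two for n <= 64 is exactly representable as a Double. */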
8191
8192
8193 /*------------------------------------------------------------*/
8194 /*--- SIMD and FP instructions ---*/
8195 /*------------------------------------------------------------*/
8196
8197 static
8198 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8199 {
8200 /* 31 29 23 21 20 15 14 10 9 4
8201 0 q 101110 op2 0 m 0 imm4 0 n d
8202 Decode fields: op2
8203 */
8204 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8205 if (INSN(31,31) != 0
8206 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8207 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8208 return False;
8209 }
8210 UInt bitQ = INSN(30,30);
8211 UInt op2 = INSN(23,22);
8212 UInt mm = INSN(20,16);
8213 UInt imm4 = INSN(14,11);
8214 UInt nn = INSN(9,5);
8215 UInt dd = INSN(4,0);
8216
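/* Note on the EXT case below: Iop_SliceV128(sHi, sLo, imm4) selects bytes
   [imm4+15 .. imm4] of the 32-byte concatenation sHi:sLo, so with
   sHi = Vm and sLo = Vn the full-width result is

      res.byte[i] = (i + imm4 <= 15) ? Vn.byte[i + imm4]
                                     : Vm.byte[i + imm4 - 16]

   which matches EXT Vd.16b, Vn.16b, Vm.16b, #imm4. */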
8217 if (op2 == BITS2(0,0)) {
8218 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
8219 IRTemp sHi = newTempV128();
8220 IRTemp sLo = newTempV128();
8221 IRTemp res = newTempV128();
8222 assign(sHi, getQReg128(mm));
8223 assign(sLo, getQReg128(nn));
8224 if (bitQ == 1) {
8225 if (imm4 == 0) {
8226 assign(res, mkexpr(sLo));
8227 } else {
8228 vassert(imm4 >= 1 && imm4 <= 15);
8229 assign(res, triop(Iop_SliceV128,
8230 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8231 }
8232 putQReg128(dd, mkexpr(res));
8233 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8234 } else {
8235 if (imm4 >= 8) return False;
8236 if (imm4 == 0) {
8237 assign(res, mkexpr(sLo));
8238 } else {
8239 vassert(imm4 >= 1 && imm4 <= 7);
8240 IRTemp hi64lo64 = newTempV128();
8241 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8242 mkexpr(sHi), mkexpr(sLo)));
8243 assign(res, triop(Iop_SliceV128,
8244 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8245 }
8246 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8247 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8248 }
8249 return True;
8250 }
8251
8252 return False;
8253 # undef INSN
8254 }
8255
8256
8257 static
8258 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8259 {
8260 /* 31 29 23 21 20 15 14 12 11 9 4
8261 0 q 001110 op2 0 m 0 len op 00 n d
8262 Decode fields: op2,len,op
8263 */
8264 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8265 if (INSN(31,31) != 0
8266 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8267 || INSN(21,21) != 0
8268 || INSN(15,15) != 0
8269 || INSN(11,10) != BITS2(0,0)) {
8270 return False;
8271 }
8272 UInt bitQ = INSN(30,30);
8273 UInt op2 = INSN(23,22);
8274 UInt mm = INSN(20,16);
8275 UInt len = INSN(14,13);
8276 UInt bitOP = INSN(12,12);
8277 UInt nn = INSN(9,5);
8278 UInt dd = INSN(4,0);
8279
8280 if (op2 == X00) {
8281 /* -------- 00,xx,0 TBL, xx register table -------- */
8282 /* -------- 00,xx,1 TBX, xx register table -------- */
8283 /* 31 28 20 15 14 12 9 4
8284 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8285 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8286 where Ta = 16b(q=1) or 8b(q=0)
8287 */
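/* Note: index bytes in Vm that are >= 16*(len+1) are out of range for
   the table. For TBL the corresponding result bytes become zero, for TBX
   they keep the existing contents of Vd; that is what the |oor_values|
   vector set up below supplies to math_TBL_TBX. */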
8288 Bool isTBX = bitOP == 1;
8289 /* The out-of-range values to use. */
8290 IRTemp oor_values = newTempV128();
8291 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8292 /* src value */
8293 IRTemp src = newTempV128();
8294 assign(src, getQReg128(mm));
8295 /* The table values */
8296 IRTemp tab[4];
8297 UInt i;
8298 for (i = 0; i <= len; i++) {
8299 vassert(i < 4);
8300 tab[i] = newTempV128();
8301 assign(tab[i], getQReg128((nn + i) % 32));
8302 }
8303 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8304 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8305 const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8306 const HChar* nm = isTBX ? "tbx" : "tbl";
8307 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8308 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8309 return True;
8310 }
8311
8312 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8313 return False;
8314 # undef INSN
8315 }
8316
8317
8318 static
8319 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8320 {
8321 /* 31 29 23 21 20 15 14 11 9 4
8322 0 q 001110 size 0 m 0 opcode 10 n d
8323 Decode fields: opcode
8324 */
8325 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8326 if (INSN(31,31) != 0
8327 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8328 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8329 return False;
8330 }
8331 UInt bitQ = INSN(30,30);
8332 UInt size = INSN(23,22);
8333 UInt mm = INSN(20,16);
8334 UInt opcode = INSN(14,12);
8335 UInt nn = INSN(9,5);
8336 UInt dd = INSN(4,0);
8337
8338 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8339 /* -------- 001 UZP1 std7_std7_std7 -------- */
8340 /* -------- 101 UZP2 std7_std7_std7 -------- */
8341 if (bitQ == 0 && size == X11) return False; // implied 1d case
8342 Bool isUZP1 = opcode == BITS3(0,0,1);
8343 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8344 : mkVecCATODDLANES(size);
8345 IRTemp preL = newTempV128();
8346 IRTemp preR = newTempV128();
8347 IRTemp res = newTempV128();
8348 if (bitQ == 0) {
8349 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8350 getQReg128(nn)));
8351 assign(preR, mkexpr(preL));
8352 } else {
8353 assign(preL, getQReg128(mm));
8354 assign(preR, getQReg128(nn));
8355 }
8356 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8357 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8358 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8359 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8360 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8361 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8362 return True;
8363 }
8364
8365 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8366 /* -------- 010 TRN1 std7_std7_std7 -------- */
8367 /* -------- 110 TRN2 std7_std7_std7 -------- */
8368 if (bitQ == 0 && size == X11) return False; // implied 1d case
8369 Bool isTRN1 = opcode == BITS3(0,1,0);
8370 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8371 : mkVecCATODDLANES(size);
8372 IROp op2 = mkVecINTERLEAVEHI(size);
8373 IRTemp srcM = newTempV128();
8374 IRTemp srcN = newTempV128();
8375 IRTemp res = newTempV128();
8376 assign(srcM, getQReg128(mm));
8377 assign(srcN, getQReg128(nn));
8378 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8379 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8380 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8381 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8382 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8383 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8384 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8385 return True;
8386 }
8387
8388 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8389 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8390 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8391 if (bitQ == 0 && size == X11) return False; // implied 1d case
8392 Bool isZIP1 = opcode == BITS3(0,1,1);
8393 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8394 : mkVecINTERLEAVEHI(size);
8395 IRTemp preL = newTempV128();
8396 IRTemp preR = newTempV128();
8397 IRTemp res = newTempV128();
8398 if (bitQ == 0 && !isZIP1) {
8399 IRTemp z128 = newTempV128();
8400 assign(z128, mkV128(0x0000));
8401 // preL = Vm shifted left 32 bits
8402 // preR = Vn shifted left 32 bits
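// Note: for the 2s ZIP2 case only lane 1 of each source is wanted.
// Shifting both sources left by 32 bits moves lane 1 up to lane 2, where
// the INTERLEAVEHI op (which reads lanes 3 and 2) picks it up; the
// unwanted lane-3 contributions land in the upper 64 bits of the result
// and are discarded by math_MAYBE_ZERO_HI64 below.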
8403 assign(preL, triop(Iop_SliceV128,
8404 getQReg128(mm), mkexpr(z128), mkU8(12)));
8405 assign(preR, triop(Iop_SliceV128,
8406 getQReg128(nn), mkexpr(z128), mkU8(12)));
8407
8408 } else {
8409 assign(preL, getQReg128(mm));
8410 assign(preR, getQReg128(nn));
8411 }
8412 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8413 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8414 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8415 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8416 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8417 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8418 return True;
8419 }
8420
8421 return False;
8422 # undef INSN
8423 }
8424
8425
8426 static
8427 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8428 {
8429 /* 31 28 23 21 16 11 9 4
8430 0 q u 01110 size 11000 opcode 10 n d
8431 Decode fields: u,size,opcode
8432 */
8433 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8434 if (INSN(31,31) != 0
8435 || INSN(28,24) != BITS5(0,1,1,1,0)
8436 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8437 return False;
8438 }
8439 UInt bitQ = INSN(30,30);
8440 UInt bitU = INSN(29,29);
8441 UInt size = INSN(23,22);
8442 UInt opcode = INSN(16,12);
8443 UInt nn = INSN(9,5);
8444 UInt dd = INSN(4,0);
8445
8446 if (opcode == BITS5(0,0,0,1,1)) {
8447 /* -------- 0,xx,00011 SADDLV -------- */
8448 /* -------- 1,xx,00011 UADDLV -------- */
8449 /* size is the narrow size */
8450 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8451 Bool isU = bitU == 1;
8452 IRTemp src = newTempV128();
8453 assign(src, getQReg128(nn));
8454 /* The basic plan is to widen the lower half, and if Q = 1,
8455 the upper half too. Add them together (if Q = 1), and in
8456 either case fold with add at twice the lane width.
8457 */
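/* Worked example: for UADDLV with an 8B source (size = X00, bitQ = 0),
   the low 64 bits of Vn are widened to eight 16-bit lanes, and
   math_FOLDV then reduces the widened vector with 16-bit adds so that
   lane 0 ends up holding the sum of all eight byte lanes -- the H-sized
   destination value. */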
8458 IRExpr* widened
8459 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8460 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8461 if (bitQ == 1) {
8462 widened
8463 = binop(mkVecADD(size+1),
8464 widened,
8465 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8466 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8467 );
8468 }
8469 /* Now fold. */
8470 IRTemp tWi = newTempV128();
8471 assign(tWi, widened);
8472 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8473 putQReg128(dd, mkexpr(res));
8474 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8475 const HChar ch = "bhsd"[size+1];
8476 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8477 nameQReg128(dd), ch, nameQReg128(nn), arr);
8478 return True;
8479 }
8480
8481 UInt ix = 0;
8482 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8483 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8484 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8485 /**/
8486 if (ix != 0) {
8487 /* -------- 0,xx,01010: SMAXV -------- (1) */
8488 /* -------- 1,xx,01010: UMAXV -------- (2) */
8489 /* -------- 0,xx,11010: SMINV -------- (3) */
8490 /* -------- 1,xx,11010: UMINV -------- (4) */
8491 /* -------- 0,xx,11011: ADDV -------- (5) */
8492 vassert(ix >= 1 && ix <= 5);
8493 if (size == X11) return False; // 1d,2d cases not allowed
8494 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
8495 const IROp opMAXS[3]
8496 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8497 const IROp opMAXU[3]
8498 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
8499 const IROp opMINS[3]
8500 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8501 const IROp opMINU[3]
8502 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8503 const IROp opADD[3]
8504 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
8505 vassert(size < 3);
8506 IROp op = Iop_INVALID;
8507 const HChar* nm = NULL;
8508 switch (ix) {
8509 case 1: op = opMAXS[size]; nm = "smaxv"; break;
8510 case 2: op = opMAXU[size]; nm = "umaxv"; break;
8511 case 3: op = opMINS[size]; nm = "sminv"; break;
8512 case 4: op = opMINU[size]; nm = "uminv"; break;
8513 case 5: op = opADD[size]; nm = "addv"; break;
8514 default: vassert(0);
8515 }
8516 vassert(op != Iop_INVALID && nm != NULL);
8517 IRTemp tN1 = newTempV128();
8518 assign(tN1, getQReg128(nn));
8519 /* If Q == 0, we're just folding lanes in the lower half of
8520 the value. In which case, copy the lower half of the
8521 source into the upper half, so we can then treat it the
8522 same as the full width case. Except for the addition case,
8523 in which we have to zero out the upper half. */
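/* Why the addition case differs: duplicating the low half is harmless for
   MAX/MIN, since the max/min over the duplicated lanes equals the max/min
   over the original lanes, but for ADDV it would double the sum.  Zeroing
   the upper half instead leaves the sum unchanged. */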
8524 IRTemp tN2 = newTempV128();
8525 assign(tN2, bitQ == 0
8526 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8527 : mk_CatEvenLanes64x2(tN1,tN1))
8528 : mkexpr(tN1));
8529 IRTemp res = math_FOLDV(tN2, op);
8530 if (res == IRTemp_INVALID)
8531 return False; /* means math_FOLDV
8532 doesn't handle this case yet */
8533 putQReg128(dd, mkexpr(res));
8534 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8535 IRType laneTy = tys[size];
8536 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8537 DIP("%s %s, %s.%s\n", nm,
8538 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8539 return True;
8540 }
8541
8542 if ((size == X00 || size == X10)
8543 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8544 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8545 /* -------- 0,10,01100: FMINNMV s_4s -------- */
8546 /* -------- 1,00,01111: FMAXV s_4s -------- */
8547 /* -------- 1,10,01111: FMINV s_4s -------- */
8548 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8549 if (bitQ == 0) return False; // Only 4s is allowed
8550 Bool isMIN = (size & 2) == 2;
8551 Bool isNM = opcode == BITS5(0,1,1,0,0);
8552 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8553 IRTemp src = newTempV128();
8554 assign(src, getQReg128(nn));
8555 IRTemp res = math_FOLDV(src, opMXX);
8556 putQReg128(dd, mkexpr(res));
8557 DIP("%s%sv s%u, %u.4s\n",
8558 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8559 return True;
8560 }
8561
8562 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8563 return False;
8564 # undef INSN
8565 }
8566
8567
8568 static
8569 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8570 {
8571 /* 31 28 20 15 14 10 9 4
8572 0 q op 01110000 imm5 0 imm4 1 n d
8573 Decode fields: q,op,imm4
8574 */
8575 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8576 if (INSN(31,31) != 0
8577 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8578 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8579 return False;
8580 }
8581 UInt bitQ = INSN(30,30);
8582 UInt bitOP = INSN(29,29);
8583 UInt imm5 = INSN(20,16);
8584 UInt imm4 = INSN(14,11);
8585 UInt nn = INSN(9,5);
8586 UInt dd = INSN(4,0);
8587
8588 /* -------- x,0,0000: DUP (element, vector) -------- */
8589 /* 31 28 20 15 9 4
8590 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
8591 */
8592 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8593 UInt laneNo = 0;
8594 UInt laneSzLg2 = 0;
8595 HChar laneCh = '?';
8596 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8597 getQReg128(nn), imm5);
8598 if (res == IRTemp_INVALID)
8599 return False;
8600 if (bitQ == 0 && laneSzLg2 == X11)
8601 return False; /* .1d case */
8602 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8603 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8604 DIP("dup %s.%s, %s.%c[%u]\n",
8605 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8606 return True;
8607 }
8608
8609 /* -------- x,0,0001: DUP (general, vector) -------- */
8610 /* 31 28 20 15 9 4
8611 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
8612 Q=0 writes 64, Q=1 writes 128
8613 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
8614 xxx10 4H(q=0) or 8H(q=1), R=W
8615 xx100 2S(q=0) or 4S(q=1), R=W
8616 x1000 Invalid(q=0) or 2D(q=1), R=X
8617 x0000 Invalid(q=0) or Invalid(q=1)
8618 Require op=0, imm4=0001
8619 */
8620 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8621 Bool isQ = bitQ == 1;
8622 IRTemp w0 = newTemp(Ity_I64);
8623 const HChar* arT = "??";
8624 IRType laneTy = Ity_INVALID;
8625 if (imm5 & 1) {
8626 arT = isQ ? "16b" : "8b";
8627 laneTy = Ity_I8;
8628 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8629 }
8630 else if (imm5 & 2) {
8631 arT = isQ ? "8h" : "4h";
8632 laneTy = Ity_I16;
8633 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8634 }
8635 else if (imm5 & 4) {
8636 arT = isQ ? "4s" : "2s";
8637 laneTy = Ity_I32;
8638 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8639 }
8640 else if ((imm5 & 8) && isQ) {
8641 arT = "2d";
8642 laneTy = Ity_I64;
8643 assign(w0, getIReg64orZR(nn));
8644 }
8645 else {
8646 /* invalid; leave laneTy unchanged. */
8647 }
8648 /* */
8649 if (laneTy != Ity_INVALID) {
8650 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8651 putQReg128(dd, binop(Iop_64HLtoV128,
8652 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8653 DIP("dup %s.%s, %s\n",
8654 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8655 return True;
8656 }
8657 /* invalid */
8658 return False;
8659 }
8660
8661 /* -------- 1,0,0011: INS (general) -------- */
8662 /* 31 28 20 15 9 4
8663 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
8664 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8665 xxx10 -> H, xxx
8666 xx100 -> S, xx
8667 x1000 -> D, x
8668 */
8669 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8670 HChar ts = '?';
8671 UInt laneNo = 16;
8672 IRExpr* src = NULL;
8673 if (imm5 & 1) {
8674 src = unop(Iop_64to8, getIReg64orZR(nn));
8675 laneNo = (imm5 >> 1) & 15;
8676 ts = 'b';
8677 }
8678 else if (imm5 & 2) {
8679 src = unop(Iop_64to16, getIReg64orZR(nn));
8680 laneNo = (imm5 >> 2) & 7;
8681 ts = 'h';
8682 }
8683 else if (imm5 & 4) {
8684 src = unop(Iop_64to32, getIReg64orZR(nn));
8685 laneNo = (imm5 >> 3) & 3;
8686 ts = 's';
8687 }
8688 else if (imm5 & 8) {
8689 src = getIReg64orZR(nn);
8690 laneNo = (imm5 >> 4) & 1;
8691 ts = 'd';
8692 }
8693 /* */
8694 if (src) {
8695 vassert(laneNo < 16);
8696 putQRegLane(dd, laneNo, src);
8697 DIP("ins %s.%c[%u], %s\n",
8698 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
8699 return True;
8700 }
8701 /* invalid */
8702 return False;
8703 }
8704
8705 /* -------- x,0,0101: SMOV -------- */
8706 /* -------- x,0,0111: UMOV -------- */
8707 /* 31 28 20 15 9 4
8708 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
8709 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
8710 dest is Xd when q==1, Wd when q==0
8711 UMOV:
8712 Ts,index,ops = case q:imm5 of
8713 0:xxxx1 -> B, xxxx, 8Uto64
8714 1:xxxx1 -> invalid
8715 0:xxx10 -> H, xxx, 16Uto64
8716 1:xxx10 -> invalid
8717 0:xx100 -> S, xx, 32Uto64
8718 1:xx100 -> invalid
8719 1:x1000 -> D, x, copy64
8720 other -> invalid
8721 SMOV:
8722 Ts,index,ops = case q:imm5 of
8723 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
8724 1:xxxx1 -> B, xxxx, 8Sto64
8725 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
8726 1:xxx10 -> H, xxx, 16Sto64
8727 0:xx100 -> invalid
8728 1:xx100 -> S, xx, 32Sto64
8729 1:x1000 -> invalid
8730 other -> invalid
8731 */
8732 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
8733 Bool isU = (imm4 & 2) == 2;
8734 const HChar* arTs = "??";
8735 UInt laneNo = 16; /* invalid */
8736 // Setting 'res' to non-NULL determines valid/invalid
8737 IRExpr* res = NULL;
8738 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
8739 laneNo = (imm5 >> 1) & 15;
8740 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8741 res = isU ? unop(Iop_8Uto64, lane)
8742 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
8743 arTs = "b";
8744 }
8745 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
8746 laneNo = (imm5 >> 1) & 15;
8747 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8748 res = isU ? NULL
8749 : unop(Iop_8Sto64, lane);
8750 arTs = "b";
8751 }
8752 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
8753 laneNo = (imm5 >> 2) & 7;
8754 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8755 res = isU ? unop(Iop_16Uto64, lane)
8756 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
8757 arTs = "h";
8758 }
8759 else if (bitQ && (imm5 & 2)) { // 1:xxx10
8760 laneNo = (imm5 >> 2) & 7;
8761 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8762 res = isU ? NULL
8763 : unop(Iop_16Sto64, lane);
8764 arTs = "h";
8765 }
8766 else if (!bitQ && (imm5 & 4)) { // 0:xx100
8767 laneNo = (imm5 >> 3) & 3;
8768 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8769 res = isU ? unop(Iop_32Uto64, lane)
8770 : NULL;
8771 arTs = "s";
8772 }
8773 else if (bitQ && (imm5 & 4)) { // 1:xx100
8774 laneNo = (imm5 >> 3) & 3;
8775 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8776 res = isU ? NULL
8777 : unop(Iop_32Sto64, lane);
8778 arTs = "s";
8779 }
8780 else if (bitQ && (imm5 & 8)) { // 1:x1000
8781 laneNo = (imm5 >> 4) & 1;
8782 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
8783 res = isU ? lane
8784 : NULL;
8785 arTs = "d";
8786 }
8787 /* */
8788 if (res) {
8789 vassert(laneNo < 16);
8790 putIReg64orZR(dd, res);
8791 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
8792 nameIRegOrZR(bitQ == 1, dd),
8793 nameQReg128(nn), arTs, laneNo);
8794 return True;
8795 }
8796 /* invalid */
8797 return False;
8798 }
8799
8800 /* -------- 1,1,xxxx: INS (element) -------- */
8801 /* 31 28 20 14 9 4
8802 011 01110000 imm5 0 imm4 1 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
8803 where Ts,ix1,ix2
8804 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
8805 xxx10 -> H, xxx, imm4[3:1]
8806 xx100 -> S, xx, imm4[3:2]
8807 x1000 -> D, x, imm4[3:3]
8808 */
8809 if (bitQ == 1 && bitOP == 1) {
8810 HChar ts = '?';
8811 IRType ity = Ity_INVALID;
8812 UInt ix1 = 16;
8813 UInt ix2 = 16;
8814 if (imm5 & 1) {
8815 ts = 'b';
8816 ity = Ity_I8;
8817 ix1 = (imm5 >> 1) & 15;
8818 ix2 = (imm4 >> 0) & 15;
8819 }
8820 else if (imm5 & 2) {
8821 ts = 'h';
8822 ity = Ity_I16;
8823 ix1 = (imm5 >> 2) & 7;
8824 ix2 = (imm4 >> 1) & 7;
8825 }
8826 else if (imm5 & 4) {
8827 ts = 's';
8828 ity = Ity_I32;
8829 ix1 = (imm5 >> 3) & 3;
8830 ix2 = (imm4 >> 2) & 3;
8831 }
8832 else if (imm5 & 8) {
8833 ts = 'd';
8834 ity = Ity_I64;
8835 ix1 = (imm5 >> 4) & 1;
8836 ix2 = (imm4 >> 3) & 1;
8837 }
8838 /* */
8839 if (ity != Ity_INVALID) {
8840 vassert(ix1 < 16);
8841 vassert(ix2 < 16);
8842 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
8843 DIP("ins %s.%c[%u], %s.%c[%u]\n",
8844 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
8845 return True;
8846 }
8847 /* invalid */
8848 return False;
8849 }
8850
8851 return False;
8852 # undef INSN
8853 }
8854
8855
8856 static
8857 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
8858 {
8859 /* 31 28 18 15 11 9 4
8860 0q op 01111 00000 abc cmode 01 defgh d
8861 Decode fields: q,op,cmode
8862 Bit 11 is really "o2", but it is always zero.
8863 */
8864 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8865 if (INSN(31,31) != 0
8866 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
8867 || INSN(11,10) != BITS2(0,1)) {
8868 return False;
8869 }
8870 UInt bitQ = INSN(30,30);
8871 UInt bitOP = INSN(29,29);
8872 UInt cmode = INSN(15,12);
8873 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
8874 UInt dd = INSN(4,0);
8875
8876 ULong imm64lo = 0;
8877 UInt op_cmode = (bitOP << 4) | cmode;
8878 Bool ok = False;
8879 Bool isORR = False;
8880 Bool isBIC = False;
8881 Bool isMOV = False;
8882 Bool isMVN = False;
8883 Bool isFMOV = False;
8884 switch (op_cmode) {
8885 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
8886 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
8887 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
8888 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
8889 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
8890 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
8891 ok = True; isMOV = True; break;
8892
8893 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
8894 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
8895 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
8896 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
8897 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
8898 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
8899 ok = True; isORR = True; break;
8900
8901 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
8902 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
8903 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
8904 ok = True; isMOV = True; break;
8905
8906 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
8907 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
8908 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
8909 ok = True; isORR = True; break;
8910
8911 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
8912 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
8913 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
8914 ok = True; isMOV = True; break;
8915
8916 /* -------- x,0,1110 MOVI 8-bit -------- */
8917 case BITS5(0,1,1,1,0):
8918 ok = True; isMOV = True; break;
8919
8920 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
8921 case BITS5(0,1,1,1,1): // 0:1111
8922 ok = True; isFMOV = True; break;
8923
8924 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
8925 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
8926 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
8927 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
8928 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
8929 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
8930 ok = True; isMVN = True; break;
8931
8932 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
8933 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
8934 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
8935 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
8936 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
8937 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
8938 ok = True; isBIC = True; break;
8939
8940 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
8941 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
8942 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
8943 ok = True; isMVN = True; break;
8944
8945 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
8946 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
8947 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
8948 ok = True; isBIC = True; break;
8949
8950 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
8951 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
8952 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
8953 ok = True; isMVN = True; break;
8954
8955 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
8956 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
8957 case BITS5(1,1,1,1,0):
8958 ok = True; isMOV = True; break;
8959
8960 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
8961 case BITS5(1,1,1,1,1): // 1:1111
8962 ok = bitQ == 1; isFMOV = True; break;
8963
8964 default:
8965 break;
8966 }
8967 if (ok) {
8968 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
8969 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
8970 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
8971 }
8972 if (ok) {
8973 if (isORR || isBIC) {
8974 ULong inv
8975 = isORR ? 0ULL : ~0ULL;
8976 IRExpr* immV128
8977 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
8978 IRExpr* res
8979 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
8980 const HChar* nm = isORR ? "orr" : "bic";
8981 if (bitQ == 0) {
8982 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
8983 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
8984 } else {
8985 putQReg128(dd, res);
8986 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
8987 nameQReg128(dd), imm64lo, imm64lo);
8988 }
8989 }
8990 else if (isMOV || isMVN || isFMOV) {
8991 if (isMVN) imm64lo = ~imm64lo;
8992 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
8993 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
8994 mkU64(imm64lo));
8995 putQReg128(dd, immV128);
8996 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
8997 }
8998 return True;
8999 }
9000 /* else fall through */
9001
9002 return False;
9003 # undef INSN
9004 }
9005
9006
9007 static
9008 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9009 {
9010 /* 31 28 20 15 14 10 9 4
9011 01 op 11110000 imm5 0 imm4 1 n d
9012 Decode fields: op,imm4
9013 */
9014 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9015 if (INSN(31,30) != BITS2(0,1)
9016 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9017 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9018 return False;
9019 }
9020 UInt bitOP = INSN(29,29);
9021 UInt imm5 = INSN(20,16);
9022 UInt imm4 = INSN(14,11);
9023 UInt nn = INSN(9,5);
9024 UInt dd = INSN(4,0);
9025
9026 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9027 /* -------- 0,0000 DUP (element, scalar) -------- */
9028 IRTemp w0 = newTemp(Ity_I64);
9029 const HChar* arTs = "??";
9030 IRType laneTy = Ity_INVALID;
9031 UInt laneNo = 16; /* invalid */
9032 if (imm5 & 1) {
9033 arTs = "b";
9034 laneNo = (imm5 >> 1) & 15;
9035 laneTy = Ity_I8;
9036 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9037 }
9038 else if (imm5 & 2) {
9039 arTs = "h";
9040 laneNo = (imm5 >> 2) & 7;
9041 laneTy = Ity_I16;
9042 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9043 }
9044 else if (imm5 & 4) {
9045 arTs = "s";
9046 laneNo = (imm5 >> 3) & 3;
9047 laneTy = Ity_I32;
9048 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9049 }
9050 else if (imm5 & 8) {
9051 arTs = "d";
9052 laneNo = (imm5 >> 4) & 1;
9053 laneTy = Ity_I64;
9054 assign(w0, getQRegLane(nn, laneNo, laneTy));
9055 }
9056 else {
9057 /* invalid; leave laneTy unchanged. */
9058 }
9059 /* */
9060 if (laneTy != Ity_INVALID) {
9061 vassert(laneNo < 16);
9062 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9063 DIP("dup %s, %s.%s[%u]\n",
9064 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9065 return True;
9066 }
9067 /* else fall through */
9068 }
9069
9070 return False;
9071 # undef INSN
9072 }
9073
9074
9075 static
9076 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9077 {
9078 /* 31 28 23 21 16 11 9 4
9079 01 u 11110 sz 11000 opcode 10 n d
9080 Decode fields: u,sz,opcode
9081 */
9082 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9083 if (INSN(31,30) != BITS2(0,1)
9084 || INSN(28,24) != BITS5(1,1,1,1,0)
9085 || INSN(21,17) != BITS5(1,1,0,0,0)
9086 || INSN(11,10) != BITS2(1,0)) {
9087 return False;
9088 }
9089 UInt bitU = INSN(29,29);
9090 UInt sz = INSN(23,22);
9091 UInt opcode = INSN(16,12);
9092 UInt nn = INSN(9,5);
9093 UInt dd = INSN(4,0);
9094
9095 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9096 /* -------- 0,11,11011 ADDP d_2d -------- */
9097 IRTemp xy = newTempV128();
9098 IRTemp xx = newTempV128();
9099 assign(xy, getQReg128(nn));
9100 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9101 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9102 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9103 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9104 return True;
9105 }
9106
9107 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9108 /* -------- 1,00,01101 ADDP s_2s -------- */
9109 /* -------- 1,01,01101 ADDP d_2d -------- */
9110 Bool isD = sz == X01;
9111 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9112 IROp opADD = mkVecADDF(isD ? 3 : 2);
9113 IRTemp src = newTempV128();
9114 IRTemp argL = newTempV128();
9115 IRTemp argR = newTempV128();
9116 assign(src, getQReg128(nn));
9117 assign(argL, unop(opZHI, mkexpr(src)));
9118 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9119 mkU8(isD ? 8 : 4))));
9120 putQReg128(dd, unop(opZHI,
9121 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9122 mkexpr(argL), mkexpr(argR))));
9123 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9124 return True;
9125 }
9126
9127 if (bitU == 1
9128 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9129 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9130 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9131 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9132 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9133 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9134 Bool isD = (sz & 1) == 1;
9135 Bool isMIN = (sz & 2) == 2;
9136 Bool isNM = opcode == BITS5(0,1,1,0,0);
9137 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9138 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9139 IRTemp src = newTempV128();
9140 IRTemp argL = newTempV128();
9141 IRTemp argR = newTempV128();
9142 assign(src, getQReg128(nn));
9143 assign(argL, unop(opZHI, mkexpr(src)));
9144 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9145 mkU8(isD ? 8 : 4))));
9146 putQReg128(dd, unop(opZHI,
9147 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9148 HChar c = isD ? 'd' : 's';
9149 DIP("%s%sp %c%u, v%u.2%c\n",
9150 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9151 return True;
9152 }
9153
9154 return False;
9155 # undef INSN
9156 }
9157
9158
9159 static
9160 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9161 {
9162 /* 31 28 22 18 15 10 9 4
9163 01 u 111110 immh immb opcode 1 n d
9164 Decode fields: u,immh,opcode
9165 */
9166 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9167 if (INSN(31,30) != BITS2(0,1)
9168 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9169 return False;
9170 }
9171 UInt bitU = INSN(29,29);
9172 UInt immh = INSN(22,19);
9173 UInt immb = INSN(18,16);
9174 UInt opcode = INSN(15,11);
9175 UInt nn = INSN(9,5);
9176 UInt dd = INSN(4,0);
9177 UInt immhb = (immh << 3) | immb;
9178
9179 if ((immh & 8) == 8
9180 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9181 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9182 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9183 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9184 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9185 Bool isU = bitU == 1;
9186 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9187 UInt sh = 128 - immhb;
9188 vassert(sh >= 1 && sh <= 64);
9189 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9190 IRExpr* src = getQReg128(nn);
9191 IRTemp shf = newTempV128();
9192 IRTemp res = newTempV128();
9193 if (sh == 64 && isU) {
9194 assign(shf, mkV128(0x0000));
9195 } else {
9196 UInt nudge = 0;
9197 if (sh == 64) {
9198 vassert(!isU);
9199 nudge = 1;
9200 }
9201 assign(shf, binop(op, src, mkU8(sh - nudge)));
9202 }
9203 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9204 : mkexpr(shf));
9205 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9206 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9207 : (isU ? "ushr" : "sshr");
9208 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9209 return True;
9210 }
9211
9212 if ((immh & 8) == 8
9213 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9214 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9215 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9216 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9217 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9218 Bool isU = bitU == 1;
9219 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9220 UInt sh = 128 - immhb;
9221 vassert(sh >= 1 && sh <= 64);
9222 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9223 vassert(sh >= 1 && sh <= 64);
9224 IRExpr* src = getQReg128(nn);
9225 IRTemp imm8 = newTemp(Ity_I8);
9226 assign(imm8, mkU8((UChar)(-sh)));
9227 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9228 IRTemp shf = newTempV128();
9229 IRTemp res = newTempV128();
9230 assign(shf, binop(op, src, amt));
9231 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9232 : mkexpr(shf));
9233 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9234 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9235 : (isU ? "urshr" : "srshr");
9236 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9237 return True;
9238 }
9239
9240 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9241 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9242 UInt sh = 128 - immhb;
9243 vassert(sh >= 1 && sh <= 64);
9244 if (sh == 64) {
9245 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9246 } else {
9247 /* sh is in range 1 .. 63 */
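/* Note: the arithmetic right shift replicates the top bit, so nmask has
   exactly the top |sh| bits set; e.g. sh == 3 gives 0xE000000000000000.
   Those bits of Dd are preserved and the remaining bits receive Dn >> sh,
   per the SRI semantics. */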
9248 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9249 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9250 IRTemp res = newTempV128();
9251 assign(res, binop(Iop_OrV128,
9252 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9253 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9254 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9255 }
9256 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9257 return True;
9258 }
9259
9260 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9261 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9262 UInt sh = immhb - 64;
9263 vassert(sh >= 0 && sh < 64);
9264 putQReg128(dd,
9265 unop(Iop_ZeroHI64ofV128,
9266 sh == 0 ? getQReg128(nn)
9267 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9268 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9269 return True;
9270 }
9271
9272 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9273 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9274 UInt sh = immhb - 64;
9275 vassert(sh >= 0 && sh < 64);
9276 if (sh == 0) {
9277 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9278 } else {
9279 /* sh is in range 1 .. 63 */
9280 ULong nmask = (1ULL << sh) - 1;
9281 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9282 IRTemp res = newTempV128();
9283 assign(res, binop(Iop_OrV128,
9284 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9285 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9286 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9287 }
9288 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9289 return True;
9290 }
9291
9292 if (opcode == BITS5(0,1,1,1,0)
9293 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9294 /* -------- 0,01110 SQSHL #imm -------- */
9295 /* -------- 1,01110 UQSHL #imm -------- */
9296 /* -------- 1,01100 SQSHLU #imm -------- */
9297 UInt size = 0;
9298 UInt shift = 0;
9299 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9300 if (!ok) return False;
9301 vassert(size >= 0 && size <= 3);
9302 /* The shift encoding has opposite sign for the leftwards case.
9303 Adjust shift to compensate. */
9304 UInt lanebits = 8 << size;
9305 shift = lanebits - shift;
9306 vassert(shift >= 0 && shift < lanebits);
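/* Worked example, assuming getLaneInfo_IMMH_IMMB returns
   (2*lanebits - immhb) for the selected lane size: a left shift of 3 on
   a D-sized lane is encoded as immhb = 64 + 3 = 67, so the helper
   returns 128 - 67 = 61 and the adjustment above recovers 64 - 61 = 3. */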
9307 const HChar* nm = NULL;
9308 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9309 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9310 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9311 else vassert(0);
9312 IRTemp qDiff1 = IRTemp_INVALID;
9313 IRTemp qDiff2 = IRTemp_INVALID;
9314 IRTemp res = IRTemp_INVALID;
9315 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9316 /* This relies on the fact that the zeroed out lanes generate zeroed
9317 result lanes and don't saturate, so there's no point in trimming
9318 the resulting res, qDiff1 or qDiff2 values. */
9319 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9320 putQReg128(dd, mkexpr(res));
9321 updateQCFLAGwithDifference(qDiff1, qDiff2);
9322 const HChar arr = "bhsd"[size];
9323 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9324 return True;
9325 }
9326
9327 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9328 || (bitU == 1
9329 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9330 /* -------- 0,10010 SQSHRN #imm -------- */
9331 /* -------- 1,10010 UQSHRN #imm -------- */
9332 /* -------- 0,10011 SQRSHRN #imm -------- */
9333 /* -------- 1,10011 UQRSHRN #imm -------- */
9334 /* -------- 1,10000 SQSHRUN #imm -------- */
9335 /* -------- 1,10001 SQRSHRUN #imm -------- */
9336 UInt size = 0;
9337 UInt shift = 0;
9338 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9339 if (!ok || size == X11) return False;
9340 vassert(size >= X00 && size <= X10);
9341 vassert(shift >= 1 && shift <= (8 << size));
9342 const HChar* nm = "??";
9343 IROp op = Iop_INVALID;
9344 /* Decide on the name and the operation. */
9345 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9346 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9347 }
9348 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9349 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9350 }
9351 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9352 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9353 }
9354 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9355 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9356 }
9357 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9358 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9359 }
9360 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9361 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9362 }
9363 else vassert(0);
9364 /* Compute the result (Q, shifted value) pair. */
9365 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9366 IRTemp pair = newTempV128();
9367 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9368 /* Update the result reg */
9369 IRTemp res64in128 = newTempV128();
9370 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9371 putQReg128(dd, mkexpr(res64in128));
9372 /* Update the Q flag. */
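/* Going by the op naming, the QAND*NARROW ops return the narrowed result
   in the lower 64 bits of |pair| and a saturation indication in the upper
   64 bits.  Duplicating the upper half and comparing it against zero via
   updateQCFLAGwithDifference therefore makes QCFLAG sticky-nonzero
   whenever that indication is nonzero. */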
9373 IRTemp q64q64 = newTempV128();
9374 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9375 IRTemp z128 = newTempV128();
9376 assign(z128, mkV128(0x0000));
9377 updateQCFLAGwithDifference(q64q64, z128);
9378 /* */
9379 const HChar arrNarrow = "bhsd"[size];
9380 const HChar arrWide = "bhsd"[size+1];
9381 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9382 return True;
9383 }
9384
9385 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9386 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9387 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9388 UInt size = 0;
9389 UInt fbits = 0;
9390 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9391 /* The following holds because immh is never zero. */
9392 vassert(ok);
9393 /* The following holds because immh >= 0100. */
9394 vassert(size == X10 || size == X11);
9395 Bool isD = size == X11;
9396 Bool isU = bitU == 1;
9397 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9398 Double scale = two_to_the_minus(fbits);
9399 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9400 : IRExpr_Const(IRConst_F32( (Float)scale ));
9401 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9402 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9403 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9404 IRType tyF = isD ? Ity_F64 : Ity_F32;
9405 IRType tyI = isD ? Ity_I64 : Ity_I32;
9406 IRTemp src = newTemp(tyI);
9407 IRTemp res = newTemp(tyF);
9408 IRTemp rm = mk_get_IR_rounding_mode();
9409 assign(src, getQRegLane(nn, 0, tyI));
9410 assign(res, triop(opMUL, mkexpr(rm),
9411 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9412 putQRegLane(dd, 0, mkexpr(res));
9413 if (!isD) {
9414 putQRegLane(dd, 1, mkU32(0));
9415 }
9416 putQRegLane(dd, 1, mkU64(0));
9417 const HChar ch = isD ? 'd' : 's';
9418 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9419 ch, dd, ch, nn, fbits);
9420 return True;
9421 }
9422
9423 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9424 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9425 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9426 UInt size = 0;
9427 UInt fbits = 0;
9428 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9429 /* The following holds because immh is never zero. */
9430 vassert(ok);
9431 /* The following holds because immh >= 0100. */
9432 vassert(size == X10 || size == X11);
9433 Bool isD = size == X11;
9434 Bool isU = bitU == 1;
9435 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9436 Double scale = two_to_the_plus(fbits);
9437 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9438 : IRExpr_Const(IRConst_F32( (Float)scale ));
9439 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9440 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9441 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9442 IRType tyF = isD ? Ity_F64 : Ity_F32;
9443 IRType tyI = isD ? Ity_I64 : Ity_I32;
9444 IRTemp src = newTemp(tyF);
9445 IRTemp res = newTemp(tyI);
9446 IRTemp rm = newTemp(Ity_I32);
9447 assign(src, getQRegLane(nn, 0, tyF));
9448 assign(rm, mkU32(Irrm_ZERO));
9449 assign(res, binop(opCVT, mkexpr(rm),
9450 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9451 putQRegLane(dd, 0, mkexpr(res));
9452 if (!isD) {
9453 putQRegLane(dd, 1, mkU32(0));
9454 }
9455 putQRegLane(dd, 1, mkU64(0));
9456 const HChar ch = isD ? 'd' : 's';
9457 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9458 ch, dd, ch, nn, fbits);
9459 return True;
9460 }
9461
9462 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9463 return False;
9464 # undef INSN
9465 }
9466
9467
9468 static
9469 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9470 {
9471 /* 31 29 28 23 21 20 15 11 9 4
9472 01 U 11110 size 1 m opcode 00 n d
9473 Decode fields: u,opcode
9474 */
9475 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9476 if (INSN(31,30) != BITS2(0,1)
9477 || INSN(28,24) != BITS5(1,1,1,1,0)
9478 || INSN(21,21) != 1
9479 || INSN(11,10) != BITS2(0,0)) {
9480 return False;
9481 }
9482 UInt bitU = INSN(29,29);
9483 UInt size = INSN(23,22);
9484 UInt mm = INSN(20,16);
9485 UInt opcode = INSN(15,12);
9486 UInt nn = INSN(9,5);
9487 UInt dd = INSN(4,0);
9488 vassert(size < 4);
9489
9490 if (bitU == 0
9491 && (opcode == BITS4(1,1,0,1)
9492 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9493 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9494 /* -------- 0,1001 SQDMLAL -------- */ // 1
9495 /* -------- 0,1011 SQDMLSL -------- */ // 2
9496 /* Widens, and size refers to the narrowed lanes. */
9497 UInt ks = 3;
9498 switch (opcode) {
9499 case BITS4(1,1,0,1): ks = 0; break;
9500 case BITS4(1,0,0,1): ks = 1; break;
9501 case BITS4(1,0,1,1): ks = 2; break;
9502 default: vassert(0);
9503 }
9504 vassert(ks >= 0 && ks <= 2);
9505 if (size == X00 || size == X11) return False;
9506 vassert(size <= 2);
9507 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9508 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9509 newTempsV128_3(&vecN, &vecM, &vecD);
9510 assign(vecN, getQReg128(nn));
9511 assign(vecM, getQReg128(mm));
9512 assign(vecD, getQReg128(dd));
9513 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9514 False/*!is2*/, size, "mas"[ks],
9515 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9516 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9517 putQReg128(dd, unop(opZHI, mkexpr(res)));
9518 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9519 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9520 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9521 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9522 }
9523 const HChar* nm = ks == 0 ? "sqdmull"
9524 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9525 const HChar arrNarrow = "bhsd"[size];
9526 const HChar arrWide = "bhsd"[size+1];
9527 DIP("%s %c%u, %c%u, %c%u\n",
9528 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9529 return True;
9530 }
9531
9532 return False;
9533 # undef INSN
9534 }
9535
9536
9537 static
9538 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9539 {
9540 /* 31 29 28 23 21 20 15 10 9 4
9541 01 U 11110 size 1 m opcode 1 n d
9542 Decode fields: u,size,opcode
9543 */
9544 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9545 if (INSN(31,30) != BITS2(0,1)
9546 || INSN(28,24) != BITS5(1,1,1,1,0)
9547 || INSN(21,21) != 1
9548 || INSN(10,10) != 1) {
9549 return False;
9550 }
9551 UInt bitU = INSN(29,29);
9552 UInt size = INSN(23,22);
9553 UInt mm = INSN(20,16);
9554 UInt opcode = INSN(15,11);
9555 UInt nn = INSN(9,5);
9556 UInt dd = INSN(4,0);
9557 vassert(size < 4);
9558
9559 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9560 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9561 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9562 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9563 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9564 Bool isADD = opcode == BITS5(0,0,0,0,1);
9565 Bool isU = bitU == 1;
9566 IROp qop = Iop_INVALID;
9567 IROp nop = Iop_INVALID;
9568 if (isADD) {
9569 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9570 nop = mkVecADD(size);
9571 } else {
9572 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9573 nop = mkVecSUB(size);
9574 }
9575 IRTemp argL = newTempV128();
9576 IRTemp argR = newTempV128();
9577 IRTemp qres = newTempV128();
9578 IRTemp nres = newTempV128();
9579 assign(argL, getQReg128(nn));
9580 assign(argR, getQReg128(mm));
9581 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9582 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9583 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9584 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9585 putQReg128(dd, mkexpr(qres));
9586 updateQCFLAGwithDifference(qres, nres);
9587 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9588 : (isU ? "uqsub" : "sqsub");
9589 const HChar arr = "bhsd"[size];
9590 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9591 return True;
9592 }
9593
9594 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9595 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9596 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9597 Bool isGT = bitU == 0;
9598 IRExpr* argL = getQReg128(nn);
9599 IRExpr* argR = getQReg128(mm);
9600 IRTemp res = newTempV128();
9601 assign(res,
9602 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9603 : binop(Iop_CmpGT64Ux2, argL, argR));
9604 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9605 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9606 nameQRegLO(dd, Ity_I64),
9607 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9608 return True;
9609 }
9610
9611 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9612 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9613 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9614 Bool isGE = bitU == 0;
9615 IRExpr* argL = getQReg128(nn);
9616 IRExpr* argR = getQReg128(mm);
9617 IRTemp res = newTempV128();
9618 assign(res,
9619 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9620 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9621 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9622 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9623 nameQRegLO(dd, Ity_I64),
9624 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9625 return True;
9626 }
9627
9628 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9629 || opcode == BITS5(0,1,0,1,0))) {
9630 /* -------- 0,xx,01000 SSHL d_d_d -------- */
9631 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9632 /* -------- 1,xx,01000 USHL d_d_d -------- */
9633 /* -------- 1,xx,01010 URSHL d_d_d -------- */
9634 Bool isU = bitU == 1;
9635 Bool isR = opcode == BITS5(0,1,0,1,0);
9636 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9637 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9638 IRTemp res = newTempV128();
9639 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9640 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9641 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9642 : (isU ? "ushl" : "sshl");
9643 DIP("%s %s, %s, %s\n", nm,
9644 nameQRegLO(dd, Ity_I64),
9645 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9646 return True;
9647 }
9648
9649 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9650 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
9651 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9652 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
9653 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9654 Bool isU = bitU == 1;
9655 Bool isR = opcode == BITS5(0,1,0,1,1);
9656 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9657 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9658 /* This is a bit tricky. Since we're only interested in the lowest
9659 lane of the result, we zero out all the rest in the operands, so
9660 as to ensure that other lanes don't pollute the returned Q value.
9661 This works because it means, for the lanes we don't care about, we
9662 are shifting zero by zero, which can never saturate. */
9663 IRTemp res256 = newTemp(Ity_V256);
9664 IRTemp resSH = newTempV128();
9665 IRTemp resQ = newTempV128();
9666 IRTemp zero = newTempV128();
9667 assign(
9668 res256,
9669 binop(op,
9670 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9671 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
9672 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9673 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
9674 assign(zero, mkV128(0x0000));
9675 putQReg128(dd, mkexpr(resSH));
9676 updateQCFLAGwithDifference(resQ, zero);
9677 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
9678 : (isU ? "uqshl" : "sqshl");
9679 const HChar arr = "bhsd"[size];
9680 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9681 return True;
9682 }
9683
9684 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
9685 /* -------- 0,11,10000 ADD d_d_d -------- */
9686 /* -------- 1,11,10000 SUB d_d_d -------- */
9687 Bool isSUB = bitU == 1;
9688 IRTemp res = newTemp(Ity_I64);
9689 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
9690 getQRegLane(nn, 0, Ity_I64),
9691 getQRegLane(mm, 0, Ity_I64)));
9692 putQRegLane(dd, 0, mkexpr(res));
9693 putQRegLane(dd, 1, mkU64(0));
9694 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
9695 nameQRegLO(dd, Ity_I64),
9696 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9697 return True;
9698 }
9699
9700 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
9701 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
9702 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
9703 Bool isEQ = bitU == 1;
9704 IRExpr* argL = getQReg128(nn);
9705 IRExpr* argR = getQReg128(mm);
9706 IRTemp res = newTempV128();
9707 assign(res,
9708 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9709 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
9710 binop(Iop_AndV128, argL, argR),
9711 mkV128(0x0000))));
9712 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9713 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
9714 nameQRegLO(dd, Ity_I64),
9715 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9716 return True;
9717 }
9718
9719 if (opcode == BITS5(1,0,1,1,0)) {
9720 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9721 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9722 if (size == X00 || size == X11) return False;
9723 Bool isR = bitU == 1;
9724 IRTemp res, sat1q, sat1n, vN, vM;
9725 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9726 newTempsV128_2(&vN, &vM);
9727 assign(vN, getQReg128(nn));
9728 assign(vM, getQReg128(mm));
9729 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9730 putQReg128(dd,
9731 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
9732 updateQCFLAGwithDifference(
9733 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
9734 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
9735 const HChar arr = "bhsd"[size];
9736 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
9737 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9738 return True;
9739 }
9740
9741 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9742 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
9743 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
9744 IRTemp res = newTemp(ity);
9745 assign(res, unop(mkABSF(ity),
9746 triop(mkSUBF(ity),
9747 mkexpr(mk_get_IR_rounding_mode()),
9748 getQRegLO(nn,ity), getQRegLO(mm,ity))));
9749 putQReg128(dd, mkV128(0x0000));
9750 putQRegLO(dd, mkexpr(res));
9751 DIP("fabd %s, %s, %s\n",
9752 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9753 return True;
9754 }
9755
9756 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9757 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
9758 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
9759 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
9760 IRTemp res = newTemp(ity);
9761 assign(res, triop(mkMULF(ity),
9762 mkexpr(mk_get_IR_rounding_mode()),
9763 getQRegLO(nn,ity), getQRegLO(mm,ity)));
9764 putQReg128(dd, mkV128(0x0000));
9765 putQRegLO(dd, mkexpr(res));
9766 DIP("fmulx %s, %s, %s\n",
9767 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9768 return True;
9769 }
9770
9771 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9772 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
9773 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
9774 Bool isD = size == X01;
9775 IRType ity = isD ? Ity_F64 : Ity_F32;
9776 Bool isGE = bitU == 1;
9777 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9778 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
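   /* The IR only provides EQ/LE/LT FP vector comparisons, so FCMGE is
      computed as "mm <= nn" with the operands swapped (marked "swapd"
      below); FCMEQ uses the EQ comparison directly. */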
9779 IRTemp res = newTempV128();
9780 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9781 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
9782 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9783 mkexpr(res))));
9784 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
9785 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9786 return True;
9787 }
9788
9789 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9790 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
9791 Bool isD = size == X11;
9792 IRType ity = isD ? Ity_F64 : Ity_F32;
9793 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9794 IRTemp res = newTempV128();
9795 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
9796 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9797 mkexpr(res))));
9798 DIP("%s %s, %s, %s\n", "fcmgt",
9799 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9800 return True;
9801 }
9802
9803 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9804 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
9805 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
9806 Bool isD = (size & 1) == 1;
9807 IRType ity = isD ? Ity_F64 : Ity_F32;
9808 Bool isGT = (size & 2) == 2;
9809 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9810 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9811 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9812 IRTemp res = newTempV128();
9813 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
9814 unop(opABS, getQReg128(nn)))); // swapd
9815 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9816 mkexpr(res))));
9817 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
9818 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9819 return True;
9820 }
9821
9822 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
9823 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
9824 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
9825 Bool isSQRT = (size & 2) == 2;
9826 Bool isD = (size & 1) == 1;
9827 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
9828 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
9829 IRTemp res = newTempV128();
9830 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9831 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9832 mkexpr(res))));
9833 HChar c = isD ? 'd' : 's';
9834 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
9835 c, dd, c, nn, c, mm);
9836 return True;
9837 }
9838
9839 return False;
9840 # undef INSN
9841 }
9842
9843
9844 static
9845 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9846 {
9847 /* 31 29 28 23 21 16 11 9 4
9848 01 U 11110 size 10000 opcode 10 n d
9849 Decode fields: u,size,opcode
9850 */
9851 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9852 if (INSN(31,30) != BITS2(0,1)
9853 || INSN(28,24) != BITS5(1,1,1,1,0)
9854 || INSN(21,17) != BITS5(1,0,0,0,0)
9855 || INSN(11,10) != BITS2(1,0)) {
9856 return False;
9857 }
9858 UInt bitU = INSN(29,29);
9859 UInt size = INSN(23,22);
9860 UInt opcode = INSN(16,12);
9861 UInt nn = INSN(9,5);
9862 UInt dd = INSN(4,0);
9863 vassert(size < 4);
9864
9865 if (opcode == BITS5(0,0,0,1,1)) {
9866 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
9867 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
9868 /* These are a bit tricky (to say the least). See comments on
9869 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
9870 details. */
9871 Bool isUSQADD = bitU == 1;
9872 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9873 : mkVecQADDEXTUSSATSS(size);
9874 IROp nop = mkVecADD(size);
9875 IRTemp argL = newTempV128();
9876 IRTemp argR = newTempV128();
9877 assign(argL, getQReg128(nn));
9878 assign(argR, getQReg128(dd));
9879 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9880 size, binop(qop, mkexpr(argL), mkexpr(argR)));
9881 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9882 size, binop(nop, mkexpr(argL), mkexpr(argR)));
9883 putQReg128(dd, mkexpr(qres));
9884 updateQCFLAGwithDifference(qres, nres);
9885 const HChar arr = "bhsd"[size];
9886 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
9887 return True;
9888 }
9889
9890 if (opcode == BITS5(0,0,1,1,1)) {
9891 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
9892 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
9893 Bool isNEG = bitU == 1;
9894 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9895 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9896 getQReg128(nn), size );
9897 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
9898 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
9899 putQReg128(dd, mkexpr(qres));
9900 updateQCFLAGwithDifference(qres, nres);
9901 const HChar arr = "bhsd"[size];
9902 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
9903 return True;
9904 }
9905
9906 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
9907 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
9908 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
9909 Bool isGT = bitU == 0;
9910 IRExpr* argL = getQReg128(nn);
9911 IRExpr* argR = mkV128(0x0000);
9912 IRTemp res = newTempV128();
9913 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9914 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
9915 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9916 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
9917 return True;
9918 }
9919
9920 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
9921 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
9922 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
9923 Bool isEQ = bitU == 0;
9924 IRExpr* argL = getQReg128(nn);
9925 IRExpr* argR = mkV128(0x0000);
9926 IRTemp res = newTempV128();
9927 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9928 : unop(Iop_NotV128,
9929 binop(Iop_CmpGT64Sx2, argL, argR)));
9930 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9931 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
9932 return True;
9933 }
9934
9935 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
9936 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
9937 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9938 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
9939 getQReg128(nn))));
9940 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
9941 return True;
9942 }
9943
9944 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9945 /* -------- 0,11,01011 ABS d_d -------- */
9946 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9947 unop(Iop_Abs64x2, getQReg128(nn))));
9948 DIP("abs d%u, d%u\n", dd, nn);
9949 return True;
9950 }
9951
9952 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9953 /* -------- 1,11,01011 NEG d_d -------- */
9954 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9955 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
9956 DIP("neg d%u, d%u\n", dd, nn);
9957 return True;
9958 }
9959
9960 UInt ix = 0; /*INVALID*/
9961 if (size >= X10) {
9962 switch (opcode) {
9963 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
9964 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
9965 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
9966 default: break;
9967 }
9968 }
9969 if (ix > 0) {
9970 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
9971 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
9972 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
9973 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
9974 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
9975 Bool isD = size == X11;
9976 IRType ity = isD ? Ity_F64 : Ity_F32;
9977 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
9978 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
9979 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9980 IROp opCmp = Iop_INVALID;
9981 Bool swap = False;
9982 const HChar* nm = "??";
9983 switch (ix) {
9984 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
9985 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
9986 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
9987 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
9988 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
9989 default: vassert(0);
9990 }
9991 IRExpr* zero = mkV128(0x0000);
9992 IRTemp res = newTempV128();
9993 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
9994 : binop(opCmp, getQReg128(nn), zero));
9995 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9996 mkexpr(res))));
9997
9998 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
9999 return True;
10000 }
10001
10002 if (opcode == BITS5(1,0,1,0,0)
10003 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10004 /* -------- 0,xx,10100: SQXTN -------- */
10005 /* -------- 1,xx,10100: UQXTN -------- */
10006 /* -------- 1,xx,10010: SQXTUN -------- */
10007 if (size == X11) return False;
10008 vassert(size < 3);
10009 IROp opN = Iop_INVALID;
10010 Bool zWiden = True;
10011 const HChar* nm = "??";
10012 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10013 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10014 }
10015 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10016 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10017 }
10018 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10019 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10020 }
10021 else vassert(0);
10022 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10023 size+1, getQReg128(nn));
10024 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10025 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10026 putQReg128(dd, mkexpr(resN));
10027 /* The non-participating lanes of src and resW are both zero (zero
10028 lanes widen back to zero), so they make no contribution to the
10029 Q flag state when the two are compared. */
10030 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10031 size, mkexpr(resN));
10032 updateQCFLAGwithDifference(src, resW);
10033 const HChar arrNarrow = "bhsd"[size];
10034 const HChar arrWide = "bhsd"[size+1];
10035 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10036 return True;
10037 }
10038
10039 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10040 /* -------- 1,01,10110 FCVTXN s_d -------- */
10041 /* Using Irrm_NEAREST here isn't right. FCVTXN requires "round to odd"
10042 (set the result's LSB if any discarded bits are nonzero); rounding to nearest is a kludge. */
10043 putQRegLO(dd,
10044 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10045 getQRegLO(nn, Ity_F64)));
10046 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10047 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10048 DIP("fcvtxn s%u, d%u\n", dd, nn);
10049 return True;
10050 }
10051
10052 ix = 0; /*INVALID*/
10053 switch (opcode) {
10054 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10055 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10056 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10057 default: break;
10058 }
10059 if (ix > 0) {
10060 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10061 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10062 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10063 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10064 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10065 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10066 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10067 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10068 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10069 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10070 Bool isD = (size & 1) == 1;
10071 IRType tyF = isD ? Ity_F64 : Ity_F32;
10072 IRType tyI = isD ? Ity_I64 : Ity_I32;
10073 IRRoundingMode irrm = 8; /*impossible*/
10074 HChar ch = '?';
10075 switch (ix) {
10076 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10077 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10078 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge: ties should round away from zero */
10079 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10080 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10081 default: vassert(0);
10082 }
10083 IROp cvt = Iop_INVALID;
10084 if (bitU == 1) {
10085 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10086 } else {
10087 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10088 }
10089 IRTemp src = newTemp(tyF);
10090 IRTemp res = newTemp(tyI);
10091 assign(src, getQRegLane(nn, 0, tyF));
10092 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10093 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10094 if (!isD) {
10095 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10096 }
10097 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10098 HChar sOrD = isD ? 'd' : 's';
10099 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10100 sOrD, dd, sOrD, nn);
10101 return True;
10102 }
10103
10104 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10105 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10106 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10107 Bool isU = bitU == 1;
10108 Bool isD = (size & 1) == 1;
10109 IRType tyI = isD ? Ity_I64 : Ity_I32;
10110 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10111 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10112 IRTemp rm = mk_get_IR_rounding_mode();
10113 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10114 if (!isD) {
10115 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10116 }
10117 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10118 HChar c = isD ? 'd' : 's';
10119 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10120 return True;
10121 }
10122
10123 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10124 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10125 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10126 Bool isSQRT = bitU == 1;
10127 Bool isD = (size & 1) == 1;
10128 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10129 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10130 IRTemp resV = newTempV128();
10131 assign(resV, unop(op, getQReg128(nn)));
10132 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10133 mkexpr(resV))));
10134 HChar c = isD ? 'd' : 's';
10135 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10136 return True;
10137 }
10138
10139 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10140 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10141 Bool isD = (size & 1) == 1;
10142 IRType ty = isD ? Ity_F64 : Ity_F32;
10143 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10144 IRTemp res = newTemp(ty);
10145 IRTemp rm = mk_get_IR_rounding_mode();
10146 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10147 putQReg128(dd, mkV128(0x0000));
10148 putQRegLane(dd, 0, mkexpr(res));
10149 HChar c = isD ? 'd' : 's';
10150 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10151 return True;
10152 }
10153
10154 return False;
10155 # undef INSN
10156 }
10157
10158
10159 static
10160 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10161 {
10162 /* 31 28 23 21 20 19 15 11 9 4
10163 01 U 11111 size L M m opcode H 0 n d
10164 Decode fields are: u,size,opcode
10165 M is really part of the mm register number. Individual
10166 cases need to inspect L and H though.
10167 */
10168 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10169 if (INSN(31,30) != BITS2(0,1)
10170 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
10171 return False;
10172 }
10173 UInt bitU = INSN(29,29);
10174 UInt size = INSN(23,22);
10175 UInt bitL = INSN(21,21);
10176 UInt bitM = INSN(20,20);
10177 UInt mmLO4 = INSN(19,16);
10178 UInt opcode = INSN(15,12);
10179 UInt bitH = INSN(11,11);
10180 UInt nn = INSN(9,5);
10181 UInt dd = INSN(4,0);
10182 vassert(size < 4);
10183 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10184
10185 if (bitU == 0 && size >= X10
10186 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10187 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10188 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10189 Bool isD = (size & 1) == 1;
10190 Bool isSUB = opcode == BITS4(0,1,0,1);
10191 UInt index;
10192 if (!isD) index = (bitH << 1) | bitL;
10193 else if (isD && bitL == 0) index = bitH;
10194 else return False; // sz:L == x11 => unallocated encoding
10195 vassert(index < (isD ? 2 : 4));
10196 IRType ity = isD ? Ity_F64 : Ity_F32;
10197 IRTemp elem = newTemp(ity);
10198 UInt mm = (bitM << 4) | mmLO4;
10199 assign(elem, getQRegLane(mm, index, ity));
10200 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10201 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10202 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10203 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10204 IRTemp rm = mk_get_IR_rounding_mode();
10205 IRTemp t1 = newTempV128();
10206 IRTemp t2 = newTempV128();
10207 // FIXME: double rounding; use FMA primops instead
10208 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10209 assign(t2, triop(isSUB ? opSUB : opADD,
10210 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10211 putQReg128(dd,
10212 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10213 mkexpr(t2))));
10214 const HChar c = isD ? 'd' : 's';
10215 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10216 c, dd, c, nn, nameQReg128(mm), c, index);
10217 return True;
10218 }
10219
10220 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10221 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10222 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10223 Bool isD = (size & 1) == 1;
10224 Bool isMULX = bitU == 1;
10225 UInt index;
10226 if (!isD) index = (bitH << 1) | bitL;
10227 else if (isD && bitL == 0) index = bitH;
10228 else return False; // sz:L == x11 => unallocated encoding
10229 vassert(index < (isD ? 2 : 4));
10230 IRType ity = isD ? Ity_F64 : Ity_F32;
10231 IRTemp elem = newTemp(ity);
10232 UInt mm = (bitM << 4) | mmLO4;
10233 assign(elem, getQRegLane(mm, index, ity));
10234 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10235 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10236 IRTemp rm = mk_get_IR_rounding_mode();
10237 IRTemp t1 = newTempV128();
10238 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10239 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10240 putQReg128(dd,
10241 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10242 mkexpr(t1))));
10243 const HChar c = isD ? 'd' : 's';
10244 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10245 c, dd, c, nn, nameQReg128(mm), c, index);
10246 return True;
10247 }
10248
10249 if (bitU == 0
10250 && (opcode == BITS4(1,0,1,1)
10251 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10252 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10253 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10254 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10255 /* Widens, and size refers to the narrowed lanes. */
10256 UInt ks = 3;
10257 switch (opcode) {
10258 case BITS4(1,0,1,1): ks = 0; break;
10259 case BITS4(0,0,1,1): ks = 1; break;
10260 case BITS4(0,1,1,1): ks = 2; break;
10261 default: vassert(0);
10262 }
10263 vassert(ks >= 0 && ks <= 2);
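   /* ks selects the operation: 0 = plain multiply (SQDMULL),
      1 = multiply-accumulate (SQDMLAL), 2 = multiply-subtract (SQDMLSL).
      The character "mas"[ks] passed to math_SQDMULL_ACC below encodes
      the same choice. */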
10264 UInt mm = 32; // invalid
10265 UInt ix = 16; // invalid
10266 switch (size) {
10267 case X00:
10268 return False; // h_b_b[] case is not allowed
10269 case X01:
10270 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10271 case X10:
10272 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10273 case X11:
10274 return False; // q_d_d[] case is not allowed
10275 default:
10276 vassert(0);
10277 }
10278 vassert(mm < 32 && ix < 16);
10279 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10280 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10281 newTempsV128_2(&vecN, &vecD);
10282 assign(vecN, getQReg128(nn));
10283 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10284 assign(vecD, getQReg128(dd));
10285 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10286 False/*!is2*/, size, "mas"[ks],
10287 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10288 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10289 putQReg128(dd, unop(opZHI, mkexpr(res)));
10290 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10291 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10292 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10293 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10294 }
10295 const HChar* nm = ks == 0 ? "sqdmull"
10296 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10297 const HChar arrNarrow = "bhsd"[size];
10298 const HChar arrWide = "bhsd"[size+1];
10299 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10300 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10301 return True;
10302 }
10303
10304 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10305 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10306 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10307 UInt mm = 32; // invalid
10308 UInt ix = 16; // invalid
10309 switch (size) {
10310 case X00:
10311 return False; // b case is not allowed
10312 case X01:
10313 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10314 case X10:
10315 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10316 case X11:
10317 return False; // q case is not allowed
10318 default:
10319 vassert(0);
10320 }
10321 vassert(mm < 32 && ix < 16);
10322 Bool isR = opcode == BITS4(1,1,0,1);
10323 IRTemp res, sat1q, sat1n, vN, vM;
10324 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10325 vN = newTempV128();
10326 assign(vN, getQReg128(nn));
10327 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10328 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10329 IROp opZHI = mkVecZEROHIxxOFV128(size);
10330 putQReg128(dd, unop(opZHI, mkexpr(res)));
10331 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10332 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10333 HChar ch = size == X01 ? 'h' : 's';
10334 DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10335 return True;
10336 }
10337
10338 return False;
10339 # undef INSN
10340 }
10341
10342
10343 static
10344 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10345 {
10346 /* 31 28 22 18 15 10 9 4
10347 0 q u 011110 immh immb opcode 1 n d
10348 Decode fields: u,opcode
10349 */
10350 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10351 if (INSN(31,31) != 0
10352 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10353 return False;
10354 }
10355 UInt bitQ = INSN(30,30);
10356 UInt bitU = INSN(29,29);
10357 UInt immh = INSN(22,19);
10358 UInt immb = INSN(18,16);
10359 UInt opcode = INSN(15,11);
10360 UInt nn = INSN(9,5);
10361 UInt dd = INSN(4,0);
10362
10363 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10364 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10365 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10366 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10367 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10368 /* laneTy, shift = case immh:immb of
10369 0001:xxx -> B, SHR:8-xxx
10370 001x:xxx -> H, SHR:16-xxxx
10371 01xx:xxx -> S, SHR:32-xxxxx
10372 1xxx:xxx -> D, SHR:64-xxxxxx
10373 other -> invalid
10374 */
10375 UInt size = 0;
10376 UInt shift = 0;
10377 Bool isQ = bitQ == 1;
10378 Bool isU = bitU == 1;
10379 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10380 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10381 if (!ok || (bitQ == 0 && size == X11)) return False;
10382 vassert(size >= 0 && size <= 3);
10383 UInt lanebits = 8 << size;
10384 vassert(shift >= 1 && shift <= lanebits);
10385 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10386 IRExpr* src = getQReg128(nn);
10387 IRTemp shf = newTempV128();
10388 IRTemp res = newTempV128();
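   /* A shift amount equal to the lane width is a valid encoding but
      cannot be expressed by the IR shift ops, whose amount must be
      0 .. lanebits-1.  For unsigned shifts the result is simply zero;
      for signed shifts, shifting by lanebits-1 gives the same result
      (every bit becomes a copy of the sign bit), hence the "nudge". */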
10389 if (shift == lanebits && isU) {
10390 assign(shf, mkV128(0x0000));
10391 } else {
10392 UInt nudge = 0;
10393 if (shift == lanebits) {
10394 vassert(!isU);
10395 nudge = 1;
10396 }
10397 assign(shf, binop(op, src, mkU8(shift - nudge)));
10398 }
10399 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10400 : mkexpr(shf));
10401 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10402 HChar laneCh = "bhsd"[size];
10403 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10404 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10405 : (isU ? "ushr" : "sshr");
10406 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10407 nameQReg128(dd), nLanes, laneCh,
10408 nameQReg128(nn), nLanes, laneCh, shift);
10409 return True;
10410 }
10411
10412 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10413 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10414 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10415 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10416 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10417 /* laneTy, shift = case immh:immb of
10418 0001:xxx -> B, SHR:8-xxx
10419 001x:xxx -> H, SHR:16-xxxx
10420 01xx:xxx -> S, SHR:32-xxxxx
10421 1xxx:xxx -> D, SHR:64-xxxxxx
10422 other -> invalid
10423 */
10424 UInt size = 0;
10425 UInt shift = 0;
10426 Bool isQ = bitQ == 1;
10427 Bool isU = bitU == 1;
10428 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10429 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10430 if (!ok || (bitQ == 0 && size == X11)) return False;
10431 vassert(size >= 0 && size <= 3);
10432 UInt lanebits = 8 << size;
10433 vassert(shift >= 1 && shift <= lanebits);
10434 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10435 IRExpr* src = getQReg128(nn);
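   /* The rounding-shift ops follow the ARM SSHL/USHL convention: each
      lane is shifted by a per-lane signed amount, negative meaning a
      rounding shift right.  Hence the shift amount is negated and
      replicated into every lane. */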
10436 IRTemp imm8 = newTemp(Ity_I8);
10437 assign(imm8, mkU8((UChar)(-shift)));
10438 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10439 IRTemp shf = newTempV128();
10440 IRTemp res = newTempV128();
10441 assign(shf, binop(op, src, amt));
10442 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10443 : mkexpr(shf));
10444 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10445 HChar laneCh = "bhsd"[size];
10446 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10447 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10448 : (isU ? "urshr" : "srshr");
10449 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10450 nameQReg128(dd), nLanes, laneCh,
10451 nameQReg128(nn), nLanes, laneCh, shift);
10452 return True;
10453 }
10454
10455 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10456 /* -------- 1,01000 SRI std7_std7_#imm -------- */
10457 /* laneTy, shift = case immh:immb of
10458 0001:xxx -> B, SHR:8-xxx
10459 001x:xxx -> H, SHR:16-xxxx
10460 01xx:xxx -> S, SHR:32-xxxxx
10461 1xxx:xxx -> D, SHR:64-xxxxxx
10462 other -> invalid
10463 */
10464 UInt size = 0;
10465 UInt shift = 0;
10466 Bool isQ = bitQ == 1;
10467 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10468 if (!ok || (bitQ == 0 && size == X11)) return False;
10469 vassert(size >= 0 && size <= 3);
10470 UInt lanebits = 8 << size;
10471 vassert(shift >= 1 && shift <= lanebits);
10472 IRExpr* src = getQReg128(nn);
10473 IRTemp res = newTempV128();
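   /* SRI shifts each lane of Vn right by 'shift' and inserts it into Vd,
      leaving the top 'shift' bits of each destination lane unchanged.
      nmask below selects exactly those preserved bits: all-ones shifted
      left by (lanebits - shift). */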
10474 if (shift == lanebits) {
10475 assign(res, getQReg128(dd));
10476 } else {
10477 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10478 IRExpr* nmask = binop(mkVecSHLN(size),
10479 mkV128(0xFFFF), mkU8(lanebits - shift));
10480 IRTemp tmp = newTempV128();
10481 assign(tmp, binop(Iop_OrV128,
10482 mkexpr(res),
10483 binop(Iop_AndV128, getQReg128(dd), nmask)));
10484 res = tmp;
10485 }
10486 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10487 HChar laneCh = "bhsd"[size];
10488 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10489 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10490 nameQReg128(dd), nLanes, laneCh,
10491 nameQReg128(nn), nLanes, laneCh, shift);
10492 return True;
10493 }
10494
10495 if (opcode == BITS5(0,1,0,1,0)) {
10496 /* -------- 0,01010 SHL std7_std7_#imm -------- */
10497 /* -------- 1,01010 SLI std7_std7_#imm -------- */
10498 /* laneTy, shift = case immh:immb of
10499 0001:xxx -> B, xxx
10500 001x:xxx -> H, xxxx
10501 01xx:xxx -> S, xxxxx
10502 1xxx:xxx -> D, xxxxxx
10503 other -> invalid
10504 */
10505 UInt size = 0;
10506 UInt shift = 0;
10507 Bool isSLI = bitU == 1;
10508 Bool isQ = bitQ == 1;
10509 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10510 if (!ok || (bitQ == 0 && size == X11)) return False;
10511 vassert(size >= 0 && size <= 3);
10512 /* The shift encoding has opposite sign for the leftwards case.
10513 Adjust shift to compensate. */
10514 UInt lanebits = 8 << size;
10515 shift = lanebits - shift;
10516 vassert(shift >= 0 && shift < lanebits);
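   /* E.g. for 32-bit lanes, SHL #3 is encoded with immh:immb = 32+3 = 35;
      getLaneInfo_IMMH_IMMB returns 64-35 = 29 (the right-shift reading),
      and lanebits - 29 = 3 recovers the intended left-shift amount. */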
10517 IROp op = mkVecSHLN(size);
10518 IRExpr* src = getQReg128(nn);
10519 IRTemp res = newTempV128();
10520 if (shift == 0) {
10521 assign(res, src);
10522 } else {
10523 assign(res, binop(op, src, mkU8(shift)));
10524 if (isSLI) {
10525 IRExpr* nmask = binop(mkVecSHRN(size),
10526 mkV128(0xFFFF), mkU8(lanebits - shift));
10527 IRTemp tmp = newTempV128();
10528 assign(tmp, binop(Iop_OrV128,
10529 mkexpr(res),
10530 binop(Iop_AndV128, getQReg128(dd), nmask)));
10531 res = tmp;
10532 }
10533 }
10534 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10535 HChar laneCh = "bhsd"[size];
10536 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10537 const HChar* nm = isSLI ? "sli" : "shl";
10538 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10539 nameQReg128(dd), nLanes, laneCh,
10540 nameQReg128(nn), nLanes, laneCh, shift);
10541 return True;
10542 }
10543
10544 if (opcode == BITS5(0,1,1,1,0)
10545 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10546 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10547 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10548 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10549 UInt size = 0;
10550 UInt shift = 0;
10551 Bool isQ = bitQ == 1;
10552 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10553 if (!ok || (bitQ == 0 && size == X11)) return False;
10554 vassert(size >= 0 && size <= 3);
10555 /* The shift encoding has opposite sign for the leftwards case.
10556 Adjust shift to compensate. */
10557 UInt lanebits = 8 << size;
10558 shift = lanebits - shift;
10559 vassert(shift >= 0 && shift < lanebits);
10560 const HChar* nm = NULL;
10561 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10562 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10563 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10564 else vassert(0);
10565 IRTemp qDiff1 = IRTemp_INVALID;
10566 IRTemp qDiff2 = IRTemp_INVALID;
10567 IRTemp res = IRTemp_INVALID;
10568 IRTemp src = newTempV128();
10569 assign(src, getQReg128(nn));
10570 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10571 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10572 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10573 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10574 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10575 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10576 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10577 return True;
10578 }
10579
10580 if (bitU == 0
10581 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10582 /* -------- 0,10000 SHRN{,2} #imm -------- */
10583 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10584 /* Narrows, and size is the narrow size. */
10585 UInt size = 0;
10586 UInt shift = 0;
10587 Bool is2 = bitQ == 1;
10588 Bool isR = opcode == BITS5(1,0,0,0,1);
10589 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10590 if (!ok || size == X11) return False;
10591 vassert(shift >= 1);
10592 IRTemp t1 = newTempV128();
10593 IRTemp t2 = newTempV128();
10594 IRTemp t3 = newTempV128();
10595 assign(t1, getQReg128(nn));
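   /* For the rounding variant, add a rounding constant of 1 << (shift-1)
      in the wide lanes before shifting right, so the narrowed result is
      rounded rather than truncated. */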
10596 assign(t2, isR ? binop(mkVecADD(size+1),
10597 mkexpr(t1),
10598 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10599 : mkexpr(t1));
10600 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10601 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10602 putLO64andZUorPutHI64(is2, dd, t4);
10603 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10604 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10605 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10606 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10607 return True;
10608 }
10609
10610 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10611 || (bitU == 1
10612 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10613 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
10614 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
10615 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
10616 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
10617 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
10618 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10619 UInt size = 0;
10620 UInt shift = 0;
10621 Bool is2 = bitQ == 1;
10622 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10623 if (!ok || size == X11) return False;
10624 vassert(shift >= 1 && shift <= (8 << size));
10625 const HChar* nm = "??";
10626 IROp op = Iop_INVALID;
10627 /* Decide on the name and the operation. */
10628 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10629 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10630 }
10631 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10632 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10633 }
10634 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10635 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10636 }
10637 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10638 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10639 }
10640 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10641 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10642 }
10643 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10644 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10645 }
10646 else vassert(0);
10647 /* Compute the result (Q, shifted value) pair. */
10648 IRTemp src128 = newTempV128();
10649 assign(src128, getQReg128(nn));
10650 IRTemp pair = newTempV128();
10651 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10652 /* Update the result reg */
10653 IRTemp res64in128 = newTempV128();
10654 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10655 putLO64andZUorPutHI64(is2, dd, res64in128);
10656 /* Update the Q flag. */
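   /* The QANDq...NNARROW ops above return the shifted, narrowed result
      in the lower 64 bits of 'pair' and saturation information in the
      upper 64 bits.  Copy that upper half into both halves and compare
      against zero: any nonzero bit means some lane saturated, so QC
      must be set. */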
10657 IRTemp q64q64 = newTempV128();
10658 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10659 IRTemp z128 = newTempV128();
10660 assign(z128, mkV128(0x0000));
10661 updateQCFLAGwithDifference(q64q64, z128);
10662 /* */
10663 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10664 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10665 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10666 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10667 return True;
10668 }
10669
10670 if (opcode == BITS5(1,0,1,0,0)) {
10671 /* -------- 0,10100 SSHLL{,2} #imm -------- */
10672 /* -------- 1,10100 USHLL{,2} #imm -------- */
10673 /* 31 28 22 18 15 9 4
10674 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
10675 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
10676 where Ta,Tb,sh
10677 = case immh of 1xxx -> invalid
10678 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
10679 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
10680 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
10681 0000 -> AdvSIMD modified immediate (???)
10682 */
10683 Bool isQ = bitQ == 1;
10684 Bool isU = bitU == 1;
10685 UInt immhb = (immh << 3) | immb;
10686 IRTemp src = newTempV128();
10687 IRTemp zero = newTempV128();
10688 IRExpr* res = NULL;
10689 UInt sh = 0;
10690 const HChar* ta = "??";
10691 const HChar* tb = "??";
10692 assign(src, getQReg128(nn));
10693 assign(zero, mkV128(0x0000));
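   /* Widening is done by interleaving with zero, which leaves each
      source lane in the upper half of a double-width lane (that is,
      shifted left by the narrow lane width).  Shifting right by
      (narrow lanebits - sh) then produces the extended value shifted
      left by sh: arithmetic shift for SSHLL (sign extend), logical
      shift for USHLL (zero extend). */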
10694 if (immh & 8) {
10695 /* invalid; don't assign to res */
10696 }
10697 else if (immh & 4) {
10698 sh = immhb - 32;
10699 vassert(sh < 32); /* so 32-sh is 1..32 */
10700 ta = "2d";
10701 tb = isQ ? "4s" : "2s";
10702 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
10703 : mk_InterleaveLO32x4(src, zero);
10704 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
10705 }
10706 else if (immh & 2) {
10707 sh = immhb - 16;
10708 vassert(sh < 16); /* so 16-sh is 1..16 */
10709 ta = "4s";
10710 tb = isQ ? "8h" : "4h";
10711 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
10712 : mk_InterleaveLO16x8(src, zero);
10713 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
10714 }
10715 else if (immh & 1) {
10716 sh = immhb - 8;
10717 vassert(sh < 8); /* so 8-sh is 1..8 */
10718 ta = "8h";
10719 tb = isQ ? "16b" : "8b";
10720 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
10721 : mk_InterleaveLO8x16(src, zero);
10722 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
10723 } else {
10724 vassert(immh == 0);
10725 /* invalid; don't assign to res */
10726 }
10727 /* */
10728 if (res) {
10729 putQReg128(dd, res);
10730 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
10731 isU ? 'u' : 's', isQ ? "2" : "",
10732 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
10733 return True;
10734 }
10735 return False;
10736 }
10737
10738 if (opcode == BITS5(1,1,1,0,0)) {
10739 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10740 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10741 /* If immh is of the form 00xx, the insn is invalid. */
10742 if (immh < BITS4(0,1,0,0)) return False;
10743 UInt size = 0;
10744 UInt fbits = 0;
10745 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10746 /* The following holds because immh is never zero. */
10747 vassert(ok);
10748 /* The following holds because immh >= 0100. */
10749 vassert(size == X10 || size == X11);
10750 Bool isD = size == X11;
10751 Bool isU = bitU == 1;
10752 Bool isQ = bitQ == 1;
10753 if (isD && !isQ) return False; /* reject .1d case */
10754 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
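   /* A fixed-point value with fbits fraction bits is converted by doing
      the ordinary int-to-FP conversion and then scaling the result by
      2^-fbits. */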
10755 Double scale = two_to_the_minus(fbits);
10756 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10757 : IRExpr_Const(IRConst_F32( (Float)scale ));
10758 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10759 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10760 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10761 IRType tyF = isD ? Ity_F64 : Ity_F32;
10762 IRType tyI = isD ? Ity_I64 : Ity_I32;
10763 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10764 vassert(nLanes == 2 || nLanes == 4);
10765 for (UInt i = 0; i < nLanes; i++) {
10766 IRTemp src = newTemp(tyI);
10767 IRTemp res = newTemp(tyF);
10768 IRTemp rm = mk_get_IR_rounding_mode();
10769 assign(src, getQRegLane(nn, i, tyI));
10770 assign(res, triop(opMUL, mkexpr(rm),
10771 binop(opCVT, mkexpr(rm), mkexpr(src)),
10772 scaleE));
10773 putQRegLane(dd, i, mkexpr(res));
10774 }
10775 if (!isQ) {
10776 putQRegLane(dd, 1, mkU64(0));
10777 }
10778 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10779 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
10780 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10781 return True;
10782 }
10783
10784 if (opcode == BITS5(1,1,1,1,1)) {
10785 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
10786 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
10787 /* If immh is of the form 00xx, the insn is invalid. */
10788 if (immh < BITS4(0,1,0,0)) return False;
10789 UInt size = 0;
10790 UInt fbits = 0;
10791 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10792 /* The following holds because immh is never zero. */
10793 vassert(ok);
10794 /* The following holds because immh >= 0100. */
10795 vassert(size == X10 || size == X11);
10796 Bool isD = size == X11;
10797 Bool isU = bitU == 1;
10798 Bool isQ = bitQ == 1;
10799 if (isD && !isQ) return False; /* reject .1d case */
10800 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10801 Double scale = two_to_the_plus(fbits);
10802 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10803 : IRExpr_Const(IRConst_F32( (Float)scale ));
10804 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10805 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10806 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10807 IRType tyF = isD ? Ity_F64 : Ity_F32;
10808 IRType tyI = isD ? Ity_I64 : Ity_I32;
10809 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10810 vassert(nLanes == 2 || nLanes == 4);
10811 for (UInt i = 0; i < nLanes; i++) {
10812 IRTemp src = newTemp(tyF);
10813 IRTemp res = newTemp(tyI);
10814 IRTemp rm = newTemp(Ity_I32);
10815 assign(src, getQRegLane(nn, i, tyF));
10816 assign(rm, mkU32(Irrm_ZERO));
10817 assign(res, binop(opCVT, mkexpr(rm),
10818 triop(opMUL, mkexpr(rm),
10819 mkexpr(src), scaleE)));
10820 putQRegLane(dd, i, mkexpr(res));
10821 }
10822 if (!isQ) {
10823 putQRegLane(dd, 1, mkU64(0));
10824 }
10825 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10826 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10827 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10828 return True;
10829 }
10830
10831 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10832 return False;
10833 # undef INSN
10834 }
10835
10836
10837 static
10838 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10839 {
10840 /* 31 30 29 28 23 21 20 15 11 9 4
10841 0 Q U 01110 size 1 m opcode 00 n d
10842 Decode fields: u,opcode
10843 */
10844 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10845 if (INSN(31,31) != 0
10846 || INSN(28,24) != BITS5(0,1,1,1,0)
10847 || INSN(21,21) != 1
10848 || INSN(11,10) != BITS2(0,0)) {
10849 return False;
10850 }
10851 UInt bitQ = INSN(30,30);
10852 UInt bitU = INSN(29,29);
10853 UInt size = INSN(23,22);
10854 UInt mm = INSN(20,16);
10855 UInt opcode = INSN(15,12);
10856 UInt nn = INSN(9,5);
10857 UInt dd = INSN(4,0);
10858 vassert(size < 4);
10859 Bool is2 = bitQ == 1;
10860
10861 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
10862 /* -------- 0,0000 SADDL{2} -------- */
10863 /* -------- 1,0000 UADDL{2} -------- */
10864 /* -------- 0,0010 SSUBL{2} -------- */
10865 /* -------- 1,0010 USUBL{2} -------- */
10866 /* Widens, and size refers to the narrowed lanes. */
10867 if (size == X11) return False;
10868 vassert(size <= 2);
10869 Bool isU = bitU == 1;
10870 Bool isADD = opcode == BITS4(0,0,0,0);
10871 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10872 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10873 IRTemp res = newTempV128();
10874 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10875 mkexpr(argL), mkexpr(argR)));
10876 putQReg128(dd, mkexpr(res));
10877 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10878 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10879 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
10880 : (isU ? "usubl" : "ssubl");
10881 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10882 nameQReg128(dd), arrWide,
10883 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10884 return True;
10885 }
10886
10887 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
10888 /* -------- 0,0001 SADDW{2} -------- */
10889 /* -------- 1,0001 UADDW{2} -------- */
10890 /* -------- 0,0011 SSUBW{2} -------- */
10891 /* -------- 1,0011 USUBW{2} -------- */
10892 /* Widens, and size refers to the narrowed lanes. */
10893 if (size == X11) return False;
10894 vassert(size <= 2);
10895 Bool isU = bitU == 1;
10896 Bool isADD = opcode == BITS4(0,0,0,1);
10897 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10898 IRTemp res = newTempV128();
10899 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10900 getQReg128(nn), mkexpr(argR)));
10901 putQReg128(dd, mkexpr(res));
10902 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10903 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10904 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
10905 : (isU ? "usubw" : "ssubw");
10906 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10907 nameQReg128(dd), arrWide,
10908 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
10909 return True;
10910 }
10911
10912 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
10913 /* -------- 0,0100 ADDHN{2} -------- */
10914 /* -------- 1,0100 RADDHN{2} -------- */
10915 /* -------- 0,0110 SUBHN{2} -------- */
10916 /* -------- 1,0110 RSUBHN{2} -------- */
10917 /* Narrows, and size refers to the narrowed lanes. */
10918 if (size == X11) return False;
10919 vassert(size <= 2);
10920 const UInt shift[3] = { 8, 16, 32 };
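   /* shift[size] is the narrow lane width in bits; shifting the wide
      result right by that amount selects the high half of each wide
      lane.  For RADDHN/RSUBHN a constant of 1 << (shift[size]-1) is
      added first, so the high half is rounded rather than truncated. */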
10921 Bool isADD = opcode == BITS4(0,1,0,0);
10922 Bool isR = bitU == 1;
10923 /* Combined elements in wide lanes */
10924 IRTemp wide = newTempV128();
10925 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10926 getQReg128(nn), getQReg128(mm));
10927 if (isR) {
10928 wideE = binop(mkVecADD(size+1),
10929 wideE,
10930 mkexpr(math_VEC_DUP_IMM(size+1,
10931 1ULL << (shift[size]-1))));
10932 }
10933 assign(wide, wideE);
10934 /* Top halves of elements, still in wide lanes */
10935 IRTemp shrd = newTempV128();
10936 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
10937 /* Elements now compacted into lower 64 bits */
10938 IRTemp new64 = newTempV128();
10939 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
10940 putLO64andZUorPutHI64(is2, dd, new64);
10941 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10942 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10943 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
10944 : (isR ? "rsubhn" : "subhn");
10945 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10946 nameQReg128(dd), arrNarrow,
10947 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
10948 return True;
10949 }
10950
10951 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
10952 /* -------- 0,0101 SABAL{2} -------- */
10953 /* -------- 1,0101 UABAL{2} -------- */
10954 /* -------- 0,0111 SABDL{2} -------- */
10955 /* -------- 1,0111 UABDL{2} -------- */
10956 /* Widens, and size refers to the narrowed lanes. */
10957 if (size == X11) return False;
10958 vassert(size <= 2);
10959 Bool isU = bitU == 1;
10960 Bool isACC = opcode == BITS4(0,1,0,1);
10961 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10962 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10963 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
10964 IRTemp res = newTempV128();
10965 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
10966 : mkexpr(abd));
10967 putQReg128(dd, mkexpr(res));
10968 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10969 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10970 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
10971 : (isU ? "uabdl" : "sabdl");
10972 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10973 nameQReg128(dd), arrWide,
10974 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10975 return True;
10976 }
10977
10978 if (opcode == BITS4(1,1,0,0)
10979 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
10980 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
10981 /* -------- 1,1100 UMULL{2} -------- */ // 0
10982 /* -------- 0,1000 SMLAL{2} -------- */ // 1
10983 /* -------- 1,1000 UMLAL{2} -------- */ // 1
10984 /* -------- 0,1010 SMLSL{2} -------- */ // 2
10985 /* -------- 1,1010 UMLSL{2} -------- */ // 2
10986 /* Widens, and size refers to the narrowed lanes. */
10987 UInt ks = 3;
10988 switch (opcode) {
10989 case BITS4(1,1,0,0): ks = 0; break;
10990 case BITS4(1,0,0,0): ks = 1; break;
10991 case BITS4(1,0,1,0): ks = 2; break;
10992 default: vassert(0);
10993 }
10994 vassert(ks >= 0 && ks <= 2);
10995 if (size == X11) return False;
10996 vassert(size <= 2);
10997 Bool isU = bitU == 1;
10998 IRTemp vecN = newTempV128();
10999 IRTemp vecM = newTempV128();
11000 IRTemp vecD = newTempV128();
11001 assign(vecN, getQReg128(nn));
11002 assign(vecM, getQReg128(mm));
11003 assign(vecD, getQReg128(dd));
11004 IRTemp res = IRTemp_INVALID;
11005 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11006 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11007 putQReg128(dd, mkexpr(res));
11008 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11009 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11010 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11011 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11012 nameQReg128(dd), arrWide,
11013 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11014 return True;
11015 }
11016
11017 if (bitU == 0
11018 && (opcode == BITS4(1,1,0,1)
11019 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11020 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11021 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11022 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11023 /* Widens, and size refers to the narrowed lanes. */
11024 UInt ks = 3;
11025 switch (opcode) {
11026 case BITS4(1,1,0,1): ks = 0; break;
11027 case BITS4(1,0,0,1): ks = 1; break;
11028 case BITS4(1,0,1,1): ks = 2; break;
11029 default: vassert(0);
11030 }
11031 vassert(ks >= 0 && ks <= 2);
11032 if (size == X00 || size == X11) return False;
11033 vassert(size <= 2);
11034 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11035 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11036 newTempsV128_3(&vecN, &vecM, &vecD);
11037 assign(vecN, getQReg128(nn));
11038 assign(vecM, getQReg128(mm));
11039 assign(vecD, getQReg128(dd));
11040 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11041 is2, size, "mas"[ks],
11042 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11043 putQReg128(dd, mkexpr(res));
11044 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11045 updateQCFLAGwithDifference(sat1q, sat1n);
11046 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11047 updateQCFLAGwithDifference(sat2q, sat2n);
11048 }
11049 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11050 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11051 const HChar* nm = ks == 0 ? "sqdmull"
11052 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11053 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11054 nameQReg128(dd), arrWide,
11055 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11056 return True;
11057 }
11058
11059 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11060 /* -------- 0,1110 PMULL{2} -------- */
11061 /* Widens, and size refers to the narrowed lanes. */
11062 if (size != X00) return False;
11063 IRTemp res
11064 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11065 getQReg128(nn), getQReg128(mm));
11066 putQReg128(dd, mkexpr(res));
11067 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11068 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11069 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11070 nameQReg128(dd), arrNarrow,
11071 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11072 return True;
11073 }
11074
11075 return False;
11076 # undef INSN
11077 }
11078
11079
11080 static
11081 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11082 {
11083 /* 31 30 29 28 23 21 20 15 10 9 4
11084 0 Q U 01110 size 1 m opcode 1 n d
11085 Decode fields: u,size,opcode
11086 */
11087 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11088 if (INSN(31,31) != 0
11089 || INSN(28,24) != BITS5(0,1,1,1,0)
11090 || INSN(21,21) != 1
11091 || INSN(10,10) != 1) {
11092 return False;
11093 }
11094 UInt bitQ = INSN(30,30);
11095 UInt bitU = INSN(29,29);
11096 UInt size = INSN(23,22);
11097 UInt mm = INSN(20,16);
11098 UInt opcode = INSN(15,11);
11099 UInt nn = INSN(9,5);
11100 UInt dd = INSN(4,0);
11101 vassert(size < 4);
11102
11103 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11104 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11105 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11106 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11107 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11108 if (size == X11) return False;
11109 Bool isADD = opcode == BITS5(0,0,0,0,0);
11110 Bool isU = bitU == 1;
11111 /* Widen both args out, do the math, narrow to final result. */
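   /* (The widening is needed because the halving forms would otherwise
      lose the carry/borrow out of the top bit.  The shift right by one
      is logical for the unsigned forms and arithmetic for the signed
      forms.) */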
11112 IRTemp argL = newTempV128();
11113 IRTemp argLhi = IRTemp_INVALID;
11114 IRTemp argLlo = IRTemp_INVALID;
11115 IRTemp argR = newTempV128();
11116 IRTemp argRhi = IRTemp_INVALID;
11117 IRTemp argRlo = IRTemp_INVALID;
11118 IRTemp resHi = newTempV128();
11119 IRTemp resLo = newTempV128();
11120 IRTemp res = IRTemp_INVALID;
11121 assign(argL, getQReg128(nn));
11122 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11123 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11124 assign(argR, getQReg128(mm));
11125 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11126 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11127 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11128 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11129 assign(resHi, binop(opSxR,
11130 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11131 mkU8(1)));
11132 assign(resLo, binop(opSxR,
11133 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11134 mkU8(1)));
11135 res = math_NARROW_LANES ( resHi, resLo, size );
11136 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11137 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11138 : (isU ? "uhsub" : "shsub");
11139 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11140 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11141 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11142 return True;
11143 }
11144
11145 if (opcode == BITS5(0,0,0,1,0)) {
11146 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11147 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11148 if (bitQ == 0 && size == X11) return False; // implied 1d case
11149 Bool isU = bitU == 1;
11150 IRTemp argL = newTempV128();
11151 IRTemp argR = newTempV128();
11152 assign(argL, getQReg128(nn));
11153 assign(argR, getQReg128(mm));
11154 IRTemp res = math_RHADD(size, isU, argL, argR);
11155 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11156 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11157 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11158 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11159 return True;
11160 }
11161
11162 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11163 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11164 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11165 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11166 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11167 if (bitQ == 0 && size == X11) return False; // implied 1d case
11168 Bool isADD = opcode == BITS5(0,0,0,0,1);
11169 Bool isU = bitU == 1;
11170 IROp qop = Iop_INVALID;
11171 IROp nop = Iop_INVALID;
11172 if (isADD) {
11173 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11174 nop = mkVecADD(size);
11175 } else {
11176 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11177 nop = mkVecSUB(size);
11178 }
11179 IRTemp argL = newTempV128();
11180 IRTemp argR = newTempV128();
11181 IRTemp qres = newTempV128();
11182 IRTemp nres = newTempV128();
11183 assign(argL, getQReg128(nn));
11184 assign(argR, getQReg128(mm));
11185 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11186 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11187 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11188 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11189 putQReg128(dd, mkexpr(qres));
11190 updateQCFLAGwithDifference(qres, nres);
11191 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11192 : (isU ? "uqsub" : "sqsub");
11193 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11194 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11195 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11196 return True;
11197 }
11198
11199 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11200 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11201 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11202 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11203 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11204 Bool isORx = (size & 2) == 2;
11205 Bool invert = (size & 1) == 1;
11206 IRTemp res = newTempV128();
11207 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11208 getQReg128(nn),
11209 invert ? unop(Iop_NotV128, getQReg128(mm))
11210 : getQReg128(mm)));
11211 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11212 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11213 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11214 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11215 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11216 return True;
11217 }
11218
11219 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11220 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11221 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11222 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11223 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11224 IRTemp argD = newTempV128();
11225 IRTemp argN = newTempV128();
11226 IRTemp argM = newTempV128();
11227 assign(argD, getQReg128(dd));
11228 assign(argN, getQReg128(nn));
11229 assign(argM, getQReg128(mm));
11230 const IROp opXOR = Iop_XorV128;
11231 const IROp opAND = Iop_AndV128;
11232 const IROp opNOT = Iop_NotV128;
11233 IRTemp res = newTempV128();
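/* All three selects below use the bitwise-mux identity
      x ^ ((x ^ y) & mask)  ==  (y & mask) | (x & ~mask),
   that is, take y where mask is 1 and x where mask is 0.  Worked 4-bit
   example for BSL (the mask is the existing destination d):
      d = 1100, n = 1010, m = 0110
      m ^ ((m ^ n) & d) = 0110 ^ (1100 & 1100) = 1010,
   which is n in the two bits where d is 1 and m where d is 0.  BIT and
   BIF use the same identity with m (respectively ~m) as the mask. */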
11234 switch (size) {
11235 case BITS2(0,0): /* EOR */
11236 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11237 break;
11238 case BITS2(0,1): /* BSL */
11239 assign(res, binop(opXOR, mkexpr(argM),
11240 binop(opAND,
11241 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11242 mkexpr(argD))));
11243 break;
11244 case BITS2(1,0): /* BIT */
11245 assign(res, binop(opXOR, mkexpr(argD),
11246 binop(opAND,
11247 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11248 mkexpr(argM))));
11249 break;
11250 case BITS2(1,1): /* BIF */
11251 assign(res, binop(opXOR, mkexpr(argD),
11252 binop(opAND,
11253 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11254 unop(opNOT, mkexpr(argM)))));
11255 break;
11256 default:
11257 vassert(0);
11258 }
11259 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11260 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11261 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11262 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11263 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11264 return True;
11265 }
11266
11267 if (opcode == BITS5(0,0,1,1,0)) {
11268 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11269 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11270 if (bitQ == 0 && size == X11) return False; // implied 1d case
11271 Bool isGT = bitU == 0;
11272 IRExpr* argL = getQReg128(nn);
11273 IRExpr* argR = getQReg128(mm);
11274 IRTemp res = newTempV128();
11275 assign(res,
11276 isGT ? binop(mkVecCMPGTS(size), argL, argR)
11277 : binop(mkVecCMPGTU(size), argL, argR));
11278 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11279 const HChar* nm = isGT ? "cmgt" : "cmhi";
11280 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11281 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11282 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11283 return True;
11284 }
11285
11286 if (opcode == BITS5(0,0,1,1,1)) {
11287 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11288 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11289 if (bitQ == 0 && size == X11) return False; // implied 1d case
11290 Bool isGE = bitU == 0;
11291 IRExpr* argL = getQReg128(nn);
11292 IRExpr* argR = getQReg128(mm);
11293 IRTemp res = newTempV128();
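/* a >= b (signed or unsigned) is computed as NOT(b > a); this works
   lanewise because each comparison lane is all-ones or all-zeroes. */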
11294 assign(res,
11295 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11296 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11297 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11298 const HChar* nm = isGE ? "cmge" : "cmhs";
11299 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11300 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11301 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11302 return True;
11303 }
11304
11305 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11306 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
11307 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11308 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
11309 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11310 if (bitQ == 0 && size == X11) return False; // implied 1d case
11311 Bool isU = bitU == 1;
11312 Bool isR = opcode == BITS5(0,1,0,1,0);
11313 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11314 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11315 IRTemp res = newTempV128();
11316 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11317 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11318 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11319 : (isU ? "ushl" : "sshl");
11320 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11321 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11322 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11323 return True;
11324 }
11325
11326 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11327 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
11328 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11329 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
11330 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11331 if (bitQ == 0 && size == X11) return False; // implied 1d case
11332 Bool isU = bitU == 1;
11333 Bool isR = opcode == BITS5(0,1,0,1,1);
11334 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11335 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11336 /* This is a bit tricky. If we're only interested in the lowest 64 bits
11337 of the result (viz, bitQ == 0), then we must adjust the operands to
11338 ensure that the upper part of the result, that we don't care about,
11339 doesn't pollute the returned Q value. To do this, zero out the upper
11340 operand halves beforehand. This works because it means, for the
11341 lanes we don't care about, we are shifting zero by zero, which can
11342 never saturate. */
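/* (Concrete case: for a 2s operation, bitQ == 0, lanes 2 and 3 of both
   operands are forced to zero first, so those lanes of the shifted result
   are "zero shifted by zero" and their saturation-indication lanes in the
   upper half of res256 stay zero; only lanes 0 and 1 can make resQ differ
   from the all-zero vector it is compared against below.) */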
11343 IRTemp res256 = newTemp(Ity_V256);
11344 IRTemp resSH = newTempV128();
11345 IRTemp resQ = newTempV128();
11346 IRTemp zero = newTempV128();
11347 assign(res256, binop(op,
11348 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11349 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11350 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11351 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11352 assign(zero, mkV128(0x0000));
11353 putQReg128(dd, mkexpr(resSH));
11354 updateQCFLAGwithDifference(resQ, zero);
11355 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11356 : (isU ? "uqshl" : "sqshl");
11357 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11358 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11359 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11360 return True;
11361 }
11362
11363 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11364 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11365 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11366 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11367 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11368 if (bitQ == 0 && size == X11) return False; // implied 1d case
11369 Bool isU = bitU == 1;
11370 Bool isMAX = (opcode & 1) == 0;
11371 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11372 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11373 IRTemp t = newTempV128();
11374 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11375 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11376 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
11377 : (isU ? "umin" : "smin");
11378 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11379 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11380 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11381 return True;
11382 }
11383
11384 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
11385 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
11386 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
11387 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
11388 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
11389 if (size == X11) return False; // 1d/2d cases not allowed
11390 Bool isU = bitU == 1;
11391 Bool isACC = opcode == BITS5(0,1,1,1,1);
11392 vassert(size <= 2);
11393 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
11394 IRTemp t2 = newTempV128();
11395 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
11396 : mkexpr(t1));
11397 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11398 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
11399 : (isU ? "uabd" : "sabd");
11400 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11401 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11402 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11403 return True;
11404 }
11405
11406 if (opcode == BITS5(1,0,0,0,0)) {
11407 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
11408 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
11409 if (bitQ == 0 && size == X11) return False; // implied 1d case
11410 Bool isSUB = bitU == 1;
11411 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
11412 IRTemp t = newTempV128();
11413 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11414 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11415 const HChar* nm = isSUB ? "sub" : "add";
11416 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11417 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11418 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11419 return True;
11420 }
11421
11422 if (opcode == BITS5(1,0,0,0,1)) {
11423 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
11424 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
11425 if (bitQ == 0 && size == X11) return False; // implied 1d case
11426 Bool isEQ = bitU == 1;
11427 IRExpr* argL = getQReg128(nn);
11428 IRExpr* argR = getQReg128(mm);
11429 IRTemp res = newTempV128();
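/* CMTST sets a lane to all-ones iff (argL & argR) is nonzero in that
   lane, hence the NOT(CMEQ(argL & argR, 0)) formulation below. */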
11430 assign(res,
11431 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
11432 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
11433 binop(Iop_AndV128, argL, argR),
11434 mkV128(0x0000))));
11435 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11436 const HChar* nm = isEQ ? "cmeq" : "cmtst";
11437 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11438 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11439 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11440 return True;
11441 }
11442
11443 if (opcode == BITS5(1,0,0,1,0)) {
11444 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
11445 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
11446 if (bitQ == 0 && size == X11) return False; // implied 1d case
11447 Bool isMLS = bitU == 1;
11448 IROp opMUL = mkVecMUL(size);
11449 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
11450 IRTemp res = newTempV128();
11451 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
11452 assign(res, binop(opADDSUB,
11453 getQReg128(dd),
11454 binop(opMUL, getQReg128(nn), getQReg128(mm))));
11455 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11456 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11457 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
11458 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11459 return True;
11460 }
11461 return False;
11462 }
11463
11464 if (opcode == BITS5(1,0,0,1,1)) {
11465 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
11466 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
11467 if (bitQ == 0 && size == X11) return False; // implied 1d case
11468 Bool isPMUL = bitU == 1;
11469 const IROp opsPMUL[4]
11470 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
11471 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
11472 IRTemp res = newTempV128();
11473 if (opMUL != Iop_INVALID) {
11474 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
11475 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11476 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11477 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
11478 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11479 return True;
11480 }
11481 return False;
11482 }
11483
11484 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
11485 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
11486 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
11487 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
11488 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
11489 if (size == X11) return False;
11490 Bool isU = bitU == 1;
11491 Bool isMAX = opcode == BITS5(1,0,1,0,0);
11492 IRTemp vN = newTempV128();
11493 IRTemp vM = newTempV128();
11494 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11495 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11496 assign(vN, getQReg128(nn));
11497 assign(vM, getQReg128(mm));
11498 IRTemp res128 = newTempV128();
11499 assign(res128,
11500 binop(op,
11501 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11502 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11503 /* In the half-width case, use CatEvenLanes32x4 to extract the
11504 half-width result from the full-width result. */
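/* Illustration for the 4s case, lanes written high to low:
      vN = [n3 n2 n1 n0], vM = [m3 m2 m1 m0]
      CatEvenLanes32x4(vM,vN) = [m2 m0 n2 n0]
      CatOddLanes32x4 (vM,vN) = [m3 m1 n3 n1]
   so the lanewise max/min of those two vectors is
      [mx(m3,m2) mx(m1,m0) mx(n3,n2) mx(n1,n0)],
   which is exactly the pairwise result: low half from the vN pairs,
   high half from the vM pairs. */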
11505 IRExpr* res
11506 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11507 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11508 mkexpr(res128)))
11509 : mkexpr(res128);
11510 putQReg128(dd, res);
11511 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11512 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
11513 : (isU ? "uminp" : "sminp");
11514 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11515 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11516 return True;
11517 }
11518
11519 if (opcode == BITS5(1,0,1,1,0)) {
11520 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
11521 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
11522 if (size == X00 || size == X11) return False;
11523 Bool isR = bitU == 1;
11524 IRTemp res, sat1q, sat1n, vN, vM;
11525 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11526 newTempsV128_2(&vN, &vM);
11527 assign(vN, getQReg128(nn));
11528 assign(vM, getQReg128(mm));
11529 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11530 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11531 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
11532 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11533 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11534 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11535 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11536 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11537 return True;
11538 }
11539
11540 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
11541 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
11542 if (bitQ == 0 && size == X11) return False; // implied 1d case
11543 IRTemp vN = newTempV128();
11544 IRTemp vM = newTempV128();
11545 assign(vN, getQReg128(nn));
11546 assign(vM, getQReg128(mm));
11547 IRTemp res128 = newTempV128();
11548 assign(res128,
11549 binop(mkVecADD(size),
11550 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11551 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11552 /* In the half-width case, use CatEvenLanes32x4 to extract the
11553 half-width result from the full-width result. */
11554 IRExpr* res
11555 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11556 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11557 mkexpr(res128)))
11558 : mkexpr(res128);
11559 putQReg128(dd, res);
11560 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11561 DIP("addp %s.%s, %s.%s, %s.%s\n",
11562 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11563 return True;
11564 }
11565
11566 if (bitU == 0
11567 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11568 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11569 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11570 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11571 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11572 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11573 Bool isD = (size & 1) == 1;
11574 if (bitQ == 0 && isD) return False; // implied 1d case
11575 Bool isMIN = (size & 2) == 2;
11576 Bool isNM = opcode == BITS5(1,1,0,0,0);
11577 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
11578 IRTemp res = newTempV128();
11579 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
11580 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11581 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11582 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
11583 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11584 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11585 return True;
11586 }
11587
11588 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
11589 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11590 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11591 Bool isD = (size & 1) == 1;
11592 Bool isSUB = (size & 2) == 2;
11593 if (bitQ == 0 && isD) return False; // implied 1d case
11594 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11595 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11596 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11597 IRTemp rm = mk_get_IR_rounding_mode();
11598 IRTemp t1 = newTempV128();
11599 IRTemp t2 = newTempV128();
11600 // FIXME: double rounding; use FMA primops instead
11601 assign(t1, triop(opMUL,
11602 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11603 assign(t2, triop(isSUB ? opSUB : opADD,
11604 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11605 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11606 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11607 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
11608 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11609 return True;
11610 }
11611
11612 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
11613 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11614 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11615 Bool isD = (size & 1) == 1;
11616 Bool isSUB = (size & 2) == 2;
11617 if (bitQ == 0 && isD) return False; // implied 1d case
11618 const IROp ops[4]
11619 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
11620 IROp op = ops[size];
11621 IRTemp rm = mk_get_IR_rounding_mode();
11622 IRTemp t1 = newTempV128();
11623 IRTemp t2 = newTempV128();
11624 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11625 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11626 putQReg128(dd, mkexpr(t2));
11627 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11628 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
11629 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11630 return True;
11631 }
11632
11633 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
11634 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11635 Bool isD = (size & 1) == 1;
11636 if (bitQ == 0 && isD) return False; // implied 1d case
11637 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11638 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
11639 IRTemp rm = mk_get_IR_rounding_mode();
11640 IRTemp t1 = newTempV128();
11641 IRTemp t2 = newTempV128();
11642 // FIXME: use Abd primop instead?
11643 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11644 assign(t2, unop(opABS, mkexpr(t1)));
11645 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11646 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11647 DIP("fabd %s.%s, %s.%s, %s.%s\n",
11648 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11649 return True;
11650 }
11651
11652 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
11653 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11654 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11655 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
11656 Bool isD = (size & 1) == 1;
11657 Bool isMULX = bitU == 0;
11658 if (bitQ == 0 && isD) return False; // implied 1d case
11659 IRTemp rm = mk_get_IR_rounding_mode();
11660 IRTemp t1 = newTempV128();
11661 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
11662 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11663 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11664 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11665 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
11666 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11667 return True;
11668 }
11669
11670 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
11671 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11672 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11673 Bool isD = (size & 1) == 1;
11674 if (bitQ == 0 && isD) return False; // implied 1d case
11675 Bool isGE = bitU == 1;
11676 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
11677 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
11678 IRTemp t1 = newTempV128();
11679 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
11680 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
11681 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11682 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11683 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
11684 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11685 return True;
11686 }
11687
11688 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
11689 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11690 Bool isD = (size & 1) == 1;
11691 if (bitQ == 0 && isD) return False; // implied 1d case
11692 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11693 IRTemp t1 = newTempV128();
11694 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
11695 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11696 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11697 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
11698 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11699 return True;
11700 }
11701
11702 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
11703 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11704 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11705 Bool isD = (size & 1) == 1;
11706 Bool isGT = (size & 2) == 2;
11707 if (bitQ == 0 && isD) return False; // implied 1d case
11708 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
11709 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
11710 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
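/* facge: |Vn| >= |Vm| is computed as |Vm| <= |Vn|, and facgt as
   |Vm| < |Vn|; hence the swapped operands below. */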
11711 IRTemp t1 = newTempV128();
11712 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
11713 unop(opABS, getQReg128(nn)))); // swapd
11714 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11715 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11716 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
11717 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11718 return True;
11719 }
11720
11721 if (bitU == 1
11722 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11723 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11724 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11725 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11726 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11727 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11728 Bool isD = (size & 1) == 1;
11729 if (bitQ == 0 && isD) return False; // implied 1d case
11730 Bool isMIN = (size & 2) == 2;
11731 Bool isNM = opcode == BITS5(1,1,0,0,0);
11732 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
11733 IRTemp srcN = newTempV128();
11734 IRTemp srcM = newTempV128();
11735 IRTemp preL = IRTemp_INVALID;
11736 IRTemp preR = IRTemp_INVALID;
11737 assign(srcN, getQReg128(nn));
11738 assign(srcM, getQReg128(mm));
11739 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
11740 srcM, srcN, isD, bitQ);
11741 putQReg128(
11742 dd, math_MAYBE_ZERO_HI64_fromE(
11743 bitQ,
11744 binop(opMXX, mkexpr(preL), mkexpr(preR))));
11745 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11746 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
11747 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11748 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11749 return True;
11750 }
11751
11752 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
11753 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11754 Bool isD = size == X01;
11755 if (bitQ == 0 && isD) return False; // implied 1d case
11756 IRTemp srcN = newTempV128();
11757 IRTemp srcM = newTempV128();
11758 IRTemp preL = IRTemp_INVALID;
11759 IRTemp preR = IRTemp_INVALID;
11760 assign(srcN, getQReg128(nn));
11761 assign(srcM, getQReg128(mm));
11762 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
11763 srcM, srcN, isD, bitQ);
11764 putQReg128(
11765 dd, math_MAYBE_ZERO_HI64_fromE(
11766 bitQ,
11767 triop(mkVecADDF(isD ? 3 : 2),
11768 mkexpr(mk_get_IR_rounding_mode()),
11769 mkexpr(preL), mkexpr(preR))));
11770 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11771 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
11772 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11773 return True;
11774 }
11775
11776 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
11777 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11778 Bool isD = (size & 1) == 1;
11779 if (bitQ == 0 && isD) return False; // implied 1d case
11780 vassert(size <= 1);
11781 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
11782 IROp op = ops[size];
11783 IRTemp rm = mk_get_IR_rounding_mode();
11784 IRTemp t1 = newTempV128();
11785 IRTemp t2 = newTempV128();
11786 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11787 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11788 putQReg128(dd, mkexpr(t2));
11789 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11790 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
11791 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11792 return True;
11793 }
11794
11795 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
11796 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11797 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11798 Bool isSQRT = (size & 2) == 2;
11799 Bool isD = (size & 1) == 1;
11800 if (bitQ == 0 && isD) return False; // implied 1d case
11801 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
11802 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
11803 IRTemp res = newTempV128();
11804 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11805 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11806 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11807 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
11808 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11809 return True;
11810 }
11811
11812 return False;
11813 # undef INSN
11814 }
11815
11816
11817 static
11818 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
11819 {
11820 /* 31 30 29 28 23 21 16 11 9 4
11821 0 Q U 01110 size 10000 opcode 10 n d
11822 Decode fields: U,size,opcode
11823 */
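/* For instance, insn = 0x4E205820 has Q=1 U=0 size=00 opcode=00101 n=1
   d=0 and so should be handled by the CNT case below, disassembling as
   "cnt v0.16b, v1.16b". */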
11824 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11825 if (INSN(31,31) != 0
11826 || INSN(28,24) != BITS5(0,1,1,1,0)
11827 || INSN(21,17) != BITS5(1,0,0,0,0)
11828 || INSN(11,10) != BITS2(1,0)) {
11829 return False;
11830 }
11831 UInt bitQ = INSN(30,30);
11832 UInt bitU = INSN(29,29);
11833 UInt size = INSN(23,22);
11834 UInt opcode = INSN(16,12);
11835 UInt nn = INSN(9,5);
11836 UInt dd = INSN(4,0);
11837 vassert(size < 4);
11838
11839 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
11840 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
11841 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
11842 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
11843 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
11844 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
11845 vassert(size <= 2);
11846 IRTemp res = newTempV128();
11847 assign(res, unop(iops[size], getQReg128(nn)));
11848 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11849 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11850 DIP("%s %s.%s, %s.%s\n", "rev64",
11851 nameQReg128(dd), arr, nameQReg128(nn), arr);
11852 return True;
11853 }
11854
11855 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
11856 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
11857 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
11858 Bool isH = size == X01;
11859 IRTemp res = newTempV128();
11860 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
11861 assign(res, unop(iop, getQReg128(nn)));
11862 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11863 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11864 DIP("%s %s.%s, %s.%s\n", "rev32",
11865 nameQReg128(dd), arr, nameQReg128(nn), arr);
11866 return True;
11867 }
11868
11869 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
11870 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
11871 IRTemp res = newTempV128();
11872 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
11873 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11874 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11875 DIP("%s %s.%s, %s.%s\n", "rev16",
11876 nameQReg128(dd), arr, nameQReg128(nn), arr);
11877 return True;
11878 }
11879
11880 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
11881 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
11882 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
11883 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
11884 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
11885 /* Widens, and size refers to the narrow size. */
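/* E.g. for SADDLP Vd.4h, Vn.8b (size == X00, bitQ == 0): each 16-bit
   result lane i is the sum of the sign-extended byte lanes 2i and 2i+1
   of Vn, obtained below by widening the even and odd lanes separately
   and then adding the two widened vectors. */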
11886 if (size == X11) return False; // no 1d or 2d cases
11887 Bool isU = bitU == 1;
11888 Bool isACC = opcode == BITS5(0,0,1,1,0);
11889 IRTemp src = newTempV128();
11890 IRTemp sum = newTempV128();
11891 IRTemp res = newTempV128();
11892 assign(src, getQReg128(nn));
11893 assign(sum,
11894 binop(mkVecADD(size+1),
11895 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11896 isU, True/*fromOdd*/, size, mkexpr(src))),
11897 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11898 isU, False/*!fromOdd*/, size, mkexpr(src)))));
11899 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
11900 : mkexpr(sum));
11901 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11902 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11903 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
11904 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
11905 : (isU ? "uaddlp" : "saddlp"),
11906 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
11907 return True;
11908 }
11909
11910 if (opcode == BITS5(0,0,0,1,1)) {
11911 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
11912 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
11913 if (bitQ == 0 && size == X11) return False; // implied 1d case
11914 Bool isUSQADD = bitU == 1;
11915 /* This is switched (in the US vs SU sense) deliberately.
11916 SUQADD corresponds to the ExtUSsatSS variants and
11917 USQADD corresponds to the ExtSUsatUU variants.
11918 See libvex_ir.h for more details. */
11919 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
11920 : mkVecQADDEXTUSSATSS(size);
11921 IROp nop = mkVecADD(size);
11922 IRTemp argL = newTempV128();
11923 IRTemp argR = newTempV128();
11924 IRTemp qres = newTempV128();
11925 IRTemp nres = newTempV128();
11926 /* Because the two arguments to the addition are implicitly
11927 extended differently (one signedly, the other unsignedly) it is
11928 important to present them to the primop in the correct order. */
11929 assign(argL, getQReg128(nn));
11930 assign(argR, getQReg128(dd));
11931 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11932 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11933 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11934 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11935 putQReg128(dd, mkexpr(qres));
11936 updateQCFLAGwithDifference(qres, nres);
11937 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11938 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
11939 nameQReg128(dd), arr, nameQReg128(nn), arr);
11940 return True;
11941 }
11942
11943 if (opcode == BITS5(0,0,1,0,0)) {
11944 /* -------- 0,xx,00100: CLS std6_std6 -------- */
11945 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
11946 if (size == X11) return False; // no 1d or 2d cases
11947 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
11948 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
11949 Bool isCLZ = bitU == 1;
11950 IRTemp res = newTempV128();
11951 vassert(size <= 2);
11952 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
11953 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11954 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11955 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
11956 nameQReg128(dd), arr, nameQReg128(nn), arr);
11957 return True;
11958 }
11959
11960 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
11961 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
11962 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
11963 IRTemp res = newTempV128();
11964 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
11965 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11966 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11967 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
11968 nameQReg128(dd), arr, nameQReg128(nn), arr);
11969 return True;
11970 }
11971
11972 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
11973 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
11974 IRTemp res = newTempV128();
11975 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
11976 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11977 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11978 DIP("%s %s.%s, %s.%s\n", "rbit",
11979 nameQReg128(dd), arr, nameQReg128(nn), arr);
11980 return True;
11981 }
11982
11983 if (opcode == BITS5(0,0,1,1,1)) {
11984 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
11985 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
11986 if (bitQ == 0 && size == X11) return False; // implied 1d case
11987 Bool isNEG = bitU == 1;
11988 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
11989 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
11990 getQReg128(nn), size );
11991 IRTemp qres = newTempV128(), nres = newTempV128();
11992 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
11993 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
11994 putQReg128(dd, mkexpr(qres));
11995 updateQCFLAGwithDifference(qres, nres);
11996 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11997 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
11998 nameQReg128(dd), arr, nameQReg128(nn), arr);
11999 return True;
12000 }
12001
12002 if (opcode == BITS5(0,1,0,0,0)) {
12003 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
12004 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
12005 if (bitQ == 0 && size == X11) return False; // implied 1d case
12006 Bool isGT = bitU == 0;
12007 IRExpr* argL = getQReg128(nn);
12008 IRExpr* argR = mkV128(0x0000);
12009 IRTemp res = newTempV128();
12010 IROp opGTS = mkVecCMPGTS(size);
12011 assign(res, isGT ? binop(opGTS, argL, argR)
12012 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
12013 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12014 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12015 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
12016 nameQReg128(dd), arr, nameQReg128(nn), arr);
12017 return True;
12018 }
12019
12020 if (opcode == BITS5(0,1,0,0,1)) {
12021 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
12022 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
12023 if (bitQ == 0 && size == X11) return False; // implied 1d case
12024 Bool isEQ = bitU == 0;
12025 IRExpr* argL = getQReg128(nn);
12026 IRExpr* argR = mkV128(0x0000);
12027 IRTemp res = newTempV128();
12028 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12029 : unop(Iop_NotV128,
12030 binop(mkVecCMPGTS(size), argL, argR)));
12031 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12032 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12033 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12034 nameQReg128(dd), arr, nameQReg128(nn), arr);
12035 return True;
12036 }
12037
12038 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12039 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12040 if (bitQ == 0 && size == X11) return False; // implied 1d case
12041 IRExpr* argL = getQReg128(nn);
12042 IRExpr* argR = mkV128(0x0000);
12043 IRTemp res = newTempV128();
12044 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12045 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12046 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12047 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12048 nameQReg128(dd), arr, nameQReg128(nn), arr);
12049 return True;
12050 }
12051
12052 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12053 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12054 if (bitQ == 0 && size == X11) return False; // implied 1d case
12055 IRTemp res = newTempV128();
12056 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12057 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12058 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12059 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12060 return True;
12061 }
12062
12063 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12064 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12065 if (bitQ == 0 && size == X11) return False; // implied 1d case
12066 IRTemp res = newTempV128();
12067 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12068 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12069 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12070 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12071 return True;
12072 }
12073
12074 UInt ix = 0; /*INVALID*/
12075 if (size >= X10) {
12076 switch (opcode) {
12077 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12078 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12079 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12080 default: break;
12081 }
12082 }
12083 if (ix > 0) {
12084 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12085 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12086 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12087 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12088 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12089 if (bitQ == 0 && size == X11) return False; // implied 1d case
12090 Bool isD = size == X11;
12091 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12092 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12093 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12094 IROp opCmp = Iop_INVALID;
12095 Bool swap = False;
12096 const HChar* nm = "??";
12097 switch (ix) {
12098 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12099 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12100 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12101 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12102 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12103 default: vassert(0);
12104 }
12105 IRExpr* zero = mkV128(0x0000);
12106 IRTemp res = newTempV128();
12107 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12108 : binop(opCmp, getQReg128(nn), zero));
12109 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12110 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12111 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12112 nameQReg128(dd), arr, nameQReg128(nn), arr);
12113 return True;
12114 }
12115
12116 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12117 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12118 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12119 if (bitQ == 0 && size == X11) return False; // implied 1d case
12120 Bool isFNEG = bitU == 1;
12121 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12122 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12123 IRTemp res = newTempV128();
12124 assign(res, unop(op, getQReg128(nn)));
12125 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12126 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12127 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12128 nameQReg128(dd), arr, nameQReg128(nn), arr);
12129 return True;
12130 }
12131
12132 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12133 /* -------- 0,xx,10010: XTN{,2} -------- */
12134 if (size == X11) return False;
12135 vassert(size < 3);
12136 Bool is2 = bitQ == 1;
12137 IROp opN = mkVecNARROWUN(size);
12138 IRTemp resN = newTempV128();
12139 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12140 putLO64andZUorPutHI64(is2, dd, resN);
12141 const HChar* nm = "xtn";
12142 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12143 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12144 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12145 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12146 return True;
12147 }
12148
12149 if (opcode == BITS5(1,0,1,0,0)
12150 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12151 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12152 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12153 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12154 if (size == X11) return False;
12155 vassert(size < 3);
12156 Bool is2 = bitQ == 1;
12157 IROp opN = Iop_INVALID;
12158 Bool zWiden = True;
12159 const HChar* nm = "??";
12160 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12161 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12162 }
12163 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12164 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12165 }
12166 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12167 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12168 }
12169 else vassert(0);
12170 IRTemp src = newTempV128();
12171 assign(src, getQReg128(nn));
12172 IRTemp resN = newTempV128();
12173 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12174 putLO64andZUorPutHI64(is2, dd, resN);
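/* Saturation detection: re-widen the narrowed result and compare it with
   the original source; any lane that differs was clipped, and that
   difference feeds the cumulative QC flag. */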
12175 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12176 size, mkexpr(resN));
12177 updateQCFLAGwithDifference(src, resW);
12178 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12179 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12180 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12181 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12182 return True;
12183 }
12184
12185 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12186 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12187 /* Widens, and size is the narrow size. */
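/* Implementation note: interleaving the source with itself doubles each
   narrow lane into a wide lane holding (x << lanebits) | x, and the
   subsequent left shift by the lane width leaves exactly x << lanebits,
   which is the SHLL result. */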
12188 if (size == X11) return False;
12189 Bool is2 = bitQ == 1;
12190 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12191 IROp opSHL = mkVecSHLN(size+1);
12192 IRTemp src = newTempV128();
12193 IRTemp res = newTempV128();
12194 assign(src, getQReg128(nn));
12195 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12196 mkU8(8 << size)));
12197 putQReg128(dd, mkexpr(res));
12198 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12199 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12200 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
12201 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12202 return True;
12203 }
12204
12205 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12206 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
12207 UInt nLanes = size == X00 ? 4 : 2;
12208 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12209 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12210 IRTemp rm = mk_get_IR_rounding_mode();
12211 IRTemp src[nLanes];
12212 for (UInt i = 0; i < nLanes; i++) {
12213 src[i] = newTemp(srcTy);
12214 assign(src[i], getQRegLane(nn, i, srcTy));
12215 }
12216 for (UInt i = 0; i < nLanes; i++) {
12217 putQRegLane(dd, nLanes * bitQ + i,
12218 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12219 }
12220 if (bitQ == 0) {
12221 putQRegLane(dd, 1, mkU64(0));
12222 }
12223 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12224 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12225 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12226 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12227 return True;
12228 }
12229
12230 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12231 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12232 /* Using Irrm_NEAREST here isn't right. The docs say "round to
12233 odd" but I don't know what that really means. */
12234 IRType srcTy = Ity_F64;
12235 IROp opCvt = Iop_F64toF32;
12236 IRTemp src[2];
12237 for (UInt i = 0; i < 2; i++) {
12238 src[i] = newTemp(srcTy);
12239 assign(src[i], getQRegLane(nn, i, srcTy));
12240 }
12241 for (UInt i = 0; i < 2; i++) {
12242 putQRegLane(dd, 2 * bitQ + i,
12243 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12244 }
12245 if (bitQ == 0) {
12246 putQRegLane(dd, 1, mkU64(0));
12247 }
12248 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12249 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12250 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12251 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12252 return True;
12253 }
12254
12255 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12256 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12257 UInt nLanes = size == X00 ? 4 : 2;
12258 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
12259 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
12260 IRTemp src[nLanes];
12261 for (UInt i = 0; i < nLanes; i++) {
12262 src[i] = newTemp(srcTy);
12263 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12264 }
12265 for (UInt i = 0; i < nLanes; i++) {
12266 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12267 }
12268 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12269 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12270 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12271 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12272 return True;
12273 }
12274
12275 ix = 0;
12276 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12277 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12278 // = 1 + bitU[0]:size[1]:opcode[0]
12279 vassert(ix >= 1 && ix <= 8);
12280 if (ix == 7) ix = 0;
12281 }
12282 if (ix > 0) {
12283 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12284 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12285 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12286 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12287 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12288 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12289 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12290 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12291 /* rm plan:
12292 FRINTN: tieeven -- !! FIXME KLUDGED !!
12293 FRINTM: -inf
12294 FRINTP: +inf
12295 FRINTZ: zero
12296 FRINTA: tieaway -- !! FIXME KLUDGED !!
12297 FRINTX: per FPCR + "exact = TRUE"
12298 FRINTI: per FPCR
12299 */
12300 Bool isD = (size & 1) == 1;
12301 if (bitQ == 0 && isD) return False; // implied 1d case
12302
12303 IRTemp irrmRM = mk_get_IR_rounding_mode();
12304
12305 UChar ch = '?';
12306 IRTemp irrm = newTemp(Ity_I32);
12307 switch (ix) {
12308 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12309 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12310 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12311 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12312 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12313 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12314 // I am unsure about the following, due to the "integral exact"
12315 // description in the manual. What does it mean? (frintx, that is)
12316 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12317 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12318 default: vassert(0);
12319 }
12320
12321 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12322 if (isD) {
12323 for (UInt i = 0; i < 2; i++) {
12324 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12325 getQRegLane(nn, i, Ity_F64)));
12326 }
12327 } else {
12328 UInt n = bitQ==1 ? 4 : 2;
12329 for (UInt i = 0; i < n; i++) {
12330 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12331 getQRegLane(nn, i, Ity_F32)));
12332 }
12333 if (bitQ == 0)
12334 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12335 }
12336 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12337 DIP("frint%c %s.%s, %s.%s\n", ch,
12338 nameQReg128(dd), arr, nameQReg128(nn), arr);
12339 return True;
12340 }
12341
12342 ix = 0; /*INVALID*/
12343 switch (opcode) {
12344 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12345 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12346 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12347 default: break;
12348 }
12349 if (ix > 0) {
12350 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12351 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12352 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12353 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12354 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12355 /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12356 /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12357 /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12358 /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12359 /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12360 Bool isD = (size & 1) == 1;
12361 if (bitQ == 0 && isD) return False; // implied 1d case
12362
12363 IRRoundingMode irrm = 8; /*impossible*/
12364 HChar ch = '?';
12365 switch (ix) {
12366 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12367 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12368 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12369 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12370 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12371 default: vassert(0);
12372 }
12373 IROp cvt = Iop_INVALID;
12374 if (bitU == 1) {
12375 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12376 } else {
12377 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12378 }
12379 if (isD) {
12380 for (UInt i = 0; i < 2; i++) {
12381 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12382 getQRegLane(nn, i, Ity_F64)));
12383 }
12384 } else {
12385 UInt n = bitQ==1 ? 4 : 2;
12386 for (UInt i = 0; i < n; i++) {
12387 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12388 getQRegLane(nn, i, Ity_F32)));
12389 }
12390 if (bitQ == 0)
12391 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12392 }
12393 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12394 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12395 nameQReg128(dd), arr, nameQReg128(nn), arr);
12396 return True;
12397 }
12398
12399 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12400 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12401 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12402 Bool isREC = bitU == 0;
12403 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12404 IRTemp res = newTempV128();
12405 assign(res, unop(op, getQReg128(nn)));
12406 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12407 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12408 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12409 DIP("%s %s.%s, %s.%s\n", nm,
12410 nameQReg128(dd), arr, nameQReg128(nn), arr);
12411 return True;
12412 }
12413
12414 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12415 /* -------- 0,0x,11101: SCVTF -------- */
12416 /* -------- 1,0x,11101: UCVTF -------- */
12417 /* 31 28 22 21 15 9 4
12418 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12419 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12420 with laneage:
12421 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12422 */
12423 Bool isQ = bitQ == 1;
12424 Bool isU = bitU == 1;
12425 Bool isF64 = (size & 1) == 1;
12426 if (isQ || !isF64) {
12427 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12428 UInt nLanes = 0;
12429 Bool zeroHI = False;
12430 const HChar* arrSpec = NULL;
12431 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12432 isQ, isF64 );
12433 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12434 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12435 IRTemp rm = mk_get_IR_rounding_mode();
12436 UInt i;
12437 vassert(ok); /* the 'if' above should ensure this */
12438 for (i = 0; i < nLanes; i++) {
12439 putQRegLane(dd, i,
12440 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12441 }
12442 if (zeroHI) {
12443 putQRegLane(dd, 1, mkU64(0));
12444 }
12445 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12446 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12447 return True;
12448 }
12449 /* else fall through */
12450 }
12451
12452 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12453 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12454 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12455 Bool isSQRT = bitU == 1;
12456 Bool isD = (size & 1) == 1;
12457 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12458 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12459 if (bitQ == 0 && isD) return False; // implied 1d case
12460 IRTemp resV = newTempV128();
12461 assign(resV, unop(op, getQReg128(nn)));
12462 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12463 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12464 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12465 nameQReg128(dd), arr, nameQReg128(nn), arr);
12466 return True;
12467 }
12468
12469 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12470 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12471 Bool isD = (size & 1) == 1;
12472 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12473 if (bitQ == 0 && isD) return False; // implied 1d case
12474 IRTemp resV = newTempV128();
12475 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12476 getQReg128(nn)));
12477 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12478 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12479 DIP("%s %s.%s, %s.%s\n", "fsqrt",
12480 nameQReg128(dd), arr, nameQReg128(nn), arr);
12481 return True;
12482 }
12483
12484 return False;
12485 # undef INSN
12486 }
12487
12488
12489 static
12490 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12491 {
12492 /* 31 28 23 21 20 19 15 11 9 4
12493 0 Q U 01111 size L M m opcode H 0 n d
12494 Decode fields are: u,size,opcode
12495 M is really part of the mm register number. Individual
12496 cases need to inspect L and H though.
12497 */
12498 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12499 if (INSN(31,31) != 0
12500 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
12501 return False;
12502 }
12503 UInt bitQ = INSN(30,30);
12504 UInt bitU = INSN(29,29);
12505 UInt size = INSN(23,22);
12506 UInt bitL = INSN(21,21);
12507 UInt bitM = INSN(20,20);
12508 UInt mmLO4 = INSN(19,16);
12509 UInt opcode = INSN(15,12);
12510 UInt bitH = INSN(11,11);
12511 UInt nn = INSN(9,5);
12512 UInt dd = INSN(4,0);
12513 vassert(size < 4);
12514 vassert(bitH < 2 && bitM < 2 && bitL < 2);
12515
12516 if (bitU == 0 && size >= X10
12517 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12518 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12519 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12520 if (bitQ == 0 && size == X11) return False; // implied 1d case
12521 Bool isD = (size & 1) == 1;
12522 Bool isSUB = opcode == BITS4(0,1,0,1);
12523 UInt index;
12524 if (!isD) index = (bitH << 1) | bitL;
12525 else if (isD && bitL == 0) index = bitH;
12526 else return False; // sz:L == x11 => unallocated encoding
12527 vassert(index < (isD ? 2 : 4));
12528 IRType ity = isD ? Ity_F64 : Ity_F32;
12529 IRTemp elem = newTemp(ity);
12530 UInt mm = (bitM << 4) | mmLO4;
12531 assign(elem, getQRegLane(mm, index, ity));
12532 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12533 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12534 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12535 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12536 IRTemp rm = mk_get_IR_rounding_mode();
12537 IRTemp t1 = newTempV128();
12538 IRTemp t2 = newTempV128();
12539 // FIXME: double rounding; use FMA primops instead
12540 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12541 assign(t2, triop(isSUB ? opSUB : opADD,
12542 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12543 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12544 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12545 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12546 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12547 isD ? 'd' : 's', index);
12548 return True;
12549 }
12550
12551 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12552 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12553 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12554 if (bitQ == 0 && size == X11) return False; // implied 1d case
12555 Bool isD = (size & 1) == 1;
12556 Bool isMULX = bitU == 1;
12557 UInt index;
12558 if (!isD) index = (bitH << 1) | bitL;
12559 else if (isD && bitL == 0) index = bitH;
12560 else return False; // sz:L == x11 => unallocated encoding
12561 vassert(index < (isD ? 2 : 4));
12562 IRType ity = isD ? Ity_F64 : Ity_F32;
12563 IRTemp elem = newTemp(ity);
12564 UInt mm = (bitM << 4) | mmLO4;
12565 assign(elem, getQRegLane(mm, index, ity));
12566 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12567 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12568 IRTemp res = newTempV128();
12569 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12570 mkexpr(mk_get_IR_rounding_mode()),
12571 getQReg128(nn), mkexpr(dupd)));
12572 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12573 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12574 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12575 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12576 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12577 return True;
12578 }
12579
12580 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12581 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12582 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12583 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12584 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12585 Bool isMLA = opcode == BITS4(0,0,0,0);
12586 Bool isMLS = opcode == BITS4(0,1,0,0);
12587 UInt mm = 32; // invalid
12588 UInt ix = 16; // invalid
12589 switch (size) {
12590 case X00:
12591 return False; // b case is not allowed
12592 case X01:
12593 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12594 case X10:
12595 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12596 case X11:
12597 return False; // d case is not allowed
12598 default:
12599 vassert(0);
12600 }
12601 vassert(mm < 32 && ix < 16);
12602 IROp opMUL = mkVecMUL(size);
12603 IROp opADD = mkVecADD(size);
12604 IROp opSUB = mkVecSUB(size);
12605 HChar ch = size == X01 ? 'h' : 's';
12606 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12607 IRTemp vecD = newTempV128();
12608 IRTemp vecN = newTempV128();
12609 IRTemp res = newTempV128();
12610 assign(vecD, getQReg128(dd));
12611 assign(vecN, getQReg128(nn));
12612 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12613 if (isMLA || isMLS) {
12614 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12615 } else {
12616 assign(res, prod);
12617 }
12618 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12619 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12620 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12621 : (isMLS ? "mls" : "mul"),
12622 nameQReg128(dd), arr,
12623 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12624 return True;
12625 }
12626
12627 if (opcode == BITS4(1,0,1,0)
12628 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12629 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12630 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12631 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12632 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12633 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12634 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12635 /* Widens, and size refers to the narrowed lanes. */
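   /* Illustrative reading of the encodings above (drawn from the
      handling below, not a quote from the ARM ARM): for size == X01
      the narrow lanes are H and the wide lanes are S, so e.g.
      SMLAL2 Vd.4S, Vn.8H, Vm.H[ix] widens the upper four H lanes of
      Vn -- is2, i.e. bitQ == 1, selects the high half. */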
12636 UInt ks = 3;
12637 switch (opcode) {
12638 case BITS4(1,0,1,0): ks = 0; break;
12639 case BITS4(0,0,1,0): ks = 1; break;
12640 case BITS4(0,1,1,0): ks = 2; break;
12641 default: vassert(0);
12642 }
12643 vassert(ks >= 0 && ks <= 2);
12644 Bool isU = bitU == 1;
12645 Bool is2 = bitQ == 1;
12646 UInt mm = 32; // invalid
12647 UInt ix = 16; // invalid
12648 switch (size) {
12649 case X00:
12650 return False; // h_b_b[] case is not allowed
12651 case X01:
12652 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12653 case X10:
12654 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12655 case X11:
12656 return False; // q_d_d[] case is not allowed
12657 default:
12658 vassert(0);
12659 }
12660 vassert(mm < 32 && ix < 16);
12661 IRTemp vecN = newTempV128();
12662 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12663 IRTemp vecD = newTempV128();
12664 assign(vecN, getQReg128(nn));
12665 assign(vecD, getQReg128(dd));
12666 IRTemp res = IRTemp_INVALID;
12667 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12668 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12669 putQReg128(dd, mkexpr(res));
12670 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12671 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12672 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12673 HChar ch = size == X01 ? 'h' : 's';
12674 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12675 isU ? 'u' : 's', nm, is2 ? "2" : "",
12676 nameQReg128(dd), arrWide,
12677 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12678 return True;
12679 }
12680
12681 if (bitU == 0
12682 && (opcode == BITS4(1,0,1,1)
12683 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12684 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12685 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12686 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12687 /* Widens, and size refers to the narrowed lanes. */
12688 UInt ks = 3;
12689 switch (opcode) {
12690 case BITS4(1,0,1,1): ks = 0; break;
12691 case BITS4(0,0,1,1): ks = 1; break;
12692 case BITS4(0,1,1,1): ks = 2; break;
12693 default: vassert(0);
12694 }
12695 vassert(ks >= 0 && ks <= 2);
12696 Bool is2 = bitQ == 1;
12697 UInt mm = 32; // invalid
12698 UInt ix = 16; // invalid
12699 switch (size) {
12700 case X00:
12701 return False; // h_b_b[] case is not allowed
12702 case X01:
12703 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12704 case X10:
12705 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12706 case X11:
12707 return False; // q_d_d[] case is not allowed
12708 default:
12709 vassert(0);
12710 }
12711 vassert(mm < 32 && ix < 16);
12712 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12713 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12714 newTempsV128_2(&vecN, &vecD);
12715 assign(vecN, getQReg128(nn));
12716 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12717 assign(vecD, getQReg128(dd));
12718 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12719 is2, size, "mas"[ks],
12720 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12721 putQReg128(dd, mkexpr(res));
12722 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12723 updateQCFLAGwithDifference(sat1q, sat1n);
12724 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12725 updateQCFLAGwithDifference(sat2q, sat2n);
12726 }
12727 const HChar* nm = ks == 0 ? "sqdmull"
12728 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12729 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12730 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12731 HChar ch = size == X01 ? 'h' : 's';
12732 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12733 nm, is2 ? "2" : "",
12734 nameQReg128(dd), arrWide,
12735 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12736 return True;
12737 }
12738
12739 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12740 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12741 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12742 UInt mm = 32; // invalid
12743 UInt ix = 16; // invalid
12744 switch (size) {
12745 case X00:
12746 return False; // b case is not allowed
12747 case X01:
12748 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12749 case X10:
12750 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12751 case X11:
12752 return False; // q case is not allowed
12753 default:
12754 vassert(0);
12755 }
12756 vassert(mm < 32 && ix < 16);
12757 Bool isR = opcode == BITS4(1,1,0,1);
12758 IRTemp res, sat1q, sat1n, vN, vM;
12759 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12760 vN = newTempV128();
12761 assign(vN, getQReg128(nn));
12762 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12763 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12764 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12765 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12766 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12767 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12768 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12769 HChar ch = size == X01 ? 'h' : 's';
12770 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
12771 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12772 return True;
12773 }
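   /* The QC (cumulative saturation) handling in the two blocks above
      works by computing both a saturated and an unsaturated result
      and setting QC if they differ.  Worked example (an architectural
      fact about SQDMULH, not taken from this file): with both H-lane
      inputs equal to 0x8000, the doubled product 2 * (-32768 * -32768)
      = 2^31 does not fit in a signed 32-bit value and saturates to
      0x7FFFFFFF; the returned high half is 0x7FFF, which differs from
      the unsaturated high half 0x8000, so QC is set. */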
12774
12775 return False;
12776 # undef INSN
12777 }
12778
12779
12780 static
12781 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12782 {
12783 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12784 return False;
12785 # undef INSN
12786 }
12787
12788
12789 static
12790 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12791 {
12792 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12793 return False;
12794 # undef INSN
12795 }
12796
12797
12798 static
12799 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12800 {
12801 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12802 return False;
12803 # undef INSN
12804 }
12805
12806
12807 static
12808 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12809 {
12810 /* 31 28 23 21 20 15 13 9 4
12811 000 11110 ty 1 m op 1000 n opcode2
12812 The first 3 bits are really "M 0 S", but M and S are always zero.
12813 Decode fields are: ty,op,opcode2
12814 */
12815 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12816 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12817 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12818 return False;
12819 }
12820 UInt ty = INSN(23,22);
12821 UInt mm = INSN(20,16);
12822 UInt op = INSN(15,14);
12823 UInt nn = INSN(9,5);
12824 UInt opcode2 = INSN(4,0);
12825 vassert(ty < 4);
12826
12827 if (ty <= X01 && op == X00
12828 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12829 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
12830 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
12831 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
12832 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12833 /* 31 23 20 15 9 4
12834 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
12835 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
12836 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
12837 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
12838
12839 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
12840 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
12841 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
12842 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
12843
12844 FCMPE generates Invalid Operation exn if either arg is any kind
12845 of NaN. FCMP generates Invalid Operation exn if either arg is a
12846 signalling NaN. We ignore this detail here and produce the same
12847 IR for both.
12848 */
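   /* For reference, the architected flag results of FCMP/FCMPE are:
         argL <  argR  ->  N=1 Z=0 C=0 V=0
         argL == argR  ->  N=0 Z=1 C=1 V=0
         argL >  argR  ->  N=0 Z=0 C=1 V=0
         unordered     ->  N=0 Z=0 C=1 V=1
      mk_convert_IRCmpF64Result_to_NZCV is expected to produce this
      4-bit pattern, which is then shifted into bits 31:28 below. */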
12849 Bool isD = (ty & 1) == 1;
12850 Bool isCMPE = (opcode2 & 16) == 16;
12851 Bool cmpZero = (opcode2 & 8) == 8;
12852 IRType ity = isD ? Ity_F64 : Ity_F32;
12853 Bool valid = True;
12854 if (cmpZero && mm != 0) valid = False;
12855 if (valid) {
12856 IRTemp argL = newTemp(ity);
12857 IRTemp argR = newTemp(ity);
12858 IRTemp irRes = newTemp(Ity_I32);
12859 assign(argL, getQRegLO(nn, ity));
12860 assign(argR,
12861 cmpZero
12862 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12863 : getQRegLO(mm, ity));
12864 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12865 mkexpr(argL), mkexpr(argR)));
12866 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12867 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12868 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12869 setFlags_COPY(nzcv_28x0);
12870 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12871 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12872 return True;
12873 }
12874 return False;
12875 }
12876
12877 return False;
12878 # undef INSN
12879 }
12880
12881
12882 static
12883 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12884 {
12885 /* 31 28 23 21 20 15 11 9 4 3
12886 000 11110 ty 1 m cond 01 n op nzcv
12887 The first 3 bits are really "M 0 S", but M and S are always zero.
12888 Decode fields are: ty,op
12889 */
12890 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12891 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12892 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12893 return False;
12894 }
12895 UInt ty = INSN(23,22);
12896 UInt mm = INSN(20,16);
12897 UInt cond = INSN(15,12);
12898 UInt nn = INSN(9,5);
12899 UInt op = INSN(4,4);
12900 UInt nzcv = INSN(3,0);
12901 vassert(ty < 4 && op <= 1);
12902
12903 if (ty <= BITS2(0,1)) {
12904 /* -------- 00,0 FCCMP s_s -------- */
12905 /* -------- 00,1 FCCMPE s_s -------- */
12906 /* -------- 01,0 FCCMP d_d -------- */
12907 /* -------- 01,1 FCCMPE d_d -------- */
12908
12909 /* FCCMPE generates Invalid Operation exn if either arg is any kind
12910 of NaN. FCCMP generates Invalid Operation exn if either arg is a
12911 signalling NaN. We ignore this detail here and produce the same
12912 IR for both.
12913 */
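      /* Worked example (following the ARM semantics, not text from
         this file): "fccmp s0, s1, #8, eq" sets NZCV from comparing
         s0 with s1 if the EQ condition currently holds, and otherwise
         sets NZCV to 0b1000 (just N).  The ITE over condT below makes
         exactly that choice between the computed and the immediate
         flag values. */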
12914 Bool isD = (ty & 1) == 1;
12915 Bool isCMPE = op == 1;
12916 IRType ity = isD ? Ity_F64 : Ity_F32;
12917 IRTemp argL = newTemp(ity);
12918 IRTemp argR = newTemp(ity);
12919 IRTemp irRes = newTemp(Ity_I32);
12920 assign(argL, getQRegLO(nn, ity));
12921 assign(argR, getQRegLO(mm, ity));
12922 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12923 mkexpr(argL), mkexpr(argR)));
12924 IRTemp condT = newTemp(Ity_I1);
12925 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12926 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12927
12928 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12929 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12930
12931 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12932
12933 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12934 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12935 mkexpr(nzcvT_28x0), nzcvF_28x0));
12936 setFlags_COPY(nzcv_28x0);
12937 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12938 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12939 return True;
12940 }
12941
12942 return False;
12943 # undef INSN
12944 }
12945
12946
12947 static
12948 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12949 {
12950 /* 31 23 21 20 15 11 9 5
12951 000 11110 ty 1 m cond 11 n d
12952 The first 3 bits are really "M 0 S", but M and S are always zero.
12953 Decode fields: ty
12954 */
12955 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12956 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12957 || INSN(11,10) != BITS2(1,1)) {
12958 return False;
12959 }
12960 UInt ty = INSN(23,22);
12961 UInt mm = INSN(20,16);
12962 UInt cond = INSN(15,12);
12963 UInt nn = INSN(9,5);
12964 UInt dd = INSN(4,0);
12965 if (ty <= X01) {
12966 /* -------- 00: FCSEL s_s -------- */
12967 /* -------- 01: FCSEL d_d -------- */
12968 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12969 IRTemp srcT = newTemp(ity);
12970 IRTemp srcF = newTemp(ity);
12971 IRTemp res = newTemp(ity);
12972 assign(srcT, getQRegLO(nn, ity));
12973 assign(srcF, getQRegLO(mm, ity));
12974 assign(res, IRExpr_ITE(
12975 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12976 mkexpr(srcT), mkexpr(srcF)));
12977 putQReg128(dd, mkV128(0x0000));
12978 putQRegLO(dd, mkexpr(res));
12979 DIP("fcsel %s, %s, %s, %s\n",
12980 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12981 nameCC(cond));
12982 return True;
12983 }
12984 return False;
12985 # undef INSN
12986 }
12987
12988
12989 static
12990 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12991 {
12992 /* 31 28 23 21 20 14 9 4
12993 000 11110 ty 1 opcode 10000 n d
12994 The first 3 bits are really "M 0 S", but M and S are always zero.
12995 Decode fields: ty,opcode
12996 */
12997 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12998 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12999 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13000 return False;
13001 }
13002 UInt ty = INSN(23,22);
13003 UInt opcode = INSN(20,15);
13004 UInt nn = INSN(9,5);
13005 UInt dd = INSN(4,0);
13006
13007 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13008 /* -------- 0x,000000: FMOV d_d, s_s -------- */
13009 /* -------- 0x,000001: FABS d_d, s_s -------- */
13010 /* -------- 0x,000010: FNEG d_d, s_s -------- */
13011 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13012 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13013 IRTemp src = newTemp(ity);
13014 IRTemp res = newTemp(ity);
13015 const HChar* nm = "??";
13016 assign(src, getQRegLO(nn, ity));
13017 switch (opcode) {
13018 case BITS6(0,0,0,0,0,0):
13019 nm = "fmov"; assign(res, mkexpr(src)); break;
13020 case BITS6(0,0,0,0,0,1):
13021 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13022 case BITS6(0,0,0,0,1,0):
13023 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13024 case BITS6(0,0,0,0,1,1):
13025 nm = "fsqrt";
13026 assign(res, binop(mkSQRTF(ity),
13027 mkexpr(mk_get_IR_rounding_mode()),
13028 mkexpr(src))); break;
13029 default:
13030 vassert(0);
13031 }
13032 putQReg128(dd, mkV128(0x0000));
13033 putQRegLO(dd, mkexpr(res));
13034 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13035 return True;
13036 }
13037
13038 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13039 || opcode == BITS6(0,0,0,1,0,1)))
13040 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13041 || opcode == BITS6(0,0,0,1,0,1)))
13042 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13043 || opcode == BITS6(0,0,0,1,0,0)))) {
13044 /* -------- 11,000100: FCVT s_h -------- */
13045 /* -------- 11,000101: FCVT d_h -------- */
13046 /* -------- 00,000111: FCVT h_s -------- */
13047 /* -------- 00,000101: FCVT d_s -------- */
13048 /* -------- 01,000111: FCVT h_d -------- */
13049 /* -------- 01,000100: FCVT s_d -------- */
13050 /* 31 23 21 16 14 9 4
13051 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13052 --------- 11 ----- 01 --------- FCVT Dd, Hn
13053 --------- 00 ----- 11 --------- FCVT Hd, Sn
13054 --------- 00 ----- 01 --------- FCVT Dd, Sn
13055 --------- 01 ----- 11 --------- FCVT Hd, Dn
13056 --------- 01 ----- 00 --------- FCVT Sd, Dn
13057 Rounding, when dst is smaller than src, is per the FPCR.
13058 */
13059 UInt b2322 = ty;
13060 UInt b1615 = opcode & BITS2(1,1);
13061 switch ((b2322 << 2) | b1615) {
13062 case BITS4(0,0,0,1): // S -> D
13063 case BITS4(1,1,0,1): { // H -> D
13064 Bool srcIsH = b2322 == BITS2(1,1);
13065 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13066 IRTemp res = newTemp(Ity_F64);
13067 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13068 getQRegLO(nn, srcTy)));
13069 putQReg128(dd, mkV128(0x0000));
13070 putQRegLO(dd, mkexpr(res));
13071 DIP("fcvt %s, %s\n",
13072 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13073 return True;
13074 }
13075 case BITS4(0,1,0,0): // D -> S
13076 case BITS4(0,1,1,1): { // D -> H
13077 Bool dstIsH = b1615 == BITS2(1,1);
13078 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13079 IRTemp res = newTemp(dstTy);
13080 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13081 mkexpr(mk_get_IR_rounding_mode()),
13082 getQRegLO(nn, Ity_F64)));
13083 putQReg128(dd, mkV128(0x0000));
13084 putQRegLO(dd, mkexpr(res));
13085 DIP("fcvt %s, %s\n",
13086 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13087 return True;
13088 }
13089 case BITS4(0,0,1,1): // S -> H
13090 case BITS4(1,1,0,0): { // H -> S
13091 Bool toH = b1615 == BITS2(1,1);
13092 IRType srcTy = toH ? Ity_F32 : Ity_F16;
13093 IRType dstTy = toH ? Ity_F16 : Ity_F32;
13094 IRTemp res = newTemp(dstTy);
13095 if (toH) {
13096 assign(res, binop(Iop_F32toF16,
13097 mkexpr(mk_get_IR_rounding_mode()),
13098 getQRegLO(nn, srcTy)));
13099
13100 } else {
13101 assign(res, unop(Iop_F16toF32,
13102 getQRegLO(nn, srcTy)));
13103 }
13104 putQReg128(dd, mkV128(0x0000));
13105 putQRegLO(dd, mkexpr(res));
13106 DIP("fcvt %s, %s\n",
13107 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13108 return True;
13109 }
13110 default:
13111 break;
13112 }
13113 /* else unhandled */
13114 return False;
13115 }
13116
13117 if (ty <= X01
13118 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13119 && opcode != BITS6(0,0,1,1,0,1)) {
13120 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13121 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13122 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13123 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13124 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13125 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13126 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13127 /* 31 23 21 17 14 9 4
13128 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
13129 rm
13130 x==0 => S-registers, x==1 => D-registers
13131 rm (17:15) encodings:
13132 111 per FPCR (FRINTI)
13133 001 +inf (FRINTP)
13134 010 -inf (FRINTM)
13135 011 zero (FRINTZ)
13136 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
13137 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
13138 110 per FPCR + "exact = TRUE" (FRINTX)
13139 101 unallocated
13140 */
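      /* Worked example of why the FRINTA kludge below matters
         (standard IEEE rounding behaviour, not derived from this
         file): for an input of 2.5, round-to-nearest-ties-to-even
         gives 2.0 whereas round-to-nearest-ties-away-from-zero,
         which is what FRINTA wants, gives 3.0.  With Irrm_NEAREST
         standing in for the ties-away mode, FRINTA of 2.5 is
         computed here as 2.0. */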
13141 Bool isD = (ty & 1) == 1;
13142 UInt rm = opcode & BITS6(0,0,0,1,1,1);
13143 IRType ity = isD ? Ity_F64 : Ity_F32;
13144 IRExpr* irrmE = NULL;
13145 UChar ch = '?';
13146 switch (rm) {
13147 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13148 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13149 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13150 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13151 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13152 // I am unsure about the following, due to the "integral exact"
13153 // description in the manual. What does it mean? (frintx, that is)
13154 case BITS3(1,1,0):
13155 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13156 case BITS3(1,1,1):
13157 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13158 // The following is a kludge. There's no Irrm_ value to represent
13159 // this ("to nearest, with ties to even")
13160 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13161 default: break;
13162 }
13163 if (irrmE) {
13164 IRTemp src = newTemp(ity);
13165 IRTemp dst = newTemp(ity);
13166 assign(src, getQRegLO(nn, ity));
13167 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13168 irrmE, mkexpr(src)));
13169 putQReg128(dd, mkV128(0x0000));
13170 putQRegLO(dd, mkexpr(dst));
13171 DIP("frint%c %s, %s\n",
13172 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13173 return True;
13174 }
13175 return False;
13176 }
13177
13178 return False;
13179 # undef INSN
13180 }
13181
13182
13183 static
13184 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13185 {
13186 /* 31 28 23 21 20 15 11 9 4
13187 000 11110 ty 1 m opcode 10 n d
13188 The first 3 bits are really "M 0 S", but M and S are always zero.
13189 Decode fields: ty, opcode
13190 */
13191 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13192 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13193 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13194 return False;
13195 }
13196 UInt ty = INSN(23,22);
13197 UInt mm = INSN(20,16);
13198 UInt opcode = INSN(15,12);
13199 UInt nn = INSN(9,5);
13200 UInt dd = INSN(4,0);
13201
13202 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13203 /* ------- 0x,0000: FMUL d_d, s_s ------- */
13204 /* ------- 0x,0001: FDIV d_d, s_s ------- */
13205 /* ------- 0x,0010: FADD d_d, s_s ------- */
13206 /* ------- 0x,0011: FSUB d_d, s_s ------- */
13207 /* ------- 0x,0100: FMAX d_d, s_s ------- */
13208 /* ------- 0x,0101: FMIN d_d, s_s ------- */
13209 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13210 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13211 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13212 IROp iop = Iop_INVALID;
13213 const HChar* nm = "???";
13214 switch (opcode) {
13215 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13216 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13217 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13218 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13219 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13220 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13221 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13222 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13223 default: vassert(0);
13224 }
13225 if (opcode <= BITS4(0,0,1,1)) {
13226 // This is really not good code. TODO: avoid width-changing
13227 IRTemp res = newTemp(ity);
13228 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13229 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13230 putQReg128(dd, mkV128(0));
13231 putQRegLO(dd, mkexpr(res));
13232 } else {
13233 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13234 binop(iop, getQReg128(nn), getQReg128(mm))));
13235 }
13236 DIP("%s %s, %s, %s\n",
13237 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13238 return True;
13239 }
13240
13241 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13242 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13243 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13244 IROp iop = mkMULF(ity);
13245 IROp iopn = mkNEGF(ity);
13246 const HChar* nm = "fnmul";
13247 IRExpr* resE = unop(iopn,
13248 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13249 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13250 IRTemp res = newTemp(ity);
13251 assign(res, resE);
13252 putQReg128(dd, mkV128(0));
13253 putQRegLO(dd, mkexpr(res));
13254 DIP("%s %s, %s, %s\n",
13255 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13256 return True;
13257 }
13258
13259 return False;
13260 # undef INSN
13261 }
13262
13263
13264 static
13265 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13266 {
13267 /* 31 28 23 21 20 15 14 9 4
13268 000 11111 ty o1 m o0 a n d
13269 The first 3 bits are really "M 0 S", but M and S are always zero.
13270 Decode fields: ty,o1,o0
13271 */
13272 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13273 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13274 return False;
13275 }
13276 UInt ty = INSN(23,22);
13277 UInt bitO1 = INSN(21,21);
13278 UInt mm = INSN(20,16);
13279 UInt bitO0 = INSN(15,15);
13280 UInt aa = INSN(14,10);
13281 UInt nn = INSN(9,5);
13282 UInt dd = INSN(4,0);
13283 vassert(ty < 4);
13284
13285 if (ty <= X01) {
13286 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13287 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13288 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13289 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13290 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13291 /* 31 22 20 15 14 9 4 ix
13292 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13293 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13294 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13295 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13296 where Fx=Dx when sz=1, Fx=Sx when sz=0
13297
13298 -----SPEC------ ----IMPL----
13299 fmadd a + n * m a + n * m
13300 fmsub a + (-n) * m a - n * m
13301 fnmadd (-a) + (-n) * m -(a + n * m)
13302 fnmsub (-a) + n * m -(a - n * m)
13303 */
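      /* The split multiply-then-add evaluation below rounds twice, so
         for some inputs the result differs from a true fused
         multiply-add in the last mantissa bit.  A host-side
         illustration (purely a sketch, not part of the decoder;
         fma() is the C99 <math.h> fused multiply-add):

            #include <math.h>
            double a = 1.0, n = 1.0 + 0x1p-30, m = 1.0 - 0x1p-30;
            double fused = fma(n, m, a);   // one rounding
            double split = n * m + a;      // two roundings
            // fused and split agree for many operand triples,
            // but can differ in the final bit for others
      */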
13304 Bool isD = (ty & 1) == 1;
13305 UInt ix = (bitO1 << 1) | bitO0;
13306 IRType ity = isD ? Ity_F64 : Ity_F32;
13307 IROp opADD = mkADDF(ity);
13308 IROp opSUB = mkSUBF(ity);
13309 IROp opMUL = mkMULF(ity);
13310 IROp opNEG = mkNEGF(ity);
13311 IRTemp res = newTemp(ity);
13312 IRExpr* eA = getQRegLO(aa, ity);
13313 IRExpr* eN = getQRegLO(nn, ity);
13314 IRExpr* eM = getQRegLO(mm, ity);
13315 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13316 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13317 switch (ix) {
13318 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13319 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13320 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13321 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13322 default: vassert(0);
13323 }
13324 putQReg128(dd, mkV128(0x0000));
13325 putQRegLO(dd, mkexpr(res));
13326 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13327 DIP("%s %s, %s, %s, %s\n",
13328 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13329 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13330 return True;
13331 }
13332
13333 return False;
13334 # undef INSN
13335 }
13336
13337
13338 static
13339 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13340 {
13341 /* 31 28 23 21 20 12 9 4
13342 000 11110 ty 1 imm8 100 imm5 d
13343 The first 3 bits are really "M 0 S", but M and S are always zero.
13344 */
13345 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13346 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13347 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13348 return False;
13349 }
13350 UInt ty = INSN(23,22);
13351 UInt imm8 = INSN(20,13);
13352 UInt imm5 = INSN(9,5);
13353 UInt dd = INSN(4,0);
13354
13355 /* ------- 00,00000: FMOV s_imm ------- */
13356 /* ------- 01,00000: FMOV d_imm ------- */
13357 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13358 Bool isD = (ty & 1) == 1;
13359 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
13360 if (!isD) {
13361 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13362 }
13363 putQReg128(dd, mkV128(0));
13364 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13365 DIP("fmov %s, #0x%llx\n",
13366 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13367 return True;
13368 }
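   /* VFPExpandImm, used just above, follows the ARM ARM pseudocode:
      for a 64-bit target the 8-bit immediate abcdefgh expands to
         a : NOT(b) : Replicate(b,8) : cdefgh : Zeros(48)
      (and analogously with Replicate(b,5) and Zeros(19) for 32 bits).
      Worked example, assuming that pseudocode: imm8 = 0x70 expands to
      0x3FF0000000000000, i.e. FMOV Dd, #1.0. */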
13369
13370 return False;
13371 # undef INSN
13372 }
13373
13374
13375 static
13376 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13377 {
13379 /* 31 30 29 28 23 21 20 18 15 9 4
13380 sf 0 0 11110 type 0 rmode opcode scale n d
13381 The first 3 bits are really "sf 0 S", but S is always zero.
13382 Decode fields: sf,type,rmode,opcode
13383 */
13384 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13385 if (INSN(30,29) != BITS2(0,0)
13386 || INSN(28,24) != BITS5(1,1,1,1,0)
13387 || INSN(21,21) != 0) {
13388 return False;
13389 }
13390 UInt bitSF = INSN(31,31);
13391 UInt ty = INSN(23,22); // type
13392 UInt rm = INSN(20,19); // rmode
13393 UInt op = INSN(18,16); // opcode
13394 UInt sc = INSN(15,10); // scale
13395 UInt nn = INSN(9,5);
13396 UInt dd = INSN(4,0);
13397
13398 if (ty <= X01 && rm == X11
13399 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
13400 /* -------- (ix) sf ty rm opc -------- */
13401 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
13402 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
13403 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
13404 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
13405
13406 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
13407 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
13408 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
13409 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
13410 Bool isI64 = bitSF == 1;
13411 Bool isF64 = (ty & 1) == 1;
13412 Bool isU = (op & 1) == 1;
13413 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13414
13415 Int fbits = 64 - sc;
13416 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13417
13418 Double scale = two_to_the_plus(fbits);
13419 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13420 : IRExpr_Const(IRConst_F32( (Float)scale ));
13421 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13422
13423 const IROp ops[8]
13424 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
13425 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
13426 IRTemp irrm = newTemp(Ity_I32);
13427 assign(irrm, mkU32(Irrm_ZERO));
13428
13429 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
13430 IRExpr* res = binop(ops[ix], mkexpr(irrm),
13431 triop(opMUL, mkexpr(irrm), src, scaleE));
13432 putIRegOrZR(isI64, dd, res);
13433
13434 DIP("fcvtz%c %s, %s, #%d\n",
13435 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
13436 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
13437 return True;
13438 }
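   /* Worked example for the scaled conversion just above (derived
      from the code, not from the ARM ARM text): for FCVTZS Wd, Sn, #8
      the scale field is 56, so fbits = 64 - 56 = 8, and an input of
      1.5 is converted as trunc(1.5 * 2^8) = 384.  The SCVTF/UCVTF
      #fbits case below performs the inverse, multiplying the
      converted value by 2^-fbits. */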
13439
13440 /* ------ sf,ty,rm,opc ------ */
13441 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
13442 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
13443 /* (ix) sf S 28 ty rm opc 15 9 4
13444 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
13445 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
13446 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
13447 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
13448
13449 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
13450 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
13451 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
13452 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
13453
13454 These are signed/unsigned conversion from integer registers to
13455 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13456 scaled per |scale|.
13457 */
13458 if (ty <= X01 && rm == X00
13459 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
13460 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13461 Bool isI64 = bitSF == 1;
13462 Bool isF64 = (ty & 1) == 1;
13463 Bool isU = (op & 1) == 1;
13464 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13465
13466 Int fbits = 64 - sc;
13467 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13468
13469 Double scale = two_to_the_minus(fbits);
13470 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13471 : IRExpr_Const(IRConst_F32( (Float)scale ));
13472 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13473
13474 const IROp ops[8]
13475 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13476 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13477 IRExpr* src = getIRegOrZR(isI64, nn);
13478 IRExpr* res = (isF64 && !isI64)
13479 ? unop(ops[ix], src)
13480 : binop(ops[ix],
13481 mkexpr(mk_get_IR_rounding_mode()), src);
13482 putQReg128(dd, mkV128(0));
13483 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
13484
13485 DIP("%ccvtf %s, %s, #%d\n",
13486 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13487 nameIRegOrZR(isI64, nn), fbits);
13488 return True;
13489 }
13490
13491 return False;
13492 # undef INSN
13493 }
13494
13495
13496 static
13497 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13498 {
13499 /* 31 30 29 28 23 21 20 18 15 9 4
13500 sf 0 0 11110 type 1 rmode opcode 000000 n d
13501 The first 3 bits are really "sf 0 S", but S is always zero.
13502 Decode fields: sf,type,rmode,opcode
13503 */
13504 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13505 if (INSN(30,29) != BITS2(0,0)
13506 || INSN(28,24) != BITS5(1,1,1,1,0)
13507 || INSN(21,21) != 1
13508 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
13509 return False;
13510 }
13511 UInt bitSF = INSN(31,31);
13512 UInt ty = INSN(23,22); // type
13513 UInt rm = INSN(20,19); // rmode
13514 UInt op = INSN(18,16); // opcode
13515 UInt nn = INSN(9,5);
13516 UInt dd = INSN(4,0);
13517
13518 // op = 000, 001
13519 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
13520 /* 30 23 20 18 15 9 4
13521 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
13522 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
13523 ---------------- 01 -------------- FCVTP-------- (round to +inf)
13524 ---------------- 10 -------------- FCVTM-------- (round to -inf)
13525 ---------------- 11 -------------- FCVTZ-------- (round to zero)
13526 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
13527 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
13528
13529 Rd is Xd when sf==1, Wd when sf==0
13530 Fn is Dn when x==1, Sn when x==0
13531 20:19 carry the rounding mode, using the same encoding as FPCR
13532 */
13533 if (ty <= X01
13534 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
13535 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
13536 )
13537 ) {
13538 Bool isI64 = bitSF == 1;
13539 Bool isF64 = (ty & 1) == 1;
13540 Bool isU = (op & 1) == 1;
13541 /* Decide on the IR rounding mode to use. */
13542 IRRoundingMode irrm = 8; /*impossible*/
13543 HChar ch = '?';
13544 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
13545 switch (rm) {
13546 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
13547 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
13548 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
13549 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
13550 default: vassert(0);
13551 }
13552 } else {
13553 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
13554 switch (rm) {
13555 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
13556 default: vassert(0);
13557 }
13558 }
13559 vassert(irrm != 8);
13560 /* Decide on the conversion primop, based on the source size,
13561 dest size and signedness (8 possibilities). Case coding:
13562 F32 ->s I32 0
13563 F32 ->u I32 1
13564 F32 ->s I64 2
13565 F32 ->u I64 3
13566 F64 ->s I32 4
13567 F64 ->u I32 5
13568 F64 ->s I64 6
13569 F64 ->u I64 7
13570 */
13571 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
13572 vassert(ix < 8);
13573 const IROp iops[8]
13574 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
13575 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
13576 IROp iop = iops[ix];
13577 // A bit of ATCery: bounce all cases we haven't seen an example of.
13578 if (/* F32toI32S */
13579 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
13580 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
13581 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
13582 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
13583 /* F32toI32U */
13584 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
13585 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
13586 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
13587 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
13588 /* F32toI64S */
13589 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
13590 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
13591 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
13592 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
13593 /* F32toI64U */
13594 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
13595 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
13596 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
13597 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
13598 /* F64toI32S */
13599 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
13600 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
13601 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
13602 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
13603 /* F64toI32U */
13604 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
13605 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
13606 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
13607 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
13608 /* F64toI64S */
13609 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
13610 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
13611 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
13612 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
13613 /* F64toI64U */
13614 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
13615 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
13616 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
13617 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
13618 ) {
13619 /* validated */
13620 } else {
13621 return False;
13622 }
13623 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
13624 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
13625 IRTemp src = newTemp(srcTy);
13626 IRTemp dst = newTemp(dstTy);
13627 assign(src, getQRegLO(nn, srcTy));
13628 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
13629 putIRegOrZR(isI64, dd, mkexpr(dst));
13630 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
13631 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
13632 return True;
13633 }
13634
13635 // op = 010, 011
13636 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
13637 /* (ix) sf S 28 ty rm op 15 9 4
13638 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
13639 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
13640 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
13641 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
13642
13643 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
13644 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
13645 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
13646 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
13647
13648 These are signed/unsigned conversion from integer registers to
13649 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
13650 */
13651 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
13652 Bool isI64 = bitSF == 1;
13653 Bool isF64 = (ty & 1) == 1;
13654 Bool isU = (op & 1) == 1;
13655 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13656 const IROp ops[8]
13657 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13658 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13659 IRExpr* src = getIRegOrZR(isI64, nn);
13660 IRExpr* res = (isF64 && !isI64)
13661 ? unop(ops[ix], src)
13662 : binop(ops[ix],
13663 mkexpr(mk_get_IR_rounding_mode()), src);
13664 putQReg128(dd, mkV128(0));
13665 putQRegLO(dd, res);
13666 DIP("%ccvtf %s, %s\n",
13667 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13668 nameIRegOrZR(isI64, nn));
13669 return True;
13670 }
13671
13672 // op = 110, 111
13673 /* -------- FMOV (general) -------- */
13674 /* case sf S ty rm op 15 9 4
13675 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
13676 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
13677 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
13678
13679 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
13680 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
13681 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
13682 */
13683 if (1) {
13684 UInt ix = 0; // case
13685 if (bitSF == 0) {
13686 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13687 ix = 1;
13688 else
13689 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13690 ix = 4;
13691 } else {
13692 vassert(bitSF == 1);
13693 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13694 ix = 2;
13695 else
13696 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13697 ix = 5;
13698 else
13699 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
13700 ix = 3;
13701 else
13702 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
13703 ix = 6;
13704 }
13705 if (ix > 0) {
13706 switch (ix) {
13707 case 1:
13708 putQReg128(dd, mkV128(0));
13709 putQRegLO(dd, getIReg32orZR(nn));
13710 DIP("fmov s%u, w%u\n", dd, nn);
13711 break;
13712 case 2:
13713 putQReg128(dd, mkV128(0));
13714 putQRegLO(dd, getIReg64orZR(nn));
13715 DIP("fmov d%u, x%u\n", dd, nn);
13716 break;
13717 case 3:
13718 putQRegHI64(dd, getIReg64orZR(nn));
13719 DIP("fmov v%u.d[1], x%u\n", dd, nn);
13720 break;
13721 case 4:
13722 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
13723 DIP("fmov w%u, s%u\n", dd, nn);
13724 break;
13725 case 5:
13726 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
13727 DIP("fmov x%u, d%u\n", dd, nn);
13728 break;
13729 case 6:
13730 putIReg64orZR(dd, getQRegHI64(nn));
13731 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
13732 break;
13733 default:
13734 vassert(0);
13735 }
13736 return True;
13737 }
13738 /* undecodable; fall through */
13739 }
13740
13741 return False;
13742 # undef INSN
13743 }
13744
13745
13746 static
13747 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
13748 {
13749 Bool ok;
13750 ok = dis_AdvSIMD_EXT(dres, insn);
13751 if (UNLIKELY(ok)) return True;
13752 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
13753 if (UNLIKELY(ok)) return True;
13754 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
13755 if (UNLIKELY(ok)) return True;
13756 ok = dis_AdvSIMD_across_lanes(dres, insn);
13757 if (UNLIKELY(ok)) return True;
13758 ok = dis_AdvSIMD_copy(dres, insn);
13759 if (UNLIKELY(ok)) return True;
13760 ok = dis_AdvSIMD_modified_immediate(dres, insn);
13761 if (UNLIKELY(ok)) return True;
13762 ok = dis_AdvSIMD_scalar_copy(dres, insn);
13763 if (UNLIKELY(ok)) return True;
13764 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
13765 if (UNLIKELY(ok)) return True;
13766 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
13767 if (UNLIKELY(ok)) return True;
13768 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
13769 if (UNLIKELY(ok)) return True;
13770 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
13771 if (UNLIKELY(ok)) return True;
13772 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
13773 if (UNLIKELY(ok)) return True;
13774 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
13775 if (UNLIKELY(ok)) return True;
13776 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
13777 if (UNLIKELY(ok)) return True;
13778 ok = dis_AdvSIMD_three_different(dres, insn);
13779 if (UNLIKELY(ok)) return True;
13780 ok = dis_AdvSIMD_three_same(dres, insn);
13781 if (UNLIKELY(ok)) return True;
13782 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
13783 if (UNLIKELY(ok)) return True;
13784 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
13785 if (UNLIKELY(ok)) return True;
13786 ok = dis_AdvSIMD_crypto_aes(dres, insn);
13787 if (UNLIKELY(ok)) return True;
13788 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
13789 if (UNLIKELY(ok)) return True;
13790 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
13791 if (UNLIKELY(ok)) return True;
13792 ok = dis_AdvSIMD_fp_compare(dres, insn);
13793 if (UNLIKELY(ok)) return True;
13794 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
13795 if (UNLIKELY(ok)) return True;
13796 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
13797 if (UNLIKELY(ok)) return True;
13798 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
13799 if (UNLIKELY(ok)) return True;
13800 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
13801 if (UNLIKELY(ok)) return True;
13802 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
13803 if (UNLIKELY(ok)) return True;
13804 ok = dis_AdvSIMD_fp_immediate(dres, insn);
13805 if (UNLIKELY(ok)) return True;
13806 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
13807 if (UNLIKELY(ok)) return True;
13808 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
13809 if (UNLIKELY(ok)) return True;
13810 return False;
13811 }
13812
13813
13814 /*------------------------------------------------------------*/
13815 /*--- Disassemble a single ARM64 instruction ---*/
13816 /*------------------------------------------------------------*/
13817
13818 /* Disassemble a single ARM64 instruction into IR. The instruction
13819 is located at |guest_instr| and has guest IP of
13820 |guest_PC_curr_instr|, which will have been set before the call
13821 here. Returns True iff the instruction was decoded, in which case
13822 *dres will be set accordingly, or False, in which case *dres should
13823 be ignored by the caller. */
13824
13825 static
13826 Bool disInstr_ARM64_WRK (
13827 /*MB_OUT*/DisResult* dres,
13828 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
13829 Bool resteerCisOk,
13830 void* callback_opaque,
13831 const UChar* guest_instr,
13832 const VexArchInfo* archinfo,
13833 const VexAbiInfo* abiinfo
13834 )
13835 {
13836 // A macro to fish bits out of 'insn'.
13837 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13838
13839 //ZZ DisResult dres;
13840 //ZZ UInt insn;
13841 //ZZ //Bool allow_VFP = False;
13842 //ZZ //UInt hwcaps = archinfo->hwcaps;
13843 //ZZ IRTemp condT; /* :: Ity_I32 */
13844 //ZZ UInt summary;
13845 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
13846 //ZZ
13847 //ZZ /* What insn variants are we supporting today? */
13848 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
13849 //ZZ // etc etc
13850
13851 /* Set result defaults. */
13852 dres->whatNext = Dis_Continue;
13853 dres->len = 4;
13854 dres->continueAt = 0;
13855 dres->jk_StopHere = Ijk_INVALID;
13856
13857 /* At least this is simple on ARM64: insns are all 4 bytes long, and
13858 4-aligned. So just fish the whole thing out of memory right now
13859 and have done. */
13860 UInt insn = getUIntLittleEndianly( guest_instr );
13861
13862 if (0) vex_printf("insn: 0x%x\n", insn);
13863
13864 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
13865
13866 vassert(0 == (guest_PC_curr_instr & 3ULL));
13867
13868 /* ----------------------------------------------------------- */
13869
13870 /* Spot "Special" instructions (see comment at top of file). */
13871 {
13872 const UChar* code = guest_instr;
13873 /* Spot the 16-byte preamble:
13874 93CC0D8C ror x12, x12, #3
13875 93CC358C ror x12, x12, #13
13876 93CCCD8C ror x12, x12, #51
13877 93CCF58C ror x12, x12, #61
13878 */
13879 UInt word1 = 0x93CC0D8C;
13880 UInt word2 = 0x93CC358C;
13881 UInt word3 = 0x93CCCD8C;
13882 UInt word4 = 0x93CCF58C;
13883 if (getUIntLittleEndianly(code+ 0) == word1 &&
13884 getUIntLittleEndianly(code+ 4) == word2 &&
13885 getUIntLittleEndianly(code+ 8) == word3 &&
13886 getUIntLittleEndianly(code+12) == word4) {
13887 /* Got a "Special" instruction preamble. Which one is it? */
13888 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
13889 /* orr x10,x10,x10 */) {
13890 /* X3 = client_request ( X4 ) */
13891 DIP("x3 = client_request ( x4 )\n");
13892 putPC(mkU64( guest_PC_curr_instr + 20 ));
13893 dres->jk_StopHere = Ijk_ClientReq;
13894 dres->whatNext = Dis_StopHere;
13895 return True;
13896 }
13897 else
13898 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
13899 /* orr x11,x11,x11 */) {
13900 /* X3 = guest_NRADDR */
13901 DIP("x3 = guest_NRADDR\n");
13902 dres->len = 20;
13903 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
13904 return True;
13905 }
13906 else
13907 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
13908 /* orr x12,x12,x12 */) {
13909 /* branch-and-link-to-noredir X8 */
13910 DIP("branch-and-link-to-noredir x8\n");
13911 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
13912 putPC(getIReg64orZR(8));
13913 dres->jk_StopHere = Ijk_NoRedir;
13914 dres->whatNext = Dis_StopHere;
13915 return True;
13916 }
13917 else
13918 if (getUIntLittleEndianly(code+16) == 0xAA090129
13919 /* orr x9,x9,x9 */) {
13920 /* IR injection */
13921 DIP("IR injection\n");
13922 vex_inject_ir(irsb, Iend_LE);
13923 // Invalidate the current insn. The reason is that the IRop we're
13924 // injecting here can change. In which case the translation has to
13925 // be redone. For ease of handling, we simply invalidate all the
13926 // time.
13927 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
13928 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
13929 putPC(mkU64( guest_PC_curr_instr + 20 ));
13930 dres->whatNext = Dis_StopHere;
13931 dres->jk_StopHere = Ijk_InvalICache;
13932 return True;
13933 }
13934 /* We don't know what it is. */
13935 return False;
13936 /*NOTREACHED*/
13937 }
13938 }
13939
13940 /* ----------------------------------------------------------- */
13941
13942 /* Main ARM64 instruction decoder starts here. */
13943
13944 Bool ok = False;
13945
13946 /* insn[28:25] determines the top-level grouping, so let's start
13947 off with that.
13948
13949 For all of these dis_ARM64_ functions, we pass *dres with the
13950 normal default results "insn OK, 4 bytes long, keep decoding" so
13951 they don't need to change it. However, decodes of control-flow
13952 insns may cause *dres to change.
13953 */
13954 switch (INSN(28,25)) {
13955 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
13956 // Data processing - immediate
13957 ok = dis_ARM64_data_processing_immediate(dres, insn);
13958 break;
13959 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
13960 // Branch, exception generation and system instructions
13961 ok = dis_ARM64_branch_etc(dres, insn, archinfo);
13962 break;
13963 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
13964 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
13965 // Loads and stores
13966 ok = dis_ARM64_load_store(dres, insn);
13967 break;
13968 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
13969 // Data processing - register
13970 ok = dis_ARM64_data_processing_register(dres, insn);
13971 break;
13972 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
13973 // Data processing - SIMD and floating point
13974 ok = dis_ARM64_simd_and_fp(dres, insn);
13975 break;
13976 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
13977 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
13978 // UNALLOCATED
13979 break;
13980 default:
13981 vassert(0); /* Can't happen */
13982 }
13983
13984 /* If the next-level down decoders failed, make sure |dres| didn't
13985 get changed. */
13986 if (!ok) {
13987 vassert(dres->whatNext == Dis_Continue);
13988 vassert(dres->len == 4);
13989 vassert(dres->continueAt == 0);
13990 vassert(dres->jk_StopHere == Ijk_INVALID);
13991 }
13992
13993 return ok;
13994
13995 # undef INSN
13996 }
13997
13998
13999 /*------------------------------------------------------------*/
14000 /*--- Top-level fn ---*/
14001 /*------------------------------------------------------------*/
14002
14003 /* Disassemble a single instruction into IR. The instruction
14004 is located in host memory at &guest_code[delta]. */
14005
14006 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
14007 Bool (*resteerOkFn) ( void*, Addr ),
14008 Bool resteerCisOk,
14009 void* callback_opaque,
14010 const UChar* guest_code_IN,
14011 Long delta_IN,
14012 Addr guest_IP,
14013 VexArch guest_arch,
14014 const VexArchInfo* archinfo,
14015 const VexAbiInfo* abiinfo,
14016 VexEndness host_endness_IN,
14017 Bool sigill_diag_IN )
14018 {
14019 DisResult dres;
14020 vex_bzero(&dres, sizeof(dres));
14021
14022 /* Set globals (see top of this file) */
14023 vassert(guest_arch == VexArchARM64);
14024
14025 irsb = irsb_IN;
14026 host_endness = host_endness_IN;
14027 guest_PC_curr_instr = (Addr64)guest_IP;
14028
14029 /* Sanity checks */
14030 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
14031 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14032 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
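   /* The single unsigned comparison above does what the note says:
      for x < 2 the subtraction wraps around to a value far greater
      than 15, so (x - 2) <= 15 really does pin x to [2 .. 17]. */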
14033
14034 /* Try to decode */
14035 Bool ok = disInstr_ARM64_WRK( &dres,
14036 resteerOkFn, resteerCisOk, callback_opaque,
14037 &guest_code_IN[delta_IN],
14038 archinfo, abiinfo );
14039 if (ok) {
14040 /* All decode successes end up here. */
14041 vassert(dres.len == 4 || dres.len == 20);
14042 switch (dres.whatNext) {
14043 case Dis_Continue:
14044 putPC( mkU64(dres.len + guest_PC_curr_instr) );
14045 break;
14046 case Dis_ResteerU:
14047 case Dis_ResteerC:
14048 putPC(mkU64(dres.continueAt));
14049 break;
14050 case Dis_StopHere:
14051 break;
14052 default:
14053 vassert(0);
14054 }
14055 DIP("\n");
14056 } else {
14057 /* All decode failures end up here. */
14058 if (sigill_diag_IN) {
14059 Int i, j;
14060 UChar buf[64];
14061 UInt insn
14062 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
14063 vex_bzero(buf, sizeof(buf));
14064 for (i = j = 0; i < 32; i++) {
14065 if (i > 0) {
14066 if ((i & 7) == 0) buf[j++] = ' ';
14067 else if ((i & 3) == 0) buf[j++] = '\'';
14068 }
14069 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14070 }
14071 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14072 vex_printf("disInstr(arm64): %s\n", buf);
14073 }
14074
14075 /* Tell the dispatcher that this insn cannot be decoded, and so
14076 has not been executed, and (is currently) the next to be
14077 executed. PC should be up-to-date since it is made so at the
14078 start of each insn, but nevertheless be paranoid and update
14079 it again right now. */
14080 putPC( mkU64(guest_PC_curr_instr) );
14081 dres.len = 0;
14082 dres.whatNext = Dis_StopHere;
14083 dres.jk_StopHere = Ijk_NoDecode;
14084 dres.continueAt = 0;
14085 }
14086 return dres;
14087 }
14088
14089
14090 /*--------------------------------------------------------------------*/
14091 /*--- end guest_arm64_toIR.c ---*/
14092 /*--------------------------------------------------------------------*/
14093